author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/ARM
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/ARM')
72 files changed, 7233 insertions, 7233 deletions
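The hunks below span several parts of the ARM backend: the straight-line-speculation (SLS) hardening features and pass hooks in ARM.td/ARM.h, the SpeculationBarrier pseudo-instruction lowering in ARMAsmPrinter, the Cortex-M7 bank-conflict hazard recognizer, and the machine-outliner LR/stack bookkeeping in ARMBaseInstrInfo. One self-contained detail that is easy to check by hand is the Thumb-2 predication overhead computed in ARMBaseInstrInfo::extraSizeToPredicateInstructions further down: an IT instruction is two bytes and can cover at most four instructions (only one when restrictIT() is in effect), while ARM mode pays nothing because every predicable instruction already carries a condition field. The sketch below re-derives that arithmetic outside of LLVM as a minimal standalone program; the function name extraPredicationBytes and the local divideCeil helper are illustrative, and only the constants come from the hunk itself.

```cpp
// Illustrative re-derivation (not code from this commit) of the IT-block
// size overhead used by ARMBaseInstrInfo::extraSizeToPredicateInstructions.
#include <cstdio>

// Ceiling division; LLVM has llvm::divideCeil, this local copy keeps the
// sketch self-contained.
static unsigned divideCeil(unsigned Num, unsigned Den) {
  return (Num + Den - 1) / Den;
}

// Extra bytes needed to predicate NumInsts instructions.
//  - ARM mode: every predicable instruction has a condition field, so 0.
//  - Thumb-2: each 2-byte IT instruction predicates up to 4 instructions,
//    or only 1 when the subtarget restricts IT blocks (restrictIT()).
static unsigned extraPredicationBytes(unsigned NumInsts, bool IsThumb2,
                                      bool RestrictIT) {
  if (!IsThumb2)
    return 0;
  unsigned MaxInsts = RestrictIT ? 1 : 4;
  return divideCeil(NumInsts, MaxInsts) * 2;
}

int main() {
  std::printf("%u\n", extraPredicationBytes(5, /*IsThumb2=*/true, false)); // 4
  std::printf("%u\n", extraPredicationBytes(5, /*IsThumb2=*/true, true));  // 10
  return 0;
}
```

Predicating five instructions therefore costs two IT instructions (4 bytes) on a normal Thumb-2 subtarget but five (10 bytes) where IT blocks are restricted, which is roughly the size trade-off weighed against keeping the branch.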
diff --git a/contrib/libs/llvm12/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/libs/llvm12/lib/Target/ARM/A15SDOptimizer.cpp index bb81233cf8..6c6f49ff6d 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/A15SDOptimizer.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/A15SDOptimizer.cpp @@ -359,7 +359,7 @@ void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, SmallVector<MachineInstr *, 8> Front; Front.push_back(MI); while (Front.size() != 0) { - MI = Front.pop_back_val(); + MI = Front.pop_back_val(); // If we have already explored this MachineInstr, ignore it. if (Reached.find(MI) != Reached.end()) diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARM.h b/contrib/libs/llvm12/lib/Target/ARM/ARM.h index f4fdc98037..2fbfabe828 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARM.h +++ b/contrib/libs/llvm12/lib/Target/ARM/ARM.h @@ -37,7 +37,7 @@ class PassRegistry; Pass *createMVETailPredicationPass(); FunctionPass *createARMLowOverheadLoopsPass(); -FunctionPass *createARMBlockPlacementPass(); +FunctionPass *createARMBlockPlacementPass(); Pass *createARMParallelDSPPass(); FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); @@ -56,8 +56,8 @@ InstructionSelector * createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI); Pass *createMVEGatherScatterLoweringPass(); -FunctionPass *createARMSLSHardeningPass(); -FunctionPass *createARMIndirectThunks(); +FunctionPass *createARMSLSHardeningPass(); +FunctionPass *createARMIndirectThunks(); void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); @@ -72,10 +72,10 @@ void initializeThumb2ITBlockPass(PassRegistry &); void initializeMVEVPTBlockPass(PassRegistry &); void initializeMVEVPTOptimisationsPass(PassRegistry &); void initializeARMLowOverheadLoopsPass(PassRegistry &); -void initializeARMBlockPlacementPass(PassRegistry &); +void initializeARMBlockPlacementPass(PassRegistry &); void initializeMVETailPredicationPass(PassRegistry &); void initializeMVEGatherScatterLoweringPass(PassRegistry &); -void initializeARMSLSHardeningPass(PassRegistry &); +void initializeARMSLSHardeningPass(PassRegistry &); } // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARM.td b/contrib/libs/llvm12/lib/Target/ARM/ARM.td index 3d0a0bf7f8..9540784c7f 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARM.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARM.td @@ -535,10 +535,10 @@ def HasV8_6aOps : SubtargetFeature<"v8.6a", "HasV8_6aOps", "true", [HasV8_5aOps, FeatureBF16, FeatureMatMulInt8]>; -def HasV8_7aOps : SubtargetFeature<"v8.7a", "HasV8_7aOps", "true", - "Support ARM v8.7a instructions", - [HasV8_6aOps]>; - +def HasV8_7aOps : SubtargetFeature<"v8.7a", "HasV8_7aOps", "true", + "Support ARM v8.7a instructions", + [HasV8_6aOps]>; + def HasV8_1MMainlineOps : SubtargetFeature< "v8.1m.main", "HasV8_1MMainlineOps", "true", "Support ARM v8-1M Mainline instructions", @@ -563,20 +563,20 @@ foreach i = {0-7} in [HasCDEOps]>; //===----------------------------------------------------------------------===// -// Control codegen mitigation against Straight Line Speculation vulnerability. 
-//===----------------------------------------------------------------------===// - -def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", - "HardenSlsRetBr", "true", - "Harden against straight line speculation across RETurn and BranchRegister " - "instructions">; -def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", - "HardenSlsBlr", "true", - "Harden against straight line speculation across indirect calls">; - - - -//===----------------------------------------------------------------------===// +// Control codegen mitigation against Straight Line Speculation vulnerability. +//===----------------------------------------------------------------------===// + +def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", + "HardenSlsRetBr", "true", + "Harden against straight line speculation across RETurn and BranchRegister " + "instructions">; +def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", + "HardenSlsBlr", "true", + "Harden against straight line speculation across indirect calls">; + + + +//===----------------------------------------------------------------------===// // ARM Processor subtarget features. // @@ -616,14 +616,14 @@ def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", "Cortex-A77 ARM processors", []>; def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", "CortexA78", "Cortex-A78 ARM processors", []>; -def ProcA78C : SubtargetFeature<"a78c", "ARMProcFamily", "CortexA78C", - "Cortex-A78C ARM processors", []>; +def ProcA78C : SubtargetFeature<"a78c", "ARMProcFamily", "CortexA78C", + "Cortex-A78C ARM processors", []>; def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", "Cortex-X1 ARM processors", []>; -def ProcV1 : SubtargetFeature<"neoverse-v1", "ARMProcFamily", - "NeoverseV1", "Neoverse-V1 ARM processors", []>; - +def ProcV1 : SubtargetFeature<"neoverse-v1", "ARMProcFamily", + "NeoverseV1", "Neoverse-V1 ARM processors", []>; + def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", "Qualcomm Krait processors", []>; def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", @@ -662,8 +662,8 @@ def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52", def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3", "Cortex-M3 ARM processors", []>; -def ProcM7 : SubtargetFeature<"m7", "ARMProcFamily", "CortexM7", - "Cortex-M7 ARM processors", []>; +def ProcM7 : SubtargetFeature<"m7", "ARMProcFamily", "CortexM7", + "Cortex-M7 ARM processors", []>; //===----------------------------------------------------------------------===// // ARM Helper classes. 
@@ -852,19 +852,19 @@ def ARMv86a : Architecture<"armv8.6-a", "ARMv86a", [HasV8_6aOps, FeatureCRC, FeatureRAS, FeatureDotProd]>; -def ARMv87a : Architecture<"armv8.7-a", "ARMv86a", [HasV8_7aOps, - FeatureAClass, - FeatureDB, - FeatureFPARMv8, - FeatureNEON, - FeatureDSP, - FeatureTrustZone, - FeatureMP, - FeatureVirtualization, - FeatureCrypto, - FeatureCRC, - FeatureRAS, - FeatureDotProd]>; +def ARMv87a : Architecture<"armv8.7-a", "ARMv86a", [HasV8_7aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS, + FeatureDotProd]>; def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, FeatureRClass, @@ -919,14 +919,14 @@ def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>; def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>; def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "ARMRegisterInfo.td" -include "ARMRegisterBanks.td" -include "ARMCallingConv.td" +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// +include "ARMRegisterInfo.td" +include "ARMRegisterBanks.td" +include "ARMCallingConv.td" + //===----------------------------------------------------------------------===// // ARM schedules. //===----------------------------------------------------------------------===// @@ -935,25 +935,25 @@ include "ARMPredicates.td" include "ARMSchedule.td" //===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "ARMInstrInfo.td" -def ARMInstrInfo : InstrInfo; - -//===----------------------------------------------------------------------===// -// ARM schedules -// -include "ARMScheduleV6.td" -include "ARMScheduleA8.td" -include "ARMScheduleA9.td" -include "ARMScheduleSwift.td" -include "ARMScheduleR52.td" -include "ARMScheduleA57.td" -include "ARMScheduleM4.td" -include "ARMScheduleM7.td" - -//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "ARMInstrInfo.td" +def ARMInstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// ARM schedules +// +include "ARMScheduleV6.td" +include "ARMScheduleA8.td" +include "ARMScheduleA9.td" +include "ARMScheduleSwift.td" +include "ARMScheduleR52.td" +include "ARMScheduleA57.td" +include "ARMScheduleM4.td" +include "ARMScheduleM7.td" + +//===----------------------------------------------------------------------===// // ARM processors // // Dummy CPU, used to target architectures @@ -1193,10 +1193,10 @@ def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, FeatureUseMISched, FeatureHasNoBranchPredictor]>; -def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em, - ProcM7, - FeatureFPARMv8_D16, - FeatureUseMISched]>; +def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em, + ProcM7, + FeatureFPARMv8_D16, + FeatureUseMISched]>; def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, FeatureNoMovt]>; @@ -1310,14 +1310,14 @@ def : 
ProcNoItin<"cortex-a78", [ARMv82a, ProcA78, FeatureFullFP16, FeatureDotProd]>; -def : ProcNoItin<"cortex-a78c", [ARMv82a, ProcA78C, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureDotProd, - FeatureFullFP16]>; - +def : ProcNoItin<"cortex-a78c", [ARMv82a, ProcA78C, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureDotProd, + FeatureFullFP16]>; + def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1, FeatureHWDivThumb, FeatureHWDivARM, @@ -1326,15 +1326,15 @@ def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1, FeatureFullFP16, FeatureDotProd]>; -def : ProcNoItin<"neoverse-v1", [ARMv84a, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFullFP16, - FeatureBF16, - FeatureMatMulInt8]>; - +def : ProcNoItin<"neoverse-v1", [ARMv84a, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFullFP16, + FeatureBF16, + FeatureMatMulInt8]>; + def : ProcNoItin<"neoverse-n1", [ARMv82a, FeatureHWDivThumb, FeatureHWDivARM, @@ -1342,11 +1342,11 @@ def : ProcNoItin<"neoverse-n1", [ARMv82a, FeatureCRC, FeatureDotProd]>; -def : ProcNoItin<"neoverse-n2", [ARMv85a, - FeatureBF16, - FeatureMatMulInt8, - FeaturePerfMon]>; - +def : ProcNoItin<"neoverse-n2", [ARMv85a, + FeatureBF16, + FeatureMatMulInt8, + FeaturePerfMon]>; + def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureHasRetAddrStack, FeatureNEONForFP, diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMAsmPrinter.cpp index 04e21867d5..31059e5910 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMAsmPrinter.cpp @@ -285,7 +285,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return false; case 'y': // Print a VFP single precision register as indexed double. if (MI->getOperand(OpNum).isReg()) { - MCRegister Reg = MI->getOperand(OpNum).getReg().asMCReg(); + MCRegister Reg = MI->getOperand(OpNum).getReg().asMCReg(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); // Find the 'd' register that has this 's' register as a sub-register, // and determine the lane number. 
@@ -903,7 +903,7 @@ void ARMAsmPrinter::emitMachineConstantPoolValue( MCSymbol *MCSym; if (ACPV->isLSDA()) { - MCSym = getMBBExceptionSym(MF->front()); + MCSym = getMBBExceptionSym(MF->front()); } else if (ACPV->isBlockAddress()) { const BlockAddress *BA = cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(); @@ -1897,7 +1897,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) { // LSJLJEH: Register SrcReg = MI->getOperand(0).getReg(); Register ValReg = MI->getOperand(1).getReg(); - MCSymbol *Label = OutContext.createTempSymbol("SJLJEH"); + MCSymbol *Label = OutContext.createTempSymbol("SJLJEH"); OutStreamer->AddComment("eh_setjmp begin"); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr) .addReg(ValReg) @@ -2180,49 +2180,49 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) { case ARM::PATCHABLE_TAIL_CALL: LowerPATCHABLE_TAIL_CALL(*MI); return; - case ARM::SpeculationBarrierISBDSBEndBB: { - // Print DSB SYS + ISB - MCInst TmpInstDSB; - TmpInstDSB.setOpcode(ARM::DSB); - TmpInstDSB.addOperand(MCOperand::createImm(0xf)); - EmitToStreamer(*OutStreamer, TmpInstDSB); - MCInst TmpInstISB; - TmpInstISB.setOpcode(ARM::ISB); - TmpInstISB.addOperand(MCOperand::createImm(0xf)); - EmitToStreamer(*OutStreamer, TmpInstISB); - return; - } - case ARM::t2SpeculationBarrierISBDSBEndBB: { - // Print DSB SYS + ISB - MCInst TmpInstDSB; - TmpInstDSB.setOpcode(ARM::t2DSB); - TmpInstDSB.addOperand(MCOperand::createImm(0xf)); - TmpInstDSB.addOperand(MCOperand::createImm(ARMCC::AL)); - TmpInstDSB.addOperand(MCOperand::createReg(0)); - EmitToStreamer(*OutStreamer, TmpInstDSB); - MCInst TmpInstISB; - TmpInstISB.setOpcode(ARM::t2ISB); - TmpInstISB.addOperand(MCOperand::createImm(0xf)); - TmpInstISB.addOperand(MCOperand::createImm(ARMCC::AL)); - TmpInstISB.addOperand(MCOperand::createReg(0)); - EmitToStreamer(*OutStreamer, TmpInstISB); - return; - } - case ARM::SpeculationBarrierSBEndBB: { - // Print SB - MCInst TmpInstSB; - TmpInstSB.setOpcode(ARM::SB); - EmitToStreamer(*OutStreamer, TmpInstSB); - return; - } - case ARM::t2SpeculationBarrierSBEndBB: { - // Print SB - MCInst TmpInstSB; - TmpInstSB.setOpcode(ARM::t2SB); - EmitToStreamer(*OutStreamer, TmpInstSB); - return; - } - } + case ARM::SpeculationBarrierISBDSBEndBB: { + // Print DSB SYS + ISB + MCInst TmpInstDSB; + TmpInstDSB.setOpcode(ARM::DSB); + TmpInstDSB.addOperand(MCOperand::createImm(0xf)); + EmitToStreamer(*OutStreamer, TmpInstDSB); + MCInst TmpInstISB; + TmpInstISB.setOpcode(ARM::ISB); + TmpInstISB.addOperand(MCOperand::createImm(0xf)); + EmitToStreamer(*OutStreamer, TmpInstISB); + return; + } + case ARM::t2SpeculationBarrierISBDSBEndBB: { + // Print DSB SYS + ISB + MCInst TmpInstDSB; + TmpInstDSB.setOpcode(ARM::t2DSB); + TmpInstDSB.addOperand(MCOperand::createImm(0xf)); + TmpInstDSB.addOperand(MCOperand::createImm(ARMCC::AL)); + TmpInstDSB.addOperand(MCOperand::createReg(0)); + EmitToStreamer(*OutStreamer, TmpInstDSB); + MCInst TmpInstISB; + TmpInstISB.setOpcode(ARM::t2ISB); + TmpInstISB.addOperand(MCOperand::createImm(0xf)); + TmpInstISB.addOperand(MCOperand::createImm(ARMCC::AL)); + TmpInstISB.addOperand(MCOperand::createReg(0)); + EmitToStreamer(*OutStreamer, TmpInstISB); + return; + } + case ARM::SpeculationBarrierSBEndBB: { + // Print SB + MCInst TmpInstSB; + TmpInstSB.setOpcode(ARM::SB); + EmitToStreamer(*OutStreamer, TmpInstSB); + return; + } + case ARM::t2SpeculationBarrierSBEndBB: { + // Print SB + MCInst TmpInstSB; + TmpInstSB.setOpcode(ARM::t2SB); + EmitToStreamer(*OutStreamer, TmpInstSB); + return; + } + } 
MCInst TmpInst; LowerARMMachineInstrToMCInst(MI, TmpInst, *this); diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.cpp index e418d53b56..d3047e1ae7 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -19,7 +19,7 @@ #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" -#include "MVETailPredUtils.h" +#include "MVETailPredUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -36,8 +36,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineScheduler.h" -#include "llvm/CodeGen/MultiHazardRecognizer.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/MultiHazardRecognizer.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -134,43 +134,43 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); } -// Called during: -// - pre-RA scheduling -// - post-RA scheduling when FeatureUseMISched is set -ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer( - const InstrItineraryData *II, const ScheduleDAGMI *DAG) const { - MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); - - // We would like to restrict this hazard recognizer to only - // post-RA scheduling; we can tell that we're post-RA because we don't - // track VRegLiveness. - // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM - // banks banked on bit 2. Assume that TCMs are in use. - if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness()) - MHR->AddHazardRecognizer( - std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true)); - - // Not inserting ARMHazardRecognizerFPMLx because that would change - // legacy behavior - - auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG); - MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR)); - return MHR; -} - -// Called during post-RA scheduling when FeatureUseMISched is not set +// Called during: +// - pre-RA scheduling +// - post-RA scheduling when FeatureUseMISched is set +ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer( + const InstrItineraryData *II, const ScheduleDAGMI *DAG) const { + MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); + + // We would like to restrict this hazard recognizer to only + // post-RA scheduling; we can tell that we're post-RA because we don't + // track VRegLiveness. + // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM + // banks banked on bit 2. Assume that TCMs are in use. 
+ if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness()) + MHR->AddHazardRecognizer( + std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true)); + + // Not inserting ARMHazardRecognizerFPMLx because that would change + // legacy behavior + + auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG); + MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR)); + return MHR; +} + +// Called during post-RA scheduling when FeatureUseMISched is not set ScheduleHazardRecognizer *ARMBaseInstrInfo:: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const { - MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); - + MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); + if (Subtarget.isThumb2() || Subtarget.hasVFP2Base()) - MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>()); - - auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); - if (BHR) - MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR)); - return MHR; + MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>()); + + auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); + if (BHR) + MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR)); + return MHR; } MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( @@ -351,8 +351,8 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, TBB = nullptr; FBB = nullptr; - MachineBasicBlock::instr_iterator I = MBB.instr_end(); - if (I == MBB.instr_begin()) + MachineBasicBlock::instr_iterator I = MBB.instr_end(); + if (I == MBB.instr_begin()) return false; // Empty blocks are easy. --I; @@ -364,12 +364,12 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // out. bool CantAnalyze = false; - // Skip over DEBUG values, predicated nonterminators and speculation - // barrier terminators. - while (I->isDebugInstr() || !I->isTerminator() || - isSpeculationBarrierEndBBOpcode(I->getOpcode()) || - I->getOpcode() == ARM::t2DoLoopStartTP){ - if (I == MBB.instr_begin()) + // Skip over DEBUG values, predicated nonterminators and speculation + // barrier terminators. + while (I->isDebugInstr() || !I->isTerminator() || + isSpeculationBarrierEndBBOpcode(I->getOpcode()) || + I->getOpcode() == ARM::t2DoLoopStartTP){ + if (I == MBB.instr_begin()) return false; --I; } @@ -393,7 +393,7 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, Cond.push_back(I->getOperand(2)); } else if (I->isReturn()) { // Returns can't be analyzed, but we should run cleanup. - CantAnalyze = true; + CantAnalyze = true; } else { // We encountered other unrecognized terminator. Bail out immediately. return true; @@ -414,30 +414,30 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // unconditional branch. if (AllowModify) { MachineBasicBlock::iterator DI = std::next(I); - while (DI != MBB.instr_end()) { + while (DI != MBB.instr_end()) { MachineInstr &InstToDelete = *DI; ++DI; - // Speculation barriers must not be deleted. - if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode())) - continue; + // Speculation barriers must not be deleted. + if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode())) + continue; InstToDelete.eraseFromParent(); } } } - if (CantAnalyze) { - // We may not be able to analyze the block, but we could still have - // an unconditional branch as the last instruction in the block, which - // just branches to layout successor. 
If this is the case, then just - // remove it if we're allowed to make modifications. - if (AllowModify && !isPredicated(MBB.back()) && - isUncondBranchOpcode(MBB.back().getOpcode()) && - TBB && MBB.isLayoutSuccessor(TBB)) - removeBranch(MBB); + if (CantAnalyze) { + // We may not be able to analyze the block, but we could still have + // an unconditional branch as the last instruction in the block, which + // just branches to layout successor. If this is the case, then just + // remove it if we're allowed to make modifications. + if (AllowModify && !isPredicated(MBB.back()) && + isUncondBranchOpcode(MBB.back().getOpcode()) && + TBB && MBB.isLayoutSuccessor(TBB)) + removeBranch(MBB); return true; - } + } - if (I == MBB.instr_begin()) + if (I == MBB.instr_begin()) return false; --I; @@ -586,18 +586,18 @@ bool ARMBaseInstrInfo::PredicateInstruction( MachineOperand &PMO = MI.getOperand(PIdx); PMO.setImm(Pred[0].getImm()); MI.getOperand(PIdx+1).setReg(Pred[1].getReg()); - - // Thumb 1 arithmetic instructions do not set CPSR when executed inside an - // IT block. This affects how they are printed. - const MCInstrDesc &MCID = MI.getDesc(); - if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) { - assert(MCID.OpInfo[1].isOptionalDef() && "CPSR def isn't expected operand"); - assert((MI.getOperand(1).isDead() || - MI.getOperand(1).getReg() != ARM::CPSR) && - "if conversion tried to stop defining used CPSR"); - MI.getOperand(1).setReg(ARM::NoRegister); - } - + + // Thumb 1 arithmetic instructions do not set CPSR when executed inside an + // IT block. This affects how they are printed. + const MCInstrDesc &MCID = MI.getDesc(); + if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) { + assert(MCID.OpInfo[1].isOptionalDef() && "CPSR def isn't expected operand"); + assert((MI.getOperand(1).isDead() || + MI.getOperand(1).getReg() != ARM::CPSR) && + "if conversion tried to stop defining used CPSR"); + MI.getOperand(1).setReg(ARM::NoRegister); + } + return true; } return false; @@ -629,23 +629,23 @@ bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, } } -bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI, - std::vector<MachineOperand> &Pred, - bool SkipDead) const { +bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI, + std::vector<MachineOperand> &Pred, + bool SkipDead) const { bool Found = false; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); - bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR); - bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR; - if (ClobbersCPSR || IsCPSR) { - - // Filter out T1 instructions that have a dead CPSR, - // allowing IT blocks to be generated containing T1 instructions - const MCInstrDesc &MCID = MI.getDesc(); - if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() && - SkipDead) - continue; - + bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR); + bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR; + if (ClobbersCPSR || IsCPSR) { + + // Filter out T1 instructions that have a dead CPSR, + // allowing IT blocks to be generated containing T1 instructions + const MCInstrDesc &MCID = MI.getDesc(); + if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() && + SkipDead) + continue; + Pred.push_back(MO); Found = true; } @@ -703,23 +703,23 @@ bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { if (!isEligibleForITBlock(&MI)) return false; - const MachineFunction *MF = MI.getParent()->getParent(); 
+ const MachineFunction *MF = MI.getParent()->getParent(); const ARMFunctionInfo *AFI = - MF->getInfo<ARMFunctionInfo>(); + MF->getInfo<ARMFunctionInfo>(); // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. // In their ARM encoding, they can't be encoded in a conditional form. if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) return false; - // Make indirect control flow changes unpredicable when SLS mitigation is - // enabled. - const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>(); - if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI)) - return false; - if (ST.hardenSlsBlr() && isIndirectCall(MI)) - return false; - + // Make indirect control flow changes unpredicable when SLS mitigation is + // enabled. + const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>(); + if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI)) + return false; + if (ST.hardenSlsBlr() && isIndirectCall(MI)) + return false; + if (AFI->isThumb2Function()) { if (getSubtarget().restrictIT()) return isV8EligibleForIT(&MI); @@ -802,14 +802,14 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { Size = alignTo(Size, 4); return Size; } - case ARM::SpeculationBarrierISBDSBEndBB: - case ARM::t2SpeculationBarrierISBDSBEndBB: - // This gets lowered to 2 4-byte instructions. - return 8; - case ARM::SpeculationBarrierSBEndBB: - case ARM::t2SpeculationBarrierSBEndBB: - // This gets lowered to 1 4-byte instructions. - return 4; + case ARM::SpeculationBarrierISBDSBEndBB: + case ARM::t2SpeculationBarrierISBDSBEndBB: + // This gets lowered to 2 4-byte instructions. + return 8; + case ARM::SpeculationBarrierSBEndBB: + case ARM::t2SpeculationBarrierSBEndBB: + // This gets lowered to 1 4-byte instructions. + return 4; } } @@ -2175,12 +2175,12 @@ ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF, // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions. // ARM has a condition code field in every predicable instruction, using it // doesn't change code size. - if (!Subtarget.isThumb2()) - return 0; - - // It's possible that the size of the IT is restricted to a single block. - unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4; - return divideCeil(NumInsts, MaxInsts) * 2; + if (!Subtarget.isThumb2()) + return 0; + + // It's possible that the size of the IT is restricted to a single block. + unsigned MaxInsts = Subtarget.restrictIT() ? 
1 : 4; + return divideCeil(NumInsts, MaxInsts) * 2; } unsigned @@ -3417,7 +3417,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, case ARM::t2SUBspImm: case ARM::t2ADDri: case ARM::t2SUBri: - MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC); + MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC); } return true; } @@ -4838,14 +4838,14 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, } } } - if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) { - assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm()); - if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) || - MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) { - ErrInfo = "Incorrect array index for MVE_VMOV_q_rr"; - return false; - } - } + if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) { + assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm()); + if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) || + MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) { + ErrInfo = "Incorrect array index for MVE_VMOV_q_rr"; + return false; + } + } return true; } @@ -5531,8 +5531,8 @@ unsigned llvm::ConstantMaterializationCost(unsigned Val, return ForCodesize ? 4 : 1; if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs return ForCodesize ? 8 : 2; - if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs - return ForCodesize ? 8 : 2; + if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs + return ForCodesize ? 8 : 2; } if (Subtarget->useMovt()) // MOVW + MOVT return ForCodesize ? 8 : 2; @@ -5637,32 +5637,32 @@ bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, /// | Frame overhead in Bytes | 2 | 4 | /// | Stack fixup required | No | No | /// +-------------------------+--------+-----+ -/// -/// \p MachineOutlinerDefault implies that the function should be called with -/// a save and restore of LR to the stack. -/// -/// That is, -/// -/// I1 Save LR OUTLINED_FUNCTION: -/// I2 --> BL OUTLINED_FUNCTION I1 -/// I3 Restore LR I2 -/// I3 -/// BX LR -/// -/// +-------------------------+--------+-----+ -/// | | Thumb2 | ARM | -/// +-------------------------+--------+-----+ -/// | Call overhead in Bytes | 8 | 12 | -/// | Frame overhead in Bytes | 2 | 4 | -/// | Stack fixup required | Yes | Yes | -/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerDefault implies that the function should be called with +/// a save and restore of LR to the stack. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 8 | 12 | +/// | Frame overhead in Bytes | 2 | 4 | +/// | Stack fixup required | Yes | Yes | +/// +-------------------------+--------+-----+ enum MachineOutlinerClass { MachineOutlinerTailCall, MachineOutlinerThunk, MachineOutlinerNoLRSave, - MachineOutlinerRegSave, - MachineOutlinerDefault + MachineOutlinerRegSave, + MachineOutlinerDefault }; enum MachineOutlinerMBBFlags { @@ -5680,9 +5680,9 @@ struct OutlinerCosts { const int FrameNoLRSave; const int CallRegSave; const int FrameRegSave; - const int CallDefault; - const int FrameDefault; - const int SaveRestoreLROnStack; + const int CallDefault; + const int FrameDefault; + const int SaveRestoreLROnStack; OutlinerCosts(const ARMSubtarget &target) : CallTailCall(target.isThumb() ? 
4 : 4), @@ -5692,10 +5692,10 @@ struct OutlinerCosts { CallNoLRSave(target.isThumb() ? 4 : 4), FrameNoLRSave(target.isThumb() ? 4 : 4), CallRegSave(target.isThumb() ? 8 : 12), - FrameRegSave(target.isThumb() ? 2 : 4), - CallDefault(target.isThumb() ? 8 : 12), - FrameDefault(target.isThumb() ? 2 : 4), - SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {} + FrameRegSave(target.isThumb() ? 2 : 4), + CallDefault(target.isThumb() ? 8 : 12), + FrameDefault(target.isThumb() ? 2 : 4), + SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {} }; unsigned @@ -5720,37 +5720,37 @@ ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { return 0u; } -// Compute liveness of LR at the point after the interval [I, E), which -// denotes a *backward* iteration through instructions. Used only for return -// basic blocks, which do not end with a tail call. -static bool isLRAvailable(const TargetRegisterInfo &TRI, - MachineBasicBlock::reverse_iterator I, - MachineBasicBlock::reverse_iterator E) { - // At the end of the function LR dead. - bool Live = false; - for (; I != E; ++I) { - const MachineInstr &MI = *I; - - // Check defs of LR. - if (MI.modifiesRegister(ARM::LR, &TRI)) - Live = false; - - // Check uses of LR. - unsigned Opcode = MI.getOpcode(); - if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR || - Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET || - Opcode == ARM::tBXNS_RET) { - // These instructions use LR, but it's not an (explicit or implicit) - // operand. - Live = true; - continue; - } - if (MI.readsRegister(ARM::LR, &TRI)) - Live = true; - } - return !Live; -} - +// Compute liveness of LR at the point after the interval [I, E), which +// denotes a *backward* iteration through instructions. Used only for return +// basic blocks, which do not end with a tail call. +static bool isLRAvailable(const TargetRegisterInfo &TRI, + MachineBasicBlock::reverse_iterator I, + MachineBasicBlock::reverse_iterator E) { + // At the end of the function LR dead. + bool Live = false; + for (; I != E; ++I) { + const MachineInstr &MI = *I; + + // Check defs of LR. + if (MI.modifiesRegister(ARM::LR, &TRI)) + Live = false; + + // Check uses of LR. + unsigned Opcode = MI.getOpcode(); + if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR || + Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET || + Opcode == ARM::tBXNS_RET) { + // These instructions use LR, but it's not an (explicit or implicit) + // operand. + Live = true; + continue; + } + if (MI.readsRegister(ARM::LR, &TRI)) + Live = true; + } + return !Live; +} + outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; @@ -5796,7 +5796,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( // Erase every candidate that violates the restrictions above. (It could be // true that we have viable candidates, so it's not worth bailing out in // the case that, say, 1 out of 20 candidates violate the restructions.) - llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall); + llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall); // If the sequence doesn't have enough candidates left, then we're done. 
if (RepeatedSequenceLocs.size() < 2) @@ -5816,8 +5816,8 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( }; OutlinerCosts Costs(Subtarget); - unsigned FrameID = MachineOutlinerDefault; - unsigned NumBytesToCreateFrame = Costs.FrameDefault; + unsigned FrameID = MachineOutlinerDefault; + unsigned NumBytesToCreateFrame = Costs.FrameDefault; // If the last instruction in any candidate is a terminator, then we should // tail call all of the candidates. @@ -5826,31 +5826,31 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( NumBytesToCreateFrame = Costs.FrameTailCall; SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall); } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX || - LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL || - LastInstrOpcode == ARM::tBLXr || - LastInstrOpcode == ARM::tBLXr_noip || + LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL || + LastInstrOpcode == ARM::tBLXr || + LastInstrOpcode == ARM::tBLXr_noip || LastInstrOpcode == ARM::tBLXi) { FrameID = MachineOutlinerThunk; NumBytesToCreateFrame = Costs.FrameThunk; SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk); } else { // We need to decide how to emit calls + frames. We can always emit the same - // frame if we don't need to save to the stack. If we have to save to the - // stack, then we need a different frame. + // frame if we don't need to save to the stack. If we have to save to the + // stack, then we need a different frame. unsigned NumBytesNoStackCalls = 0; std::vector<outliner::Candidate> CandidatesWithoutStackFixups; for (outliner::Candidate &C : RepeatedSequenceLocs) { C.initLRU(TRI); - // LR liveness is overestimated in return blocks, unless they end with a - // tail call. - const auto Last = C.getMBB()->rbegin(); - const bool LRIsAvailable = - C.getMBB()->isReturnBlock() && !Last->isCall() - ? isLRAvailable(TRI, Last, - (MachineBasicBlock::reverse_iterator)C.front()) - : C.LRU.available(ARM::LR); - if (LRIsAvailable) { + // LR liveness is overestimated in return blocks, unless they end with a + // tail call. + const auto Last = C.getMBB()->rbegin(); + const bool LRIsAvailable = + C.getMBB()->isReturnBlock() && !Last->isCall() + ? isLRAvailable(TRI, Last, + (MachineBasicBlock::reverse_iterator)C.front()) + : C.LRU.available(ARM::LR); + if (LRIsAvailable) { FrameID = MachineOutlinerNoLRSave; NumBytesNoStackCalls += Costs.CallNoLRSave; C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave); @@ -5865,157 +5865,157 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave); CandidatesWithoutStackFixups.push_back(C); } - - // Is SP used in the sequence at all? If not, we don't have to modify - // the stack, so we are guaranteed to get the same frame. - else if (C.UsedInSequence.available(ARM::SP)) { - NumBytesNoStackCalls += Costs.CallDefault; - C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault); - CandidatesWithoutStackFixups.push_back(C); - } - - // If we outline this, we need to modify the stack. Pretend we don't - // outline this by saving all of its bytes. - else - NumBytesNoStackCalls += SequenceSize; + + // Is SP used in the sequence at all? If not, we don't have to modify + // the stack, so we are guaranteed to get the same frame. 
+ else if (C.UsedInSequence.available(ARM::SP)) { + NumBytesNoStackCalls += Costs.CallDefault; + C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault); + CandidatesWithoutStackFixups.push_back(C); + } + + // If we outline this, we need to modify the stack. Pretend we don't + // outline this by saving all of its bytes. + else + NumBytesNoStackCalls += SequenceSize; } - // If there are no places where we have to save LR, then note that we don't - // have to update the stack. Otherwise, give every candidate the default - // call type - if (NumBytesNoStackCalls <= - RepeatedSequenceLocs.size() * Costs.CallDefault) { + // If there are no places where we have to save LR, then note that we don't + // have to update the stack. Otherwise, give every candidate the default + // call type + if (NumBytesNoStackCalls <= + RepeatedSequenceLocs.size() * Costs.CallDefault) { RepeatedSequenceLocs = CandidatesWithoutStackFixups; - FrameID = MachineOutlinerNoLRSave; + FrameID = MachineOutlinerNoLRSave; } else - SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault); - } - - // Does every candidate's MBB contain a call? If so, then we might have a - // call in the range. - if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { - // check if the range contains a call. These require a save + restore of - // the link register. - if (std::any_of(FirstCand.front(), FirstCand.back(), - [](const MachineInstr &MI) { return MI.isCall(); })) - NumBytesToCreateFrame += Costs.SaveRestoreLROnStack; - - // Handle the last instruction separately. If it is tail call, then the - // last instruction is a call, we don't want to save + restore in this - // case. However, it could be possible that the last instruction is a - // call without it being valid to tail call this sequence. We should - // consider this as well. - else if (FrameID != MachineOutlinerThunk && - FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall()) - NumBytesToCreateFrame += Costs.SaveRestoreLROnStack; - } - + SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault); + } + + // Does every candidate's MBB contain a call? If so, then we might have a + // call in the range. + if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { + // check if the range contains a call. These require a save + restore of + // the link register. + if (std::any_of(FirstCand.front(), FirstCand.back(), + [](const MachineInstr &MI) { return MI.isCall(); })) + NumBytesToCreateFrame += Costs.SaveRestoreLROnStack; + + // Handle the last instruction separately. If it is tail call, then the + // last instruction is a call, we don't want to save + restore in this + // case. However, it could be possible that the last instruction is a + // call without it being valid to tail call this sequence. We should + // consider this as well. 
+ else if (FrameID != MachineOutlinerThunk && + FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall()) + NumBytesToCreateFrame += Costs.SaveRestoreLROnStack; + } + return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID); } -bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, - int64_t Fixup, - bool Updt) const { - int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP); - unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask); - if (SPIdx < 0) - // No SP operand - return true; - else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2)) - // If SP is not the base register we can't do much - return false; - - // Stack might be involved but addressing mode doesn't handle any offset. - // Rq: AddrModeT1_[1|2|4] don't operate on SP - if (AddrMode == ARMII::AddrMode1 // Arithmetic instructions - || AddrMode == ARMII::AddrMode4 // Load/Store Multiple - || AddrMode == ARMII::AddrMode6 // Neon Load/Store Multiple - || AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register - || AddrMode == ARMII::AddrModeT2_pc // PCrel access - || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST - || AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE - || AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE - || AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR - || AddrMode == ARMII::AddrModeNone) - return false; - - unsigned NumOps = MI->getDesc().getNumOperands(); - unsigned ImmIdx = NumOps - 3; - - const MachineOperand &Offset = MI->getOperand(ImmIdx); - assert(Offset.isImm() && "Is not an immediate"); - int64_t OffVal = Offset.getImm(); - - if (OffVal < 0) - // Don't override data if the are below SP. - return false; - - unsigned NumBits = 0; - unsigned Scale = 1; - - switch (AddrMode) { - case ARMII::AddrMode3: - if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub) - return false; - OffVal = ARM_AM::getAM3Offset(OffVal); - NumBits = 8; - break; - case ARMII::AddrMode5: - if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub) - return false; - OffVal = ARM_AM::getAM5Offset(OffVal); - NumBits = 8; - Scale = 4; - break; - case ARMII::AddrMode5FP16: - if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub) - return false; - OffVal = ARM_AM::getAM5FP16Offset(OffVal); - NumBits = 8; - Scale = 2; - break; - case ARMII::AddrModeT2_i8: - NumBits = 8; - break; - case ARMII::AddrModeT2_i8s4: - // FIXME: Values are already scaled in this addressing mode. - assert((Fixup & 3) == 0 && "Can't encode this offset!"); - NumBits = 10; - break; - case ARMII::AddrModeT2_ldrex: - NumBits = 8; - Scale = 4; - break; - case ARMII::AddrModeT2_i12: - case ARMII::AddrMode_i12: - NumBits = 12; - break; - case ARMII::AddrModeT1_s: // SP-relative LD/ST - NumBits = 8; - Scale = 4; - break; - default: - llvm_unreachable("Unsupported addressing mode!"); - } - // Make sure the offset is encodable for instructions that scale the - // immediate. 
- assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 && - "Can't encode this offset!"); - OffVal += Fixup / Scale; - - unsigned Mask = (1 << NumBits) - 1; - - if (OffVal <= Mask) { - if (Updt) - MI->getOperand(ImmIdx).setImm(OffVal); - return true; - } - - return false; - -} - +bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, + int64_t Fixup, + bool Updt) const { + int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP); + unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask); + if (SPIdx < 0) + // No SP operand + return true; + else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2)) + // If SP is not the base register we can't do much + return false; + + // Stack might be involved but addressing mode doesn't handle any offset. + // Rq: AddrModeT1_[1|2|4] don't operate on SP + if (AddrMode == ARMII::AddrMode1 // Arithmetic instructions + || AddrMode == ARMII::AddrMode4 // Load/Store Multiple + || AddrMode == ARMII::AddrMode6 // Neon Load/Store Multiple + || AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register + || AddrMode == ARMII::AddrModeT2_pc // PCrel access + || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST + || AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE + || AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE + || AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR + || AddrMode == ARMII::AddrModeNone) + return false; + + unsigned NumOps = MI->getDesc().getNumOperands(); + unsigned ImmIdx = NumOps - 3; + + const MachineOperand &Offset = MI->getOperand(ImmIdx); + assert(Offset.isImm() && "Is not an immediate"); + int64_t OffVal = Offset.getImm(); + + if (OffVal < 0) + // Don't override data if the are below SP. + return false; + + unsigned NumBits = 0; + unsigned Scale = 1; + + switch (AddrMode) { + case ARMII::AddrMode3: + if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub) + return false; + OffVal = ARM_AM::getAM3Offset(OffVal); + NumBits = 8; + break; + case ARMII::AddrMode5: + if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub) + return false; + OffVal = ARM_AM::getAM5Offset(OffVal); + NumBits = 8; + Scale = 4; + break; + case ARMII::AddrMode5FP16: + if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub) + return false; + OffVal = ARM_AM::getAM5FP16Offset(OffVal); + NumBits = 8; + Scale = 2; + break; + case ARMII::AddrModeT2_i8: + NumBits = 8; + break; + case ARMII::AddrModeT2_i8s4: + // FIXME: Values are already scaled in this addressing mode. + assert((Fixup & 3) == 0 && "Can't encode this offset!"); + NumBits = 10; + break; + case ARMII::AddrModeT2_ldrex: + NumBits = 8; + Scale = 4; + break; + case ARMII::AddrModeT2_i12: + case ARMII::AddrMode_i12: + NumBits = 12; + break; + case ARMII::AddrModeT1_s: // SP-relative LD/ST + NumBits = 8; + Scale = 4; + break; + default: + llvm_unreachable("Unsupported addressing mode!"); + } + // Make sure the offset is encodable for instructions that scale the + // immediate. 
+ assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 && + "Can't encode this offset!"); + OffVal += Fixup / Scale; + + unsigned Mask = (1 << NumBits) - 1; + + if (OffVal <= Mask) { + if (Updt) + MI->getOperand(ImmIdx).setImm(OffVal); + return true; + } + + return false; + +} + bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { const Function &F = MF.getFunction(); @@ -6075,13 +6075,13 @@ bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) Flags |= MachineOutlinerMBBFlags::HasCalls; - // LR liveness is overestimated in return blocks. - - bool LRIsAvailable = - MBB.isReturnBlock() && !MBB.back().isCall() - ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend()) - : LRU.available(ARM::LR); - if (!LRIsAvailable) + // LR liveness is overestimated in return blocks. + + bool LRIsAvailable = + MBB.isReturnBlock() && !MBB.back().isCall() + ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend()) + : LRU.available(ARM::LR); + if (!LRIsAvailable) Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; return true; @@ -6119,9 +6119,9 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, // Be conservative with ARMv8.1 MVE instructions. if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart || - Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart || - Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd || - Opc == ARM::t2LoopEndDec) + Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart || + Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd || + Opc == ARM::t2LoopEndDec) return outliner::InstrType::Illegal; const MCInstrDesc &MCID = MI.getDesc(); @@ -6155,56 +6155,56 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, return outliner::InstrType::Illegal; if (MI.isCall()) { - // Get the function associated with the call. Look at each operand and find - // the one that represents the calle and get its name. - const Function *Callee = nullptr; - for (const MachineOperand &MOP : MI.operands()) { - if (MOP.isGlobal()) { - Callee = dyn_cast<Function>(MOP.getGlobal()); - break; - } - } - - // Dont't outline calls to "mcount" like functions, in particular Linux - // kernel function tracing relies on it. - if (Callee && - (Callee->getName() == "\01__gnu_mcount_nc" || - Callee->getName() == "\01mcount" || Callee->getName() == "__mcount")) - return outliner::InstrType::Illegal; - + // Get the function associated with the call. Look at each operand and find + // the one that represents the calle and get its name. + const Function *Callee = nullptr; + for (const MachineOperand &MOP : MI.operands()) { + if (MOP.isGlobal()) { + Callee = dyn_cast<Function>(MOP.getGlobal()); + break; + } + } + + // Dont't outline calls to "mcount" like functions, in particular Linux + // kernel function tracing relies on it. + if (Callee && + (Callee->getName() == "\01__gnu_mcount_nc" || + Callee->getName() == "\01mcount" || Callee->getName() == "__mcount")) + return outliner::InstrType::Illegal; + // If we don't know anything about the callee, assume it depends on the // stack layout of the caller. In that case, it's only legal to outline // as a tail-call. Explicitly list the call instructions we know about so // we don't get unexpected results with call pseudo-instructions. 
auto UnknownCallOutlineType = outliner::InstrType::Illegal; if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX || - Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip || - Opc == ARM::tBLXi) + Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip || + Opc == ARM::tBLXi) UnknownCallOutlineType = outliner::InstrType::LegalTerminator; - if (!Callee) - return UnknownCallOutlineType; - - // We have a function we have information about. Check if it's something we - // can safely outline. - MachineFunction *MF = MI.getParent()->getParent(); - MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee); - - // We don't know what's going on with the callee at all. Don't touch it. - if (!CalleeMF) - return UnknownCallOutlineType; - - // Check if we know anything about the callee saves on the function. If we - // don't, then don't touch it, since that implies that we haven't computed - // anything about its stack frame yet. - MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); - if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || - MFI.getNumObjects() > 0) - return UnknownCallOutlineType; - - // At this point, we can say that CalleeMF ought to not pass anything on the - // stack. Therefore, we can outline it. - return outliner::InstrType::Legal; + if (!Callee) + return UnknownCallOutlineType; + + // We have a function we have information about. Check if it's something we + // can safely outline. + MachineFunction *MF = MI.getParent()->getParent(); + MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee); + + // We don't know what's going on with the callee at all. Don't touch it. + if (!CalleeMF) + return UnknownCallOutlineType; + + // Check if we know anything about the callee saves on the function. If we + // don't, then don't touch it, since that implies that we haven't computed + // anything about its stack frame yet. + MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || + MFI.getNumObjects() > 0) + return UnknownCallOutlineType; + + // At this point, we can say that CalleeMF ought to not pass anything on the + // stack. Therefore, we can outline it. + return outliner::InstrType::Legal; } // Since calls are handled, don't touch LR or PC @@ -6227,19 +6227,19 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, if (!MightNeedStackFixUp) return outliner::InstrType::Legal; - // Any modification of SP will break our code to save/restore LR. - // FIXME: We could handle some instructions which add a constant offset to - // SP, with a bit more work. - if (MI.modifiesRegister(ARM::SP, TRI)) - return outliner::InstrType::Illegal; - - // At this point, we have a stack instruction that we might need to fix up. - // up. We'll handle it if it's a load or store. - if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), - false)) - return outliner::InstrType::Legal; - - // We can't fix it up, so don't outline it. + // Any modification of SP will break our code to save/restore LR. + // FIXME: We could handle some instructions which add a constant offset to + // SP, with a bit more work. + if (MI.modifiesRegister(ARM::SP, TRI)) + return outliner::InstrType::Illegal; + + // At this point, we have a stack instruction that we might need to fix up. + // up. We'll handle it if it's a load or store. 
+ if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), + false)) + return outliner::InstrType::Legal; + + // We can't fix it up, so don't outline it. return outliner::InstrType::Illegal; } @@ -6255,104 +6255,104 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, return outliner::InstrType::Legal; } -void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { - for (MachineInstr &MI : MBB) { - checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true); - } -} - -void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const { - unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; - int Align = -Subtarget.getStackAlignment().value(); - BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP) - .addReg(ARM::LR, RegState::Kill) - .addReg(ARM::SP) - .addImm(Align) - .add(predOps(ARMCC::AL)); -} - -void ARMBaseInstrInfo::emitCFIForLRSaveOnStack( - MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { - MachineFunction &MF = *MBB.getParent(); - const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); - unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); - int Align = Subtarget.getStackAlignment().value(); - // Add a CFI saying the stack was moved down. - int64_t StackPosEntry = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align)); - BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) - .addCFIIndex(StackPosEntry) - .setMIFlags(MachineInstr::FrameSetup); - - // Add a CFI saying that the LR that we want to find is now higher than - // before. - int64_t LRPosEntry = - MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfLR, -Align)); - BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) - .addCFIIndex(LRPosEntry) - .setMIFlags(MachineInstr::FrameSetup); -} - -void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It, - Register Reg) const { - MachineFunction &MF = *MBB.getParent(); - const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); - unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); - unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - - int64_t LRPosEntry = MF.addFrameInst( - MCCFIInstruction::createRegister(nullptr, DwarfLR, DwarfReg)); - BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) - .addCFIIndex(LRPosEntry) - .setMIFlags(MachineInstr::FrameSetup); -} - -void ARMBaseInstrInfo::restoreLRFromStack( - MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { - unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; - MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP); - if (!Subtarget.isThumb()) - MIB.addReg(0); - MIB.addImm(Subtarget.getStackAlignment().value()).add(predOps(ARMCC::AL)); -} - -void ARMBaseInstrInfo::emitCFIForLRRestoreFromStack( - MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { - // Now stack has moved back up... - MachineFunction &MF = *MBB.getParent(); - const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); - unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); - int64_t StackPosEntry = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); - BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) - .addCFIIndex(StackPosEntry) - .setMIFlags(MachineInstr::FrameDestroy); - - // ... and we have restored LR. 
- int64_t LRPosEntry = - MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR)); - BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) - .addCFIIndex(LRPosEntry) - .setMIFlags(MachineInstr::FrameDestroy); -} - -void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg( - MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { - MachineFunction &MF = *MBB.getParent(); - const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); - unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); - - int64_t LRPosEntry = - MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR)); - BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) - .addCFIIndex(LRPosEntry) - .setMIFlags(MachineInstr::FrameDestroy); -} - +void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { + for (MachineInstr &MI : MBB) { + checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true); + } +} + +void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator It) const { + unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; + int Align = -Subtarget.getStackAlignment().value(); + BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP) + .addReg(ARM::LR, RegState::Kill) + .addReg(ARM::SP) + .addImm(Align) + .add(predOps(ARMCC::AL)); +} + +void ARMBaseInstrInfo::emitCFIForLRSaveOnStack( + MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { + MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); + unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); + int Align = Subtarget.getStackAlignment().value(); + // Add a CFI saying the stack was moved down. + int64_t StackPosEntry = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(StackPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + + // Add a CFI saying that the LR that we want to find is now higher than + // before. + int64_t LRPosEntry = + MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfLR, -Align)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(LRPosEntry) + .setMIFlags(MachineInstr::FrameSetup); +} + +void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator It, + Register Reg) const { + MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); + unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); + unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); + + int64_t LRPosEntry = MF.addFrameInst( + MCCFIInstruction::createRegister(nullptr, DwarfLR, DwarfReg)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(LRPosEntry) + .setMIFlags(MachineInstr::FrameSetup); +} + +void ARMBaseInstrInfo::restoreLRFromStack( + MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { + unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; + MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP); + if (!Subtarget.isThumb()) + MIB.addReg(0); + MIB.addImm(Subtarget.getStackAlignment().value()).add(predOps(ARMCC::AL)); +} + +void ARMBaseInstrInfo::emitCFIForLRRestoreFromStack( + MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { + // Now stack has moved back up... 
+ MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); + unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); + int64_t StackPosEntry = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(StackPosEntry) + .setMIFlags(MachineInstr::FrameDestroy); + + // ... and we have restored LR. + int64_t LRPosEntry = + MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(LRPosEntry) + .setMIFlags(MachineInstr::FrameDestroy); +} + +void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { + MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); + unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); + + int64_t LRPosEntry = + MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(LRPosEntry) + .setMIFlags(MachineInstr::FrameDestroy); +} + void ARMBaseInstrInfo::buildOutlinedFrame( MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const { @@ -6374,57 +6374,57 @@ void ARMBaseInstrInfo::buildOutlinedFrame( Call->eraseFromParent(); } - // Is there a call in the outlined range? - auto IsNonTailCall = [](MachineInstr &MI) { - return MI.isCall() && !MI.isReturn(); - }; - if (llvm::any_of(MBB.instrs(), IsNonTailCall)) { - MachineBasicBlock::iterator It = MBB.begin(); - MachineBasicBlock::iterator Et = MBB.end(); - - if (OF.FrameConstructionID == MachineOutlinerTailCall || - OF.FrameConstructionID == MachineOutlinerThunk) - Et = std::prev(MBB.end()); - - // We have to save and restore LR, we need to add it to the liveins if it - // is not already part of the set. This is suffient since outlined - // functions only have one block. - if (!MBB.isLiveIn(ARM::LR)) - MBB.addLiveIn(ARM::LR); - - // Insert a save before the outlined region - saveLROnStack(MBB, It); - emitCFIForLRSaveOnStack(MBB, It); - - // Fix up the instructions in the range, since we're going to modify the - // stack. - assert(OF.FrameConstructionID != MachineOutlinerDefault && - "Can only fix up stack references once"); - fixupPostOutline(MBB); - - // Insert a restore before the terminator for the function. Restore LR. - restoreLRFromStack(MBB, Et); - emitCFIForLRRestoreFromStack(MBB, Et); - } - - // If this is a tail call outlined function, then there's already a return. - if (OF.FrameConstructionID == MachineOutlinerTailCall || - OF.FrameConstructionID == MachineOutlinerThunk) - return; - + // Is there a call in the outlined range? + auto IsNonTailCall = [](MachineInstr &MI) { + return MI.isCall() && !MI.isReturn(); + }; + if (llvm::any_of(MBB.instrs(), IsNonTailCall)) { + MachineBasicBlock::iterator It = MBB.begin(); + MachineBasicBlock::iterator Et = MBB.end(); + + if (OF.FrameConstructionID == MachineOutlinerTailCall || + OF.FrameConstructionID == MachineOutlinerThunk) + Et = std::prev(MBB.end()); + + // We have to save and restore LR, we need to add it to the liveins if it + // is not already part of the set. This is suffient since outlined + // functions only have one block. 
+ if (!MBB.isLiveIn(ARM::LR)) + MBB.addLiveIn(ARM::LR); + + // Insert a save before the outlined region + saveLROnStack(MBB, It); + emitCFIForLRSaveOnStack(MBB, It); + + // Fix up the instructions in the range, since we're going to modify the + // stack. + assert(OF.FrameConstructionID != MachineOutlinerDefault && + "Can only fix up stack references once"); + fixupPostOutline(MBB); + + // Insert a restore before the terminator for the function. Restore LR. + restoreLRFromStack(MBB, Et); + emitCFIForLRRestoreFromStack(MBB, Et); + } + + // If this is a tail call outlined function, then there's already a return. + if (OF.FrameConstructionID == MachineOutlinerTailCall || + OF.FrameConstructionID == MachineOutlinerThunk) + return; + // Here we have to insert the return ourselves. Get the correct opcode from // current feature set. BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode())) .add(predOps(ARMCC::AL)); - - // Did we have to modify the stack by saving the link register? - if (OF.FrameConstructionID != MachineOutlinerDefault && - OF.Candidates[0].CallConstructionID != MachineOutlinerDefault) - return; - - // We modified the stack. - // Walk over the basic block and fix up all the stack accesses. - fixupPostOutline(MBB); + + // Did we have to modify the stack by saving the link register? + if (OF.FrameConstructionID != MachineOutlinerDefault && + OF.Candidates[0].CallConstructionID != MachineOutlinerDefault) + return; + + // We modified the stack. + // Walk over the basic block and fix up all the stack accesses. + fixupPostOutline(MBB); } MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( @@ -6456,14 +6456,14 @@ MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( CallMIB.add(predOps(ARMCC::AL)); CallMIB.addGlobalAddress(M.getNamedValue(MF.getName())); - if (C.CallConstructionID == MachineOutlinerNoLRSave || - C.CallConstructionID == MachineOutlinerThunk) { - // No, so just insert the call. - It = MBB.insert(It, CallMIB); - return It; - } - - const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); + if (C.CallConstructionID == MachineOutlinerNoLRSave || + C.CallConstructionID == MachineOutlinerThunk) { + // No, so just insert the call. + It = MBB.insert(It, CallMIB); + return It; + } + + const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); // Can we save to a register? if (C.CallConstructionID == MachineOutlinerRegSave) { unsigned Reg = findRegisterToSaveLRTo(C); @@ -6471,55 +6471,55 @@ MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( // Save and restore LR from that register. copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true); - if (!AFI.isLRSpilled()) - emitCFIForLRSaveToReg(MBB, It, Reg); + if (!AFI.isLRSpilled()) + emitCFIForLRSaveToReg(MBB, It, Reg); CallPt = MBB.insert(It, CallMIB); copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true); - if (!AFI.isLRSpilled()) - emitCFIForLRRestoreFromReg(MBB, It); + if (!AFI.isLRSpilled()) + emitCFIForLRRestoreFromReg(MBB, It); It--; return CallPt; } - // We have the default case. Save and restore from SP. 
- if (!MBB.isLiveIn(ARM::LR)) - MBB.addLiveIn(ARM::LR); - saveLROnStack(MBB, It); - if (!AFI.isLRSpilled()) - emitCFIForLRSaveOnStack(MBB, It); - CallPt = MBB.insert(It, CallMIB); - restoreLRFromStack(MBB, It); - if (!AFI.isLRSpilled()) - emitCFIForLRRestoreFromStack(MBB, It); - It--; - return CallPt; -} - -bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault( - MachineFunction &MF) const { - return Subtarget.isMClass() && MF.getFunction().hasMinSize(); -} - -bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { - // Try hard to rematerialize any VCTPs because if we spill P0, it will block - // the tail predication conversion. This means that the element count - // register has to be live for longer, but that has to be better than - // spill/restore and VPT predication. - return isVCTP(&MI) && !isPredicated(MI); -} - -unsigned llvm::getBLXOpcode(const MachineFunction &MF) { - return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip - : ARM::BLX; -} - -unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) { - return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip - : ARM::tBLXr; -} - -unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) { - return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip - : ARM::BLX_pred; -} - + // We have the default case. Save and restore from SP. + if (!MBB.isLiveIn(ARM::LR)) + MBB.addLiveIn(ARM::LR); + saveLROnStack(MBB, It); + if (!AFI.isLRSpilled()) + emitCFIForLRSaveOnStack(MBB, It); + CallPt = MBB.insert(It, CallMIB); + restoreLRFromStack(MBB, It); + if (!AFI.isLRSpilled()) + emitCFIForLRRestoreFromStack(MBB, It); + It--; + return CallPt; +} + +bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault( + MachineFunction &MF) const { + return Subtarget.isMClass() && MF.getFunction().hasMinSize(); +} + +bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, + AAResults *AA) const { + // Try hard to rematerialize any VCTPs because if we spill P0, it will block + // the tail predication conversion. This means that the element count + // register has to be live for longer, but that has to be better than + // spill/restore and VPT predication. + return isVCTP(&MI) && !isPredicated(MI); +} + +unsigned llvm::getBLXOpcode(const MachineFunction &MF) { + return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip + : ARM::BLX; +} + +unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) { + return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip + : ARM::tBLXr; +} + +unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) { + return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? 
ARM::BLX_pred_noip + : ARM::BLX_pred; +} + diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.h index 1b843c4281..e61d557c1d 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseInstrInfo.h @@ -132,10 +132,10 @@ public: const ScheduleDAG *DAG) const override; ScheduleHazardRecognizer * - CreateTargetMIHazardRecognizer(const InstrItineraryData *II, - const ScheduleDAGMI *DAG) const override; - - ScheduleHazardRecognizer * + CreateTargetMIHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAGMI *DAG) const override; + + ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; @@ -175,8 +175,8 @@ public: bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1, ArrayRef<MachineOperand> Pred2) const override; - bool ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred, - bool SkipDead) const override; + bool ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred, + bool SkipDead) const override; bool isPredicable(const MachineInstr &MI) const override; @@ -361,60 +361,60 @@ public: MachineBasicBlock::iterator &It, MachineFunction &MF, const outliner::Candidate &C) const override; - /// Enable outlining by default at -Oz. - bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override; - - bool isUnspillableTerminatorImpl(const MachineInstr *MI) const override { - return MI->getOpcode() == ARM::t2LoopEndDec || - MI->getOpcode() == ARM::t2DoLoopStartTP; - } - + /// Enable outlining by default at -Oz. + bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override; + + bool isUnspillableTerminatorImpl(const MachineInstr *MI) const override { + return MI->getOpcode() == ARM::t2LoopEndDec || + MI->getOpcode() == ARM::t2DoLoopStartTP; + } + private: /// Returns an unused general-purpose register which can be used for /// constructing an outlined call if one exists. Returns 0 otherwise. unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; - // Adds an instruction which saves the link register on top of the stack into - /// the MachineBasicBlock \p MBB at position \p It. - void saveLROnStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; - - /// Adds an instruction which restores the link register from the top the - /// stack into the MachineBasicBlock \p MBB at position \p It. - void restoreLRFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; - - /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, - /// for the case when the LR is saved on the stack. - void emitCFIForLRSaveOnStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; - - /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, - /// for the case when the LR is saved in the register \p Reg. - void emitCFIForLRSaveToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It, - Register Reg) const; - - /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, - /// after the LR is was restored from the stack. - void emitCFIForLRRestoreFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; - - /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, - /// after the LR is was restored from a register. 
- void emitCFIForLRRestoreFromReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; - /// \brief Sets the offsets on outlined instructions in \p MBB which use SP - /// so that they will be valid post-outlining. - /// - /// \param MBB A \p MachineBasicBlock in an outlined function. - void fixupPostOutline(MachineBasicBlock &MBB) const; - - /// Returns true if the machine instruction offset can handle the stack fixup - /// and updates it if requested. - bool checkAndUpdateStackOffset(MachineInstr *MI, int64_t Fixup, - bool Updt) const; - + // Adds an instruction which saves the link register on top of the stack into + /// the MachineBasicBlock \p MBB at position \p It. + void saveLROnStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator It) const; + + /// Adds an instruction which restores the link register from the top the + /// stack into the MachineBasicBlock \p MBB at position \p It. + void restoreLRFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator It) const; + + /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, + /// for the case when the LR is saved on the stack. + void emitCFIForLRSaveOnStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator It) const; + + /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, + /// for the case when the LR is saved in the register \p Reg. + void emitCFIForLRSaveToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator It, + Register Reg) const; + + /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, + /// after the LR is was restored from the stack. + void emitCFIForLRRestoreFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator It) const; + + /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, + /// after the LR is was restored from a register. + void emitCFIForLRRestoreFromReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator It) const; + /// \brief Sets the offsets on outlined instructions in \p MBB which use SP + /// so that they will be valid post-outlining. + /// + /// \param MBB A \p MachineBasicBlock in an outlined function. + void fixupPostOutline(MachineBasicBlock &MBB) const; + + /// Returns true if the machine instruction offset can handle the stack fixup + /// and updates it if requested. + bool checkAndUpdateStackOffset(MachineInstr *MI, int64_t Fixup, + bool Updt) const; + unsigned getInstBundleLength(const MachineInstr &MI) const; int getVLDMDefCycle(const InstrItineraryData *ItinData, @@ -477,9 +477,9 @@ private: MachineInstr *canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) const; - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; - + bool isReallyTriviallyReMaterializable(const MachineInstr &MI, + AAResults *AA) const override; + private: /// Modeling special VFP / NEON fp MLA / MLS hazards. 
@@ -644,77 +644,77 @@ static inline bool isJumpTableBranchOpcode(int Opc) { Opc == ARM::t2BR_JT; } -static inline bool isLowOverheadTerminatorOpcode(int Opc) { - return Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart || - Opc == ARM::t2LoopEnd || Opc == ARM::t2LoopEndDec; -} - +static inline bool isLowOverheadTerminatorOpcode(int Opc) { + return Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart || + Opc == ARM::t2LoopEnd || Opc == ARM::t2LoopEndDec; +} + static inline bool isIndirectBranchOpcode(int Opc) { return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND; } -static inline bool isIndirectCall(const MachineInstr &MI) { - int Opc = MI.getOpcode(); - switch (Opc) { - // indirect calls: - case ARM::BLX: - case ARM::BLX_noip: - case ARM::BLX_pred: - case ARM::BLX_pred_noip: - case ARM::BX_CALL: - case ARM::BMOVPCRX_CALL: - case ARM::TCRETURNri: - case ARM::TAILJMPr: - case ARM::TAILJMPr4: - case ARM::tBLXr: - case ARM::tBLXr_noip: - case ARM::tBLXNSr: - case ARM::tBLXNS_CALL: - case ARM::tBX_CALL: - case ARM::tTAILJMPr: - assert(MI.isCall(MachineInstr::IgnoreBundle)); - return true; - // direct calls: - case ARM::BL: - case ARM::BL_pred: - case ARM::BMOVPCB_CALL: - case ARM::BL_PUSHLR: - case ARM::BLXi: - case ARM::TCRETURNdi: - case ARM::TAILJMPd: - case ARM::SVC: - case ARM::HVC: - case ARM::TPsoft: - case ARM::tTAILJMPd: - case ARM::t2SMC: - case ARM::t2HVC: - case ARM::tBL: - case ARM::tBLXi: - case ARM::tBL_PUSHLR: - case ARM::tTAILJMPdND: - case ARM::tSVC: - case ARM::tTPsoft: - assert(MI.isCall(MachineInstr::IgnoreBundle)); - return false; - } - assert(!MI.isCall(MachineInstr::IgnoreBundle)); - return false; -} - -static inline bool isIndirectControlFlowNotComingBack(const MachineInstr &MI) { - int opc = MI.getOpcode(); - return MI.isReturn() || isIndirectBranchOpcode(MI.getOpcode()) || - isJumpTableBranchOpcode(opc); -} - -static inline bool isSpeculationBarrierEndBBOpcode(int Opc) { - return Opc == ARM::SpeculationBarrierISBDSBEndBB || - Opc == ARM::SpeculationBarrierSBEndBB || - Opc == ARM::t2SpeculationBarrierISBDSBEndBB || - Opc == ARM::t2SpeculationBarrierSBEndBB; -} - +static inline bool isIndirectCall(const MachineInstr &MI) { + int Opc = MI.getOpcode(); + switch (Opc) { + // indirect calls: + case ARM::BLX: + case ARM::BLX_noip: + case ARM::BLX_pred: + case ARM::BLX_pred_noip: + case ARM::BX_CALL: + case ARM::BMOVPCRX_CALL: + case ARM::TCRETURNri: + case ARM::TAILJMPr: + case ARM::TAILJMPr4: + case ARM::tBLXr: + case ARM::tBLXr_noip: + case ARM::tBLXNSr: + case ARM::tBLXNS_CALL: + case ARM::tBX_CALL: + case ARM::tTAILJMPr: + assert(MI.isCall(MachineInstr::IgnoreBundle)); + return true; + // direct calls: + case ARM::BL: + case ARM::BL_pred: + case ARM::BMOVPCB_CALL: + case ARM::BL_PUSHLR: + case ARM::BLXi: + case ARM::TCRETURNdi: + case ARM::TAILJMPd: + case ARM::SVC: + case ARM::HVC: + case ARM::TPsoft: + case ARM::tTAILJMPd: + case ARM::t2SMC: + case ARM::t2HVC: + case ARM::tBL: + case ARM::tBLXi: + case ARM::tBL_PUSHLR: + case ARM::tTAILJMPdND: + case ARM::tSVC: + case ARM::tTPsoft: + assert(MI.isCall(MachineInstr::IgnoreBundle)); + return false; + } + assert(!MI.isCall(MachineInstr::IgnoreBundle)); + return false; +} + +static inline bool isIndirectControlFlowNotComingBack(const MachineInstr &MI) { + int opc = MI.getOpcode(); + return MI.isReturn() || isIndirectBranchOpcode(MI.getOpcode()) || + isJumpTableBranchOpcode(opc); +} + +static inline bool isSpeculationBarrierEndBBOpcode(int Opc) { + return Opc == 
ARM::SpeculationBarrierISBDSBEndBB || + Opc == ARM::SpeculationBarrierSBEndBB || + Opc == ARM::t2SpeculationBarrierISBDSBEndBB || + Opc == ARM::t2SpeculationBarrierSBEndBB; +} + static inline bool isPopOpcode(int Opc) { return Opc == ARM::tPOP_RET || Opc == ARM::LDMIA_RET || Opc == ARM::t2LDMIA_RET || Opc == ARM::tPOP || Opc == ARM::LDMIA_UPD || @@ -886,17 +886,17 @@ inline bool isLegalAddressImm(unsigned Opcode, int Imm, return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0; case ARMII::AddrModeT2_i7s4: return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0; - case ARMII::AddrModeT2_i8: - return std::abs(Imm) < (((1 << 8) * 1) - 1); - case ARMII::AddrModeT2_i12: - return Imm >= 0 && Imm < (((1 << 12) * 1) - 1); + case ARMII::AddrModeT2_i8: + return std::abs(Imm) < (((1 << 8) * 1) - 1); + case ARMII::AddrModeT2_i12: + return Imm >= 0 && Imm < (((1 << 12) * 1) - 1); default: llvm_unreachable("Unhandled Addressing mode"); } } -// Return true if the given intrinsic is a gather -inline bool isGather(IntrinsicInst *IntInst) { +// Return true if the given intrinsic is a gather +inline bool isGather(IntrinsicInst *IntInst) { if (IntInst == nullptr) return false; unsigned IntrinsicID = IntInst->getIntrinsicID(); @@ -906,15 +906,15 @@ inline bool isGather(IntrinsicInst *IntInst) { IntrinsicID == Intrinsic::arm_mve_vldr_gather_base_wb || IntrinsicID == Intrinsic::arm_mve_vldr_gather_base_wb_predicated || IntrinsicID == Intrinsic::arm_mve_vldr_gather_offset || - IntrinsicID == Intrinsic::arm_mve_vldr_gather_offset_predicated); -} - -// Return true if the given intrinsic is a scatter -inline bool isScatter(IntrinsicInst *IntInst) { - if (IntInst == nullptr) - return false; - unsigned IntrinsicID = IntInst->getIntrinsicID(); - return (IntrinsicID == Intrinsic::masked_scatter || + IntrinsicID == Intrinsic::arm_mve_vldr_gather_offset_predicated); +} + +// Return true if the given intrinsic is a scatter +inline bool isScatter(IntrinsicInst *IntInst) { + if (IntInst == nullptr) + return false; + unsigned IntrinsicID = IntInst->getIntrinsicID(); + return (IntrinsicID == Intrinsic::masked_scatter || IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base || IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base_predicated || IntrinsicID == Intrinsic::arm_mve_vstr_scatter_base_wb || @@ -923,17 +923,17 @@ inline bool isScatter(IntrinsicInst *IntInst) { IntrinsicID == Intrinsic::arm_mve_vstr_scatter_offset_predicated); } -// Return true if the given intrinsic is a gather or scatter -inline bool isGatherScatter(IntrinsicInst *IntInst) { - if (IntInst == nullptr) - return false; - return isGather(IntInst) || isScatter(IntInst); -} - -unsigned getBLXOpcode(const MachineFunction &MF); -unsigned gettBLXrOpcode(const MachineFunction &MF); -unsigned getBLXpredOpcode(const MachineFunction &MF); - +// Return true if the given intrinsic is a gather or scatter +inline bool isGatherScatter(IntrinsicInst *IntInst) { + if (IntInst == nullptr) + return false; + return isGather(IntInst) || isScatter(IntInst); +} + +unsigned getBLXOpcode(const MachineFunction &MF); +unsigned gettBLXrOpcode(const MachineFunction &MF); +unsigned getBLXpredOpcode(const MachineFunction &MF); + } // end namespace llvm #endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 1a264dabee..138431e36d 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ 
b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -55,9 +55,9 @@ using namespace llvm; ARMBaseRegisterInfo::ARMBaseRegisterInfo() - : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC) { - ARM_MC::initLLVMToCVRegMapping(this); -} + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC) { + ARM_MC::initLLVMToCVRegMapping(this); +} static unsigned getFramePointerReg(const ARMSubtarget &STI) { return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11; @@ -330,13 +330,13 @@ bool ARMBaseRegisterInfo::getRegAllocationHints( case ARMRI::RegPairOdd: Odd = 1; break; - case ARMRI::RegLR: + case ARMRI::RegLR: TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM); - if (MRI.getRegClass(VirtReg)->contains(ARM::LR)) - Hints.push_back(ARM::LR); + if (MRI.getRegClass(VirtReg)->contains(ARM::LR)) + Hints.push_back(ARM::LR); return false; - default: - return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM); + default: + return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM); } // This register should preferably be even (Odd == 0) or odd (Odd == 1). @@ -640,10 +640,10 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { /// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to /// be a pointer to FrameIdx at the beginning of the basic block. -Register -ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, - int FrameIdx, - int64_t Offset) const { +Register +ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, + int FrameIdx, + int64_t Offset) const { ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>(); unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : (AFI->isThumb1OnlyFunction() ? ARM::tADDframe : ARM::t2ADDri); @@ -657,7 +657,7 @@ ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); - Register BaseReg = MRI.createVirtualRegister(&ARM::GPRRegClass); + Register BaseReg = MRI.createVirtualRegister(&ARM::GPRRegClass); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) @@ -665,8 +665,8 @@ ARMBaseRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, if (!AFI->isThumb1OnlyFunction()) MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); - - return BaseReg; + + return BaseReg; } void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.h index 5afb6c6aa0..53e8aa657c 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -32,11 +32,11 @@ class LiveIntervals; namespace ARMRI { enum { - // Used for LDRD register pairs + // Used for LDRD register pairs RegPairOdd = 1, - RegPairEven = 2, - // Used to hint for lr in t2DoLoopStart - RegLR = 3 + RegPairEven = 2, + // Used to hint for lr in t2DoLoopStart + RegLR = 3 }; } // end namespace ARMRI @@ -168,8 +168,8 @@ public: int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override; bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; - Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, - int64_t Offset) const override; + Register 
materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, + int64_t Offset) const override; void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override; bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMBlockPlacement.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMBlockPlacement.cpp index 9ba16003a9..2cc6a5b4c1 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMBlockPlacement.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMBlockPlacement.cpp @@ -1,228 +1,228 @@ -//===-- ARMBlockPlacement.cpp - ARM block placement pass ------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass re-arranges machine basic blocks to suit target requirements. -// Currently it only moves blocks to fix backwards WLS branches. -// -//===----------------------------------------------------------------------===// - -#include "ARM.h" -#include "ARMBaseInstrInfo.h" -#include "ARMBasicBlockInfo.h" -#include "ARMSubtarget.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" - -using namespace llvm; - -#define DEBUG_TYPE "arm-block-placement" -#define DEBUG_PREFIX "ARM Block Placement: " - -namespace llvm { -class ARMBlockPlacement : public MachineFunctionPass { -private: - const ARMBaseInstrInfo *TII; - std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr; - MachineLoopInfo *MLI = nullptr; - -public: - static char ID; - ARMBlockPlacement() : MachineFunctionPass(ID) {} - - bool runOnMachineFunction(MachineFunction &MF) override; - void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After); - bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other); - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<MachineLoopInfo>(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; - -} // namespace llvm - -FunctionPass *llvm::createARMBlockPlacementPass() { - return new ARMBlockPlacement(); -} - -char ARMBlockPlacement::ID = 0; - -INITIALIZE_PASS(ARMBlockPlacement, DEBUG_TYPE, "ARM block placement", false, - false) - -bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(MF.getFunction())) - return false; - const ARMSubtarget &ST = static_cast<const ARMSubtarget &>(MF.getSubtarget()); - if (!ST.hasLOB()) - return false; - LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Running on " << MF.getName() << "\n"); - MLI = &getAnalysis<MachineLoopInfo>(); - TII = static_cast<const ARMBaseInstrInfo *>(ST.getInstrInfo()); - BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF)); - MF.RenumberBlocks(); - BBUtils->computeAllBlockSizes(); - BBUtils->adjustBBOffsetsAfter(&MF.front()); - bool Changed = false; - - // Find loops with a backwards branching WLS. - // This requires looping over the loops in the function, checking each - // preheader for a WLS and if its target is before the preheader. If moving - // the target block wouldn't produce another backwards WLS or a new forwards - // LE branch then move the target block after the preheader. 
- for (auto *ML : *MLI) { - MachineBasicBlock *Preheader = ML->getLoopPredecessor(); - if (!Preheader) - continue; - - for (auto &Terminator : Preheader->terminators()) { - if (Terminator.getOpcode() != ARM::t2WhileLoopStart) - continue; - MachineBasicBlock *LoopExit = Terminator.getOperand(1).getMBB(); - // We don't want to move the function's entry block. - if (!LoopExit->getPrevNode()) - continue; - if (blockIsBefore(Preheader, LoopExit)) - continue; - LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Found a backwards WLS from " - << Preheader->getFullName() << " to " - << LoopExit->getFullName() << "\n"); - - // Make sure that moving the target block doesn't cause any of its WLSs - // that were previously not backwards to become backwards - bool CanMove = true; - for (auto &LoopExitTerminator : LoopExit->terminators()) { - if (LoopExitTerminator.getOpcode() != ARM::t2WhileLoopStart) - continue; - // An example loop structure where the LoopExit can't be moved, since - // bb1's WLS will become backwards once it's moved after bb3 bb1: - - // LoopExit - // WLS bb2 - LoopExit2 - // bb2: - // ... - // bb3: - Preheader - // WLS bb1 - // bb4: - Header - MachineBasicBlock *LoopExit2 = - LoopExitTerminator.getOperand(1).getMBB(); - // If the WLS from LoopExit to LoopExit2 is already backwards then - // moving LoopExit won't affect it, so it can be moved. If LoopExit2 is - // after the Preheader then moving will keep it as a forward branch, so - // it can be moved. If LoopExit2 is between the Preheader and LoopExit - // then moving LoopExit will make it a backwards branch, so it can't be - // moved since we'd fix one and introduce one backwards branch. - // TODO: Analyse the blocks to make a decision if it would be worth - // moving LoopExit even if LoopExit2 is between the Preheader and - // LoopExit. - if (!blockIsBefore(LoopExit2, LoopExit) && - (LoopExit2 == Preheader || blockIsBefore(LoopExit2, Preheader))) { - LLVM_DEBUG(dbgs() << DEBUG_PREFIX - << "Can't move the target block as it would " - "introduce a new backwards WLS branch\n"); - CanMove = false; - break; - } - } - - if (CanMove) { - // Make sure no LEs become forwards. - // An example loop structure where the LoopExit can't be moved, since - // bb2's LE will become forwards once bb1 is moved after bb3. - // bb1: - LoopExit - // bb2: - // LE bb1 - Terminator - // bb3: - Preheader - // WLS bb1 - // bb4: - Header - for (auto It = LoopExit->getIterator(); It != Preheader->getIterator(); - It++) { - MachineBasicBlock *MBB = &*It; - for (auto &Terminator : MBB->terminators()) { - if (Terminator.getOpcode() != ARM::t2LoopEndDec) - continue; - MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB(); - // The LE will become forwards branching if it branches to LoopExit - // which isn't allowed by the architecture, so we should avoid - // introducing these. 
- // TODO: Analyse the blocks to make a decision if it would be worth - // moving LoopExit even if we'd introduce a forwards LE - if (LETarget == LoopExit) { - LLVM_DEBUG(dbgs() << DEBUG_PREFIX - << "Can't move the target block as it would " - "introduce a new forwards LE branch\n"); - CanMove = false; - break; - } - } - } - - if (!CanMove) - break; - } - - if (CanMove) { - moveBasicBlock(LoopExit, Preheader); - Changed = true; - break; - } - } - } - - return Changed; -} - -bool ARMBlockPlacement::blockIsBefore(MachineBasicBlock *BB, - MachineBasicBlock *Other) { - return BBUtils->getOffsetOf(Other) > BBUtils->getOffsetOf(BB); -} - -void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB, - MachineBasicBlock *After) { - LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " after " - << After->getName() << "\n"); - MachineBasicBlock *BBPrevious = BB->getPrevNode(); - assert(BBPrevious && "Cannot move the function entry basic block"); - MachineBasicBlock *AfterNext = After->getNextNode(); - MachineBasicBlock *BBNext = BB->getNextNode(); - - BB->moveAfter(After); - - auto FixFallthrough = [&](MachineBasicBlock *From, MachineBasicBlock *To) { - LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Checking for fallthrough from " - << From->getName() << " to " << To->getName() << "\n"); - assert(From->isSuccessor(To) && - "'To' is expected to be a successor of 'From'"); - MachineInstr &Terminator = *(--From->terminators().end()); - if (!Terminator.isUnconditionalBranch()) { - // The BB doesn't have an unconditional branch so it relied on - // fall-through. Fix by adding an unconditional branch to the moved BB. - MachineInstrBuilder MIB = - BuildMI(From, Terminator.getDebugLoc(), TII->get(ARM::t2B)); - MIB.addMBB(To); - MIB.addImm(ARMCC::CondCodes::AL); - MIB.addReg(ARM::NoRegister); - LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Adding unconditional branch from " - << From->getName() << " to " << To->getName() << ": " - << *MIB.getInstr()); - } - }; - - // Fix fall-through to the moved BB from the one that used to be before it. - if (BBPrevious->isSuccessor(BB)) - FixFallthrough(BBPrevious, BB); - // Fix fall through from the destination BB to the one that used to follow. - if (AfterNext && After->isSuccessor(AfterNext)) - FixFallthrough(After, AfterNext); - // Fix fall through from the moved BB to the one that used to follow. - if (BBNext && BB->isSuccessor(BBNext)) - FixFallthrough(BB, BBNext); - - BBUtils->adjustBBOffsetsAfter(After); -} +//===-- ARMBlockPlacement.cpp - ARM block placement pass ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass re-arranges machine basic blocks to suit target requirements. +// Currently it only moves blocks to fix backwards WLS branches. 
+// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBasicBlockInfo.h" +#include "ARMSubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "arm-block-placement" +#define DEBUG_PREFIX "ARM Block Placement: " + +namespace llvm { +class ARMBlockPlacement : public MachineFunctionPass { +private: + const ARMBaseInstrInfo *TII; + std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr; + MachineLoopInfo *MLI = nullptr; + +public: + static char ID; + ARMBlockPlacement() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After); + bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other); + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +} // namespace llvm + +FunctionPass *llvm::createARMBlockPlacementPass() { + return new ARMBlockPlacement(); +} + +char ARMBlockPlacement::ID = 0; + +INITIALIZE_PASS(ARMBlockPlacement, DEBUG_TYPE, "ARM block placement", false, + false) + +bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + const ARMSubtarget &ST = static_cast<const ARMSubtarget &>(MF.getSubtarget()); + if (!ST.hasLOB()) + return false; + LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Running on " << MF.getName() << "\n"); + MLI = &getAnalysis<MachineLoopInfo>(); + TII = static_cast<const ARMBaseInstrInfo *>(ST.getInstrInfo()); + BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF)); + MF.RenumberBlocks(); + BBUtils->computeAllBlockSizes(); + BBUtils->adjustBBOffsetsAfter(&MF.front()); + bool Changed = false; + + // Find loops with a backwards branching WLS. + // This requires looping over the loops in the function, checking each + // preheader for a WLS and if its target is before the preheader. If moving + // the target block wouldn't produce another backwards WLS or a new forwards + // LE branch then move the target block after the preheader. + for (auto *ML : *MLI) { + MachineBasicBlock *Preheader = ML->getLoopPredecessor(); + if (!Preheader) + continue; + + for (auto &Terminator : Preheader->terminators()) { + if (Terminator.getOpcode() != ARM::t2WhileLoopStart) + continue; + MachineBasicBlock *LoopExit = Terminator.getOperand(1).getMBB(); + // We don't want to move the function's entry block. + if (!LoopExit->getPrevNode()) + continue; + if (blockIsBefore(Preheader, LoopExit)) + continue; + LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Found a backwards WLS from " + << Preheader->getFullName() << " to " + << LoopExit->getFullName() << "\n"); + + // Make sure that moving the target block doesn't cause any of its WLSs + // that were previously not backwards to become backwards + bool CanMove = true; + for (auto &LoopExitTerminator : LoopExit->terminators()) { + if (LoopExitTerminator.getOpcode() != ARM::t2WhileLoopStart) + continue; + // An example loop structure where the LoopExit can't be moved, since + // bb1's WLS will become backwards once it's moved after bb3 bb1: - + // LoopExit + // WLS bb2 - LoopExit2 + // bb2: + // ... 
+ // bb3: - Preheader + // WLS bb1 + // bb4: - Header + MachineBasicBlock *LoopExit2 = + LoopExitTerminator.getOperand(1).getMBB(); + // If the WLS from LoopExit to LoopExit2 is already backwards then + // moving LoopExit won't affect it, so it can be moved. If LoopExit2 is + // after the Preheader then moving will keep it as a forward branch, so + // it can be moved. If LoopExit2 is between the Preheader and LoopExit + // then moving LoopExit will make it a backwards branch, so it can't be + // moved since we'd fix one and introduce one backwards branch. + // TODO: Analyse the blocks to make a decision if it would be worth + // moving LoopExit even if LoopExit2 is between the Preheader and + // LoopExit. + if (!blockIsBefore(LoopExit2, LoopExit) && + (LoopExit2 == Preheader || blockIsBefore(LoopExit2, Preheader))) { + LLVM_DEBUG(dbgs() << DEBUG_PREFIX + << "Can't move the target block as it would " + "introduce a new backwards WLS branch\n"); + CanMove = false; + break; + } + } + + if (CanMove) { + // Make sure no LEs become forwards. + // An example loop structure where the LoopExit can't be moved, since + // bb2's LE will become forwards once bb1 is moved after bb3. + // bb1: - LoopExit + // bb2: + // LE bb1 - Terminator + // bb3: - Preheader + // WLS bb1 + // bb4: - Header + for (auto It = LoopExit->getIterator(); It != Preheader->getIterator(); + It++) { + MachineBasicBlock *MBB = &*It; + for (auto &Terminator : MBB->terminators()) { + if (Terminator.getOpcode() != ARM::t2LoopEndDec) + continue; + MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB(); + // The LE will become forwards branching if it branches to LoopExit + // which isn't allowed by the architecture, so we should avoid + // introducing these. + // TODO: Analyse the blocks to make a decision if it would be worth + // moving LoopExit even if we'd introduce a forwards LE + if (LETarget == LoopExit) { + LLVM_DEBUG(dbgs() << DEBUG_PREFIX + << "Can't move the target block as it would " + "introduce a new forwards LE branch\n"); + CanMove = false; + break; + } + } + } + + if (!CanMove) + break; + } + + if (CanMove) { + moveBasicBlock(LoopExit, Preheader); + Changed = true; + break; + } + } + } + + return Changed; +} + +bool ARMBlockPlacement::blockIsBefore(MachineBasicBlock *BB, + MachineBasicBlock *Other) { + return BBUtils->getOffsetOf(Other) > BBUtils->getOffsetOf(BB); +} + +void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB, + MachineBasicBlock *After) { + LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " after " + << After->getName() << "\n"); + MachineBasicBlock *BBPrevious = BB->getPrevNode(); + assert(BBPrevious && "Cannot move the function entry basic block"); + MachineBasicBlock *AfterNext = After->getNextNode(); + MachineBasicBlock *BBNext = BB->getNextNode(); + + BB->moveAfter(After); + + auto FixFallthrough = [&](MachineBasicBlock *From, MachineBasicBlock *To) { + LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Checking for fallthrough from " + << From->getName() << " to " << To->getName() << "\n"); + assert(From->isSuccessor(To) && + "'To' is expected to be a successor of 'From'"); + MachineInstr &Terminator = *(--From->terminators().end()); + if (!Terminator.isUnconditionalBranch()) { + // The BB doesn't have an unconditional branch so it relied on + // fall-through. Fix by adding an unconditional branch to the moved BB. 
+ MachineInstrBuilder MIB = + BuildMI(From, Terminator.getDebugLoc(), TII->get(ARM::t2B)); + MIB.addMBB(To); + MIB.addImm(ARMCC::CondCodes::AL); + MIB.addReg(ARM::NoRegister); + LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Adding unconditional branch from " + << From->getName() << " to " << To->getName() << ": " + << *MIB.getInstr()); + } + }; + + // Fix fall-through to the moved BB from the one that used to be before it. + if (BBPrevious->isSuccessor(BB)) + FixFallthrough(BBPrevious, BB); + // Fix fall through from the destination BB to the one that used to follow. + if (AfterNext && After->isSuccessor(AfterNext)) + FixFallthrough(After, AfterNext); + // Fix fall through from the moved BB to the one that used to follow. + if (BBNext && BB->isSuccessor(BBNext)) + FixFallthrough(BB, BBNext); + + BBUtils->adjustBBOffsetsAfter(After); +} diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.cpp index 6feed82596..471474788e 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.cpp @@ -85,11 +85,11 @@ namespace { /// Helper class for values going out through an ABI boundary (used for handling /// function return values and call parameters). -struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler { - ARMOutgoingValueHandler(MachineIRBuilder &MIRBuilder, - MachineRegisterInfo &MRI, MachineInstrBuilder &MIB, - CCAssignFn *AssignFn) - : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} +struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler { + ARMOutgoingValueHandler(MachineIRBuilder &MIRBuilder, + MachineRegisterInfo &MRI, MachineInstrBuilder &MIB, + CCAssignFn *AssignFn) + : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { @@ -257,14 +257,14 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg()); - ARMOutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, - AssignFn); + ARMOutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, + AssignFn); return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler); } bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, - const Value *Val, ArrayRef<Register> VRegs, - FunctionLoweringInfo &FLI) const { + const Value *Val, ArrayRef<Register> VRegs, + FunctionLoweringInfo &FLI) const { assert(!Val == VRegs.empty() && "Return value without a vreg"); auto const &ST = MIRBuilder.getMF().getSubtarget<ARMSubtarget>(); @@ -282,10 +282,10 @@ namespace { /// Helper class for values coming in through an ABI boundary (used for handling /// formal arguments and call return values). 
-struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler { - ARMIncomingValueHandler(MachineIRBuilder &MIRBuilder, - MachineRegisterInfo &MRI, CCAssignFn AssignFn) - : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {} +struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler { + ARMIncomingValueHandler(MachineIRBuilder &MIRBuilder, + MachineRegisterInfo &MRI, CCAssignFn AssignFn) + : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {} Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { @@ -335,8 +335,8 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler { assert(VA.isRegLoc() && "Value shouldn't be assigned to reg"); assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?"); - uint64_t ValSize = VA.getValVT().getFixedSizeInBits(); - uint64_t LocSize = VA.getLocVT().getFixedSizeInBits(); + uint64_t ValSize = VA.getValVT().getFixedSizeInBits(); + uint64_t LocSize = VA.getLocVT().getFixedSizeInBits(); assert(ValSize <= 64 && "Unsupported value size"); assert(LocSize <= 64 && "Unsupported location size"); @@ -397,10 +397,10 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler { virtual void markPhysRegUsed(unsigned PhysReg) = 0; }; -struct FormalArgHandler : public ARMIncomingValueHandler { +struct FormalArgHandler : public ARMIncomingValueHandler { FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, CCAssignFn AssignFn) - : ARMIncomingValueHandler(MIRBuilder, MRI, AssignFn) {} + : ARMIncomingValueHandler(MIRBuilder, MRI, AssignFn) {} void markPhysRegUsed(unsigned PhysReg) override { MIRBuilder.getMRI()->addLiveIn(PhysReg); @@ -410,10 +410,10 @@ struct FormalArgHandler : public ARMIncomingValueHandler { } // end anonymous namespace -bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, - const Function &F, - ArrayRef<ArrayRef<Register>> VRegs, - FunctionLoweringInfo &FLI) const { +bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, + const Function &F, + ArrayRef<ArrayRef<Register>> VRegs, + FunctionLoweringInfo &FLI) const { auto &TLI = *getTLI<ARMTargetLowering>(); auto Subtarget = TLI.getSubtarget(); @@ -434,7 +434,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, for (auto &Arg : F.args()) { if (!isSupportedType(DL, TLI, Arg.getType())) return false; - if (Arg.hasPassPointeeByValueCopyAttr()) + if (Arg.hasPassPointeeByValueCopyAttr()) return false; } @@ -468,10 +468,10 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, namespace { -struct CallReturnHandler : public ARMIncomingValueHandler { +struct CallReturnHandler : public ARMIncomingValueHandler { CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstrBuilder MIB, CCAssignFn *AssignFn) - : ARMIncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} + : ARMIncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} void markPhysRegUsed(unsigned PhysReg) override { MIB.addDef(PhysReg, RegState::Implicit); @@ -481,16 +481,16 @@ struct CallReturnHandler : public ARMIncomingValueHandler { }; // FIXME: This should move to the ARMSubtarget when it supports all the opcodes. -unsigned getCallOpcode(const MachineFunction &MF, const ARMSubtarget &STI, - bool isDirect) { +unsigned getCallOpcode(const MachineFunction &MF, const ARMSubtarget &STI, + bool isDirect) { if (isDirect) return STI.isThumb() ? 
ARM::tBL : ARM::BL; if (STI.isThumb()) - return gettBLXrOpcode(MF); + return gettBLXrOpcode(MF); if (STI.hasV5TOps()) - return getBLXOpcode(MF); + return getBLXOpcode(MF); if (STI.hasV4TOps()) return ARM::BX_CALL; @@ -518,7 +518,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo & // Create the call instruction so we can add the implicit uses of arg // registers, but don't insert it yet. bool IsDirect = !Info.Callee.isReg(); - auto CallOpcode = getCallOpcode(MF, STI, IsDirect); + auto CallOpcode = getCallOpcode(MF, STI, IsDirect); auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode); bool IsThumb = STI.isThumb(); @@ -549,8 +549,8 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo & splitToValueTypes(Arg, ArgInfos, MF); } - auto ArgAssignFn = TLI.CCAssignFnForCall(Info.CallConv, Info.IsVarArg); - ARMOutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn); + auto ArgAssignFn = TLI.CCAssignFnForCall(Info.CallConv, Info.IsVarArg); + ARMOutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn); if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler)) return false; @@ -563,7 +563,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo & ArgInfos.clear(); splitToValueTypes(Info.OrigRet, ArgInfos, MF); - auto RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv, Info.IsVarArg); + auto RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv, Info.IsVarArg); CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn); if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler)) return false; diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.h b/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.h index 3be73d497d..9bff3564c5 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.h +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMCallLowering.h @@ -33,12 +33,12 @@ public: ARMCallLowering(const ARMTargetLowering &TLI); bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, - ArrayRef<Register> VRegs, - FunctionLoweringInfo &FLI) const override; + ArrayRef<Register> VRegs, + FunctionLoweringInfo &FLI) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef<ArrayRef<Register>> VRegs, - FunctionLoweringInfo &FLI) const override; + ArrayRef<ArrayRef<Register>> VRegs, + FunctionLoweringInfo &FLI) const override; bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override; diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMConstantIslandPass.cpp index 630490f6f9..86faf511c9 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -338,32 +338,32 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() { } #endif -// Align blocks where the previous block does not fall through. This may add -// extra NOP's but they will not be executed. It uses the PrefLoopAlignment as a -// measure of how much to align, and only runs at CodeGenOpt::Aggressive. 
-static bool AlignBlocks(MachineFunction *MF) { - if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive || - MF->getFunction().hasOptSize()) - return false; - - auto *TLI = MF->getSubtarget().getTargetLowering(); - const Align Alignment = TLI->getPrefLoopAlignment(); - if (Alignment < 4) - return false; - - bool Changed = false; - bool PrevCanFallthough = true; - for (auto &MBB : *MF) { - if (!PrevCanFallthough) { - Changed = true; - MBB.setAlignment(Alignment); - } - PrevCanFallthough = MBB.canFallThrough(); - } - - return Changed; -} - +// Align blocks where the previous block does not fall through. This may add +// extra NOP's but they will not be executed. It uses the PrefLoopAlignment as a +// measure of how much to align, and only runs at CodeGenOpt::Aggressive. +static bool AlignBlocks(MachineFunction *MF) { + if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive || + MF->getFunction().hasOptSize()) + return false; + + auto *TLI = MF->getSubtarget().getTargetLowering(); + const Align Alignment = TLI->getPrefLoopAlignment(); + if (Alignment < 4) + return false; + + bool Changed = false; + bool PrevCanFallthough = true; + for (auto &MBB : *MF) { + if (!PrevCanFallthough) { + Changed = true; + MBB.setAlignment(Alignment); + } + PrevCanFallthough = MBB.canFallThrough(); + } + + return Changed; +} + bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MCP = mf.getConstantPool(); @@ -385,10 +385,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { isThumb2 = AFI->isThumb2Function(); bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB); - // TBB generation code in this constant island pass has not been adapted to - // deal with speculation barriers. - if (STI->hardenSlsRetBr()) - GenerateTBB = false; + // TBB generation code in this constant island pass has not been adapted to + // deal with speculation barriers. + if (STI->hardenSlsRetBr()) + GenerateTBB = false; // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. @@ -406,9 +406,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { MF->RenumberBlocks(); } - // Align any non-fallthrough blocks - MadeChange |= AlignBlocks(MF); - + // Align any non-fallthrough blocks + MadeChange |= AlignBlocks(MF); + // Perform the initial placement of the constant pool entries. To start with, // we put them all at the end of the function. std::vector<MachineInstr*> CPEMIs; @@ -524,11 +524,11 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) // The function needs to be as aligned as the basic blocks. The linker may // move functions around based on their alignment. - // Special case: halfword literals still need word alignment on the function. - Align FuncAlign = MaxAlign; - if (MaxAlign == 2) - FuncAlign = Align(4); - MF->ensureAlignment(FuncAlign); + // Special case: halfword literals still need word alignment on the function. + Align FuncAlign = MaxAlign; + if (MaxAlign == 2) + FuncAlign = Align(4); + MF->ensureAlignment(FuncAlign); // Order the entries in BB by descending alignment. That ensures correct // alignment of all entries as long as BB is sufficiently aligned. 
Keep @@ -543,7 +543,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) const DataLayout &TD = MF->getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { - unsigned Size = CPs[i].getSizeInBytes(TD); + unsigned Size = CPs[i].getSizeInBytes(TD); Align Alignment = CPs[i].getAlign(); // Verify that all constant pool entries are a multiple of their alignment. // If not, we would have to pad them out so that instructions stay aligned. @@ -586,12 +586,12 @@ void ARMConstantIslands::doInitialJumpTablePlacement( MachineBasicBlock *LastCorrectlyNumberedBB = nullptr; for (MachineBasicBlock &MBB : *MF) { auto MI = MBB.getLastNonDebugInstr(); - // Look past potential SpeculationBarriers at end of BB. - while (MI != MBB.end() && - (isSpeculationBarrierEndBBOpcode(MI->getOpcode()) || - MI->isDebugInstr())) - --MI; - + // Look past potential SpeculationBarriers at end of BB. + while (MI != MBB.end() && + (isSpeculationBarrierEndBBOpcode(MI->getOpcode()) || + MI->isDebugInstr())) + --MI; + if (MI == MBB.end()) continue; @@ -814,26 +814,26 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { // Taking the address of a CP entry. case ARM::LEApcrel: - case ARM::LEApcrelJT: { - // This takes a SoImm, which is 8 bit immediate rotated. We'll - // pretend the maximum offset is 255 * 4. Since each instruction - // 4 byte wide, this is always correct. We'll check for other - // displacements that fits in a SoImm as well. - Bits = 8; - NegOk = true; - IsSoImm = true; - unsigned CPI = I.getOperand(op).getIndex(); - assert(CPI < CPEMIs.size()); - MachineInstr *CPEMI = CPEMIs[CPI]; - const Align CPEAlign = getCPEAlign(CPEMI); - const unsigned LogCPEAlign = Log2(CPEAlign); - if (LogCPEAlign >= 2) - Scale = 4; - else - // For constants with less than 4-byte alignment, - // we'll pretend the maximum offset is 255 * 1. - Scale = 1; - } + case ARM::LEApcrelJT: { + // This takes a SoImm, which is 8 bit immediate rotated. We'll + // pretend the maximum offset is 255 * 4. Since each instruction + // 4 byte wide, this is always correct. We'll check for other + // displacements that fits in a SoImm as well. + Bits = 8; + NegOk = true; + IsSoImm = true; + unsigned CPI = I.getOperand(op).getIndex(); + assert(CPI < CPEMIs.size()); + MachineInstr *CPEMI = CPEMIs[CPI]; + const Align CPEAlign = getCPEAlign(CPEMI); + const unsigned LogCPEAlign = Log2(CPEAlign); + if (LogCPEAlign >= 2) + Scale = 4; + else + // For constants with less than 4-byte alignment, + // we'll pretend the maximum offset is 255 * 1. 
+ Scale = 1; + } break; case ARM::t2LEApcrel: case ARM::t2LEApcrelJT: @@ -2124,7 +2124,7 @@ static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) { MachineFunction *MF = MBB->getParent(); ++MBB; - return MBB != MF->end() && !MBB->empty() && &*MBB->begin() == CPEMI; + return MBB != MF->end() && !MBB->empty() && &*MBB->begin() == CPEMI; } static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI, diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMExpandPseudoInsts.cpp index a7f1765a93..a38327ffe6 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -875,25 +875,25 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!"); unsigned ImmVal = (unsigned)MO.getImm(); - unsigned SOImmValV1 = 0, SOImmValV2 = 0; - - if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr. - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg); - SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); - SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); - } else { // Expand into a mvn + sub. - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg); - SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal); - SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal); - SOImmValV1 = ~(-SOImmValV1); - } - + unsigned SOImmValV1 = 0, SOImmValV2 = 0; + + if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr. + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); + SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); + } else { // Expand into a mvn + sub. + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal); + SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal); + SOImmValV1 = ~(-SOImmValV1); + } + unsigned MIFlags = MI.getFlags(); LO16 = LO16.addImm(SOImmValV1); HI16 = HI16.addImm(SOImmValV2); @@ -1871,66 +1871,66 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, default: return false; - case ARM::VBSPd: - case ARM::VBSPq: { - Register DstReg = MI.getOperand(0).getReg(); - if (DstReg == MI.getOperand(3).getReg()) { - // Expand to VBIT - unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq; - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) - .add(MI.getOperand(0)) - .add(MI.getOperand(3)) - .add(MI.getOperand(2)) - .add(MI.getOperand(1)) - .addImm(MI.getOperand(4).getImm()) - .add(MI.getOperand(5)); - } else if (DstReg == MI.getOperand(2).getReg()) { - // Expand to VBIF - unsigned NewOpc = Opcode == ARM::VBSPd ? 
ARM::VBIFd : ARM::VBIFq; - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) - .add(MI.getOperand(0)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .add(MI.getOperand(1)) - .addImm(MI.getOperand(4).getImm()) - .add(MI.getOperand(5)); - } else { - // Expand to VBSL - unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq; - if (DstReg == MI.getOperand(1).getReg()) { - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) - .add(MI.getOperand(0)) - .add(MI.getOperand(1)) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .addImm(MI.getOperand(4).getImm()) - .add(MI.getOperand(5)); - } else { - // Use move to satisfy constraints - unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq; - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc)) - .addReg(DstReg, - RegState::Define | - getRenamableRegState(MI.getOperand(0).isRenamable())) - .add(MI.getOperand(1)) - .add(MI.getOperand(1)) - .addImm(MI.getOperand(4).getImm()) - .add(MI.getOperand(5)); - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) - .add(MI.getOperand(0)) - .addReg(DstReg, - RegState::Kill | - getRenamableRegState(MI.getOperand(0).isRenamable())) - .add(MI.getOperand(2)) - .add(MI.getOperand(3)) - .addImm(MI.getOperand(4).getImm()) - .add(MI.getOperand(5)); - } - } - MI.eraseFromParent(); - return true; - } - + case ARM::VBSPd: + case ARM::VBSPq: { + Register DstReg = MI.getOperand(0).getReg(); + if (DstReg == MI.getOperand(3).getReg()) { + // Expand to VBIT + unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) + .add(MI.getOperand(0)) + .add(MI.getOperand(3)) + .add(MI.getOperand(2)) + .add(MI.getOperand(1)) + .addImm(MI.getOperand(4).getImm()) + .add(MI.getOperand(5)); + } else if (DstReg == MI.getOperand(2).getReg()) { + // Expand to VBIF + unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) + .add(MI.getOperand(0)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(1)) + .addImm(MI.getOperand(4).getImm()) + .add(MI.getOperand(5)); + } else { + // Expand to VBSL + unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq; + if (DstReg == MI.getOperand(1).getReg()) { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .addImm(MI.getOperand(4).getImm()) + .add(MI.getOperand(5)); + } else { + // Use move to satisfy constraints + unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc)) + .addReg(DstReg, + RegState::Define | + getRenamableRegState(MI.getOperand(0).isRenamable())) + .add(MI.getOperand(1)) + .add(MI.getOperand(1)) + .addImm(MI.getOperand(4).getImm()) + .add(MI.getOperand(5)); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) + .add(MI.getOperand(0)) + .addReg(DstReg, + RegState::Kill | + getRenamableRegState(MI.getOperand(0).isRenamable())) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .addImm(MI.getOperand(4).getImm()) + .add(MI.getOperand(5)); + } + } + MI.eraseFromParent(); + return true; + } + case ARM::TCRETURNdi: case ARM::TCRETURNri: { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -2304,9 +2304,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB.addImm(0); MIB.add(predOps(ARMCC::AL)); - MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(Thumb ? 
gettBLXrOpcode(*MF) : getBLXOpcode(*MF))); + MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF))); if (Thumb) MIB.add(predOps(ARMCC::AL)); MIB.addReg(Reg, RegState::Kill); diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMFastISel.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMFastISel.cpp index da1d9af8d5..483aeb4d72 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMFastISel.cpp @@ -606,9 +606,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { } } - if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) || - (Subtarget->isTargetMachO() && IsIndirect) || - Subtarget->genLongCalls()) { + if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) || + (Subtarget->isTargetMachO() && IsIndirect) || + Subtarget->genLongCalls()) { MachineInstrBuilder MIB; unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) @@ -2175,7 +2175,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { if (UseReg) - return isThumb2 ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF); + return isThumb2 ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF); else return isThumb2 ? ARM::tBL : ARM::BL; } @@ -2266,11 +2266,11 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // BL / BLX don't take a predicate, but tBL / tBLX do. if (isThumb2) MIB.add(predOps(ARMCC::AL)); - if (Subtarget->genLongCalls()) { - CalleeReg = - constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0); + if (Subtarget->genLongCalls()) { + CalleeReg = + constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0); MIB.addReg(CalleeReg); - } else + } else MIB.addExternalSymbol(TLI.getLibcallName(Call)); // Add implicit physical register uses to the call. @@ -2408,11 +2408,11 @@ bool ARMFastISel::SelectCall(const Instruction *I, // ARM calls don't take a predicate, but tBL / tBLX do. if(isThumb2) MIB.add(predOps(ARMCC::AL)); - if (UseReg) { - CalleeReg = - constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0); + if (UseReg) { + CalleeReg = + constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0); MIB.addReg(CalleeReg); - } else if (!IntrMemName) + } else if (!IntrMemName) MIB.addGlobalAddress(GV, 0, 0); else MIB.addExternalSymbol(IntrMemName, 0); diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMFeatures.h b/contrib/libs/llvm12/lib/Target/ARM/ARMFeatures.h index 99e0ef05b5..6d8e75a2ec 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMFeatures.h +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMFeatures.h @@ -75,7 +75,7 @@ inline bool isV8EligibleForIT(const InstrType *Instr) { // there are some "conditionally deprecated" opcodes case ARM::tADDspr: case ARM::tBLXr: - case ARM::tBLXr_noip: + case ARM::tBLXr_noip: return Instr->getOperand(2).getReg() != ARM::PC; // ADD PC, SP and BLX PC were always unpredictable, // now on top of it they're deprecated diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.cpp index 9eeb7f20dc..e0a657b505 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.cpp @@ -883,10 +883,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, /// debug info. It's the same as what we use for resolving the code-gen /// references for now. 
FIXME: This can go wrong when references are /// SP-relative and simple call frames aren't used. -StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, - int FI, - Register &FrameReg) const { - return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0)); +StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + Register &FrameReg) const { + return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0)); } int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, @@ -2114,7 +2114,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, unsigned NumExtras = TargetAlign.value() / 4; SmallVector<unsigned, 2> Extras; while (NumExtras && !UnspilledCS1GPRs.empty()) { - unsigned Reg = UnspilledCS1GPRs.pop_back_val(); + unsigned Reg = UnspilledCS1GPRs.pop_back_val(); if (!MRI.isReserved(Reg) && (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) { Extras.push_back(Reg); @@ -2124,7 +2124,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // For non-Thumb1 functions, also check for hi-reg CS registers if (!AFI->isThumb1OnlyFunction()) { while (NumExtras && !UnspilledCS2GPRs.empty()) { - unsigned Reg = UnspilledCS2GPRs.pop_back_val(); + unsigned Reg = UnspilledCS2GPRs.pop_back_val(); if (!MRI.isReserved(Reg)) { Extras.push_back(Reg); NumExtras--; diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.h b/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.h index 9822e2321b..c609c07043 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.h +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMFrameLowering.h @@ -10,7 +10,7 @@ #define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H #include "llvm/CodeGen/TargetFrameLowering.h" -#include "llvm/Support/TypeSize.h" +#include "llvm/Support/TypeSize.h" namespace llvm { @@ -48,8 +48,8 @@ public: bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; - StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, - Register &FrameReg) const override; + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const override; int ResolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, int SPAdj) const; diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.cpp index f083fa6662..48df96b5e6 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -10,19 +10,19 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMSubtarget.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" - +#include "llvm/Support/CommandLine.h" + using namespace llvm; -static cl::opt<int> DataBankMask("arm-data-bank-mask", cl::init(-1), - cl::Hidden); -static cl::opt<bool> AssumeITCMConflict("arm-assume-itcm-bankconflict", - cl::init(false), cl::Hidden); - +static cl::opt<int> DataBankMask("arm-data-bank-mask", cl::init(-1), + cl::Hidden); +static cl::opt<bool> AssumeITCMConflict("arm-assume-itcm-bankconflict", + cl::init(false), cl::Hidden); + static bool 
hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, const TargetRegisterInfo &TRI) { // FIXME: Detect integer instructions properly. @@ -39,7 +39,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, } ScheduleHazardRecognizer::HazardType -ARMHazardRecognizerFPMLx::getHazardType(SUnit *SU, int Stalls) { +ARMHazardRecognizerFPMLx::getHazardType(SUnit *SU, int Stalls) { assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead"); MachineInstr *MI = SU->getInstr(); @@ -76,15 +76,15 @@ ARMHazardRecognizerFPMLx::getHazardType(SUnit *SU, int Stalls) { } } } - return NoHazard; + return NoHazard; } -void ARMHazardRecognizerFPMLx::Reset() { +void ARMHazardRecognizerFPMLx::Reset() { LastMI = nullptr; FpMLxStalls = 0; } -void ARMHazardRecognizerFPMLx::EmitInstruction(SUnit *SU) { +void ARMHazardRecognizerFPMLx::EmitInstruction(SUnit *SU) { MachineInstr *MI = SU->getInstr(); if (!MI->isDebugInstr()) { LastMI = MI; @@ -92,177 +92,177 @@ void ARMHazardRecognizerFPMLx::EmitInstruction(SUnit *SU) { } } -void ARMHazardRecognizerFPMLx::AdvanceCycle() { +void ARMHazardRecognizerFPMLx::AdvanceCycle() { if (FpMLxStalls && --FpMLxStalls == 0) // Stalled for 4 cycles but still can't schedule any other instructions. LastMI = nullptr; } -void ARMHazardRecognizerFPMLx::RecedeCycle() { +void ARMHazardRecognizerFPMLx::RecedeCycle() { llvm_unreachable("reverse ARM hazard checking unsupported"); } - -///////// Bank conflicts handled as hazards ////////////// - -static bool getBaseOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, - int64_t &Offset) { - - uint64_t TSFlags = MI.getDesc().TSFlags; - unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); - unsigned IndexMode = - (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; - - // Address mode tells us what we want to know about operands for T2 - // instructions (but not size). It tells us size (but not about operands) - // for T1 instructions. - switch (AddrMode) { - default: - return false; - case ARMII::AddrModeT2_i8: - // t2LDRBT, t2LDRB_POST, t2LDRB_PRE, t2LDRBi8, - // t2LDRHT, t2LDRH_POST, t2LDRH_PRE, t2LDRHi8, - // t2LDRSBT, t2LDRSB_POST, t2LDRSB_PRE, t2LDRSBi8, - // t2LDRSHT, t2LDRSH_POST, t2LDRSH_PRE, t2LDRSHi8, - // t2LDRT, t2LDR_POST, t2LDR_PRE, t2LDRi8 - BaseOp = &MI.getOperand(1); - Offset = (IndexMode == ARMII::IndexModePost) - ? 0 - : (IndexMode == ARMII::IndexModePre || - IndexMode == ARMII::IndexModeUpd) - ? MI.getOperand(3).getImm() - : MI.getOperand(2).getImm(); - return true; - case ARMII::AddrModeT2_i12: - // t2LDRBi12, t2LDRHi12 - // t2LDRSBi12, t2LDRSHi12 - // t2LDRi12 - BaseOp = &MI.getOperand(1); - Offset = MI.getOperand(2).getImm(); - return true; - case ARMII::AddrModeT2_i8s4: - // t2LDRD_POST, t2LDRD_PRE, t2LDRDi8 - BaseOp = &MI.getOperand(2); - Offset = (IndexMode == ARMII::IndexModePost) - ? 0 - : (IndexMode == ARMII::IndexModePre || - IndexMode == ARMII::IndexModeUpd) - ? MI.getOperand(4).getImm() - : MI.getOperand(3).getImm(); - return true; - case ARMII::AddrModeT1_1: - // tLDRBi, tLDRBr (watch out!), TLDRSB - case ARMII::AddrModeT1_2: - // tLDRHi, tLDRHr (watch out!), TLDRSH - case ARMII::AddrModeT1_4: - // tLDRi, tLDRr (watch out!) - BaseOp = &MI.getOperand(1); - Offset = MI.getOperand(2).isImm() ? 
MI.getOperand(2).getImm() : 0; - return MI.getOperand(2).isImm(); - } - return false; -} - -ARMBankConflictHazardRecognizer::ARMBankConflictHazardRecognizer( - const ScheduleDAG *DAG, int64_t CPUBankMask, bool CPUAssumeITCMConflict) - : ScheduleHazardRecognizer(), MF(DAG->MF), DL(DAG->MF.getDataLayout()), - DataMask(DataBankMask.getNumOccurrences() ? int64_t(DataBankMask) - : CPUBankMask), - AssumeITCMBankConflict(AssumeITCMConflict.getNumOccurrences() - ? AssumeITCMConflict - : CPUAssumeITCMConflict) { - MaxLookAhead = 1; -} - -ScheduleHazardRecognizer::HazardType -ARMBankConflictHazardRecognizer::CheckOffsets(unsigned O0, unsigned O1) { - return (((O0 ^ O1) & DataMask) != 0) ? NoHazard : Hazard; -} - -ScheduleHazardRecognizer::HazardType -ARMBankConflictHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { - MachineInstr &L0 = *SU->getInstr(); - if (!L0.mayLoad() || L0.mayStore() || L0.getNumMemOperands() != 1) - return NoHazard; - - auto MO0 = *L0.memoperands().begin(); - auto BaseVal0 = MO0->getValue(); - auto BasePseudoVal0 = MO0->getPseudoValue(); - int64_t Offset0 = 0; - - if (MO0->getSize() > 4) - return NoHazard; - - bool SPvalid = false; - const MachineOperand *SP = nullptr; - int64_t SPOffset0 = 0; - - for (auto L1 : Accesses) { - auto MO1 = *L1->memoperands().begin(); - auto BaseVal1 = MO1->getValue(); - auto BasePseudoVal1 = MO1->getPseudoValue(); - int64_t Offset1 = 0; - - // Pointers to the same object - if (BaseVal0 && BaseVal1) { - const Value *Ptr0, *Ptr1; - Ptr0 = GetPointerBaseWithConstantOffset(BaseVal0, Offset0, DL, true); - Ptr1 = GetPointerBaseWithConstantOffset(BaseVal1, Offset1, DL, true); - if (Ptr0 == Ptr1 && Ptr0) - return CheckOffsets(Offset0, Offset1); - } - - if (BasePseudoVal0 && BasePseudoVal1 && - BasePseudoVal0->kind() == BasePseudoVal1->kind() && - BasePseudoVal0->kind() == PseudoSourceValue::FixedStack) { - // Spills/fills - auto FS0 = cast<FixedStackPseudoSourceValue>(BasePseudoVal0); - auto FS1 = cast<FixedStackPseudoSourceValue>(BasePseudoVal1); - Offset0 = MF.getFrameInfo().getObjectOffset(FS0->getFrameIndex()); - Offset1 = MF.getFrameInfo().getObjectOffset(FS1->getFrameIndex()); - return CheckOffsets(Offset0, Offset1); - } - - // Constant pools (likely in ITCM) - if (BasePseudoVal0 && BasePseudoVal1 && - BasePseudoVal0->kind() == BasePseudoVal1->kind() && - BasePseudoVal0->isConstantPool() && AssumeITCMBankConflict) - return Hazard; - - // Is this a stack pointer-relative access? We could in general try to - // use "is this the same register and is it unchanged?", but the - // memory operand tracking is highly likely to have already found that. - // What we're after here is bank conflicts between different objects in - // the stack frame. 
- if (!SPvalid) { // set up SP - if (!getBaseOffset(L0, SP, SPOffset0) || SP->getReg().id() != ARM::SP) - SP = nullptr; - SPvalid = true; - } - if (SP) { - int64_t SPOffset1; - const MachineOperand *SP1; - if (getBaseOffset(*L1, SP1, SPOffset1) && SP1->getReg().id() == ARM::SP) - return CheckOffsets(SPOffset0, SPOffset1); - } - } - - return NoHazard; -} - -void ARMBankConflictHazardRecognizer::Reset() { Accesses.clear(); } - -void ARMBankConflictHazardRecognizer::EmitInstruction(SUnit *SU) { - MachineInstr &MI = *SU->getInstr(); - if (!MI.mayLoad() || MI.mayStore() || MI.getNumMemOperands() != 1) - return; - - auto MO = *MI.memoperands().begin(); - uint64_t Size1 = MO->getSize(); - if (Size1 > 4) - return; - Accesses.push_back(&MI); -} - -void ARMBankConflictHazardRecognizer::AdvanceCycle() { Accesses.clear(); } - -void ARMBankConflictHazardRecognizer::RecedeCycle() { Accesses.clear(); } + +///////// Bank conflicts handled as hazards ////////////// + +static bool getBaseOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, + int64_t &Offset) { + + uint64_t TSFlags = MI.getDesc().TSFlags; + unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); + unsigned IndexMode = + (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; + + // Address mode tells us what we want to know about operands for T2 + // instructions (but not size). It tells us size (but not about operands) + // for T1 instructions. + switch (AddrMode) { + default: + return false; + case ARMII::AddrModeT2_i8: + // t2LDRBT, t2LDRB_POST, t2LDRB_PRE, t2LDRBi8, + // t2LDRHT, t2LDRH_POST, t2LDRH_PRE, t2LDRHi8, + // t2LDRSBT, t2LDRSB_POST, t2LDRSB_PRE, t2LDRSBi8, + // t2LDRSHT, t2LDRSH_POST, t2LDRSH_PRE, t2LDRSHi8, + // t2LDRT, t2LDR_POST, t2LDR_PRE, t2LDRi8 + BaseOp = &MI.getOperand(1); + Offset = (IndexMode == ARMII::IndexModePost) + ? 0 + : (IndexMode == ARMII::IndexModePre || + IndexMode == ARMII::IndexModeUpd) + ? MI.getOperand(3).getImm() + : MI.getOperand(2).getImm(); + return true; + case ARMII::AddrModeT2_i12: + // t2LDRBi12, t2LDRHi12 + // t2LDRSBi12, t2LDRSHi12 + // t2LDRi12 + BaseOp = &MI.getOperand(1); + Offset = MI.getOperand(2).getImm(); + return true; + case ARMII::AddrModeT2_i8s4: + // t2LDRD_POST, t2LDRD_PRE, t2LDRDi8 + BaseOp = &MI.getOperand(2); + Offset = (IndexMode == ARMII::IndexModePost) + ? 0 + : (IndexMode == ARMII::IndexModePre || + IndexMode == ARMII::IndexModeUpd) + ? MI.getOperand(4).getImm() + : MI.getOperand(3).getImm(); + return true; + case ARMII::AddrModeT1_1: + // tLDRBi, tLDRBr (watch out!), TLDRSB + case ARMII::AddrModeT1_2: + // tLDRHi, tLDRHr (watch out!), TLDRSH + case ARMII::AddrModeT1_4: + // tLDRi, tLDRr (watch out!) + BaseOp = &MI.getOperand(1); + Offset = MI.getOperand(2).isImm() ? MI.getOperand(2).getImm() : 0; + return MI.getOperand(2).isImm(); + } + return false; +} + +ARMBankConflictHazardRecognizer::ARMBankConflictHazardRecognizer( + const ScheduleDAG *DAG, int64_t CPUBankMask, bool CPUAssumeITCMConflict) + : ScheduleHazardRecognizer(), MF(DAG->MF), DL(DAG->MF.getDataLayout()), + DataMask(DataBankMask.getNumOccurrences() ? int64_t(DataBankMask) + : CPUBankMask), + AssumeITCMBankConflict(AssumeITCMConflict.getNumOccurrences() + ? AssumeITCMConflict + : CPUAssumeITCMConflict) { + MaxLookAhead = 1; +} + +ScheduleHazardRecognizer::HazardType +ARMBankConflictHazardRecognizer::CheckOffsets(unsigned O0, unsigned O1) { + return (((O0 ^ O1) & DataMask) != 0) ? 
NoHazard : Hazard; +} + +ScheduleHazardRecognizer::HazardType +ARMBankConflictHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + MachineInstr &L0 = *SU->getInstr(); + if (!L0.mayLoad() || L0.mayStore() || L0.getNumMemOperands() != 1) + return NoHazard; + + auto MO0 = *L0.memoperands().begin(); + auto BaseVal0 = MO0->getValue(); + auto BasePseudoVal0 = MO0->getPseudoValue(); + int64_t Offset0 = 0; + + if (MO0->getSize() > 4) + return NoHazard; + + bool SPvalid = false; + const MachineOperand *SP = nullptr; + int64_t SPOffset0 = 0; + + for (auto L1 : Accesses) { + auto MO1 = *L1->memoperands().begin(); + auto BaseVal1 = MO1->getValue(); + auto BasePseudoVal1 = MO1->getPseudoValue(); + int64_t Offset1 = 0; + + // Pointers to the same object + if (BaseVal0 && BaseVal1) { + const Value *Ptr0, *Ptr1; + Ptr0 = GetPointerBaseWithConstantOffset(BaseVal0, Offset0, DL, true); + Ptr1 = GetPointerBaseWithConstantOffset(BaseVal1, Offset1, DL, true); + if (Ptr0 == Ptr1 && Ptr0) + return CheckOffsets(Offset0, Offset1); + } + + if (BasePseudoVal0 && BasePseudoVal1 && + BasePseudoVal0->kind() == BasePseudoVal1->kind() && + BasePseudoVal0->kind() == PseudoSourceValue::FixedStack) { + // Spills/fills + auto FS0 = cast<FixedStackPseudoSourceValue>(BasePseudoVal0); + auto FS1 = cast<FixedStackPseudoSourceValue>(BasePseudoVal1); + Offset0 = MF.getFrameInfo().getObjectOffset(FS0->getFrameIndex()); + Offset1 = MF.getFrameInfo().getObjectOffset(FS1->getFrameIndex()); + return CheckOffsets(Offset0, Offset1); + } + + // Constant pools (likely in ITCM) + if (BasePseudoVal0 && BasePseudoVal1 && + BasePseudoVal0->kind() == BasePseudoVal1->kind() && + BasePseudoVal0->isConstantPool() && AssumeITCMBankConflict) + return Hazard; + + // Is this a stack pointer-relative access? We could in general try to + // use "is this the same register and is it unchanged?", but the + // memory operand tracking is highly likely to have already found that. + // What we're after here is bank conflicts between different objects in + // the stack frame. 
+ if (!SPvalid) { // set up SP + if (!getBaseOffset(L0, SP, SPOffset0) || SP->getReg().id() != ARM::SP) + SP = nullptr; + SPvalid = true; + } + if (SP) { + int64_t SPOffset1; + const MachineOperand *SP1; + if (getBaseOffset(*L1, SP1, SPOffset1) && SP1->getReg().id() == ARM::SP) + return CheckOffsets(SPOffset0, SPOffset1); + } + } + + return NoHazard; +} + +void ARMBankConflictHazardRecognizer::Reset() { Accesses.clear(); } + +void ARMBankConflictHazardRecognizer::EmitInstruction(SUnit *SU) { + MachineInstr &MI = *SU->getInstr(); + if (!MI.mayLoad() || MI.mayStore() || MI.getNumMemOperands() != 1) + return; + + auto MO = *MI.memoperands().begin(); + uint64_t Size1 = MO->getSize(); + if (Size1 > 4) + return; + Accesses.push_back(&MI); +} + +void ARMBankConflictHazardRecognizer::AdvanceCycle() { Accesses.clear(); } + +void ARMBankConflictHazardRecognizer::RecedeCycle() { Accesses.clear(); } diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.h index c1f1bcd0a6..e6b5304488 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.h +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMHazardRecognizer.h @@ -13,28 +13,28 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H #define LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H -#include "ARMBaseInstrInfo.h" -#include "llvm/ADT/BitmaskEnum.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Support/DataTypes.h" -#include <array> -#include <initializer_list> +#include "ARMBaseInstrInfo.h" +#include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Support/DataTypes.h" +#include <array> +#include <initializer_list> namespace llvm { -class DataLayout; -class MachineFunction; +class DataLayout; +class MachineFunction; class MachineInstr; -class ScheduleDAG; +class ScheduleDAG; -// Hazards related to FP MLx instructions -class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer { +// Hazards related to FP MLx instructions +class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer { MachineInstr *LastMI = nullptr; unsigned FpMLxStalls = 0; public: - ARMHazardRecognizerFPMLx() : ScheduleHazardRecognizer() { MaxLookAhead = 1; } + ARMHazardRecognizerFPMLx() : ScheduleHazardRecognizer() { MaxLookAhead = 1; } HazardType getHazardType(SUnit *SU, int Stalls) override; void Reset() override; @@ -43,27 +43,27 @@ public: void RecedeCycle() override; }; -// Hazards related to bank conflicts -class ARMBankConflictHazardRecognizer : public ScheduleHazardRecognizer { - SmallVector<MachineInstr *, 8> Accesses; - const MachineFunction &MF; - const DataLayout &DL; - int64_t DataMask; - bool AssumeITCMBankConflict; - -public: - ARMBankConflictHazardRecognizer(const ScheduleDAG *DAG, int64_t DDM, - bool ABC); - HazardType getHazardType(SUnit *SU, int Stalls) override; - void Reset() override; - void EmitInstruction(SUnit *SU) override; - void AdvanceCycle() override; - void RecedeCycle() override; - -private: - inline HazardType CheckOffsets(unsigned O0, unsigned O1); -}; - +// Hazards related to bank conflicts +class ARMBankConflictHazardRecognizer : public ScheduleHazardRecognizer { + SmallVector<MachineInstr *, 8> Accesses; + const MachineFunction &MF; + const DataLayout &DL; + int64_t DataMask; + bool AssumeITCMBankConflict; + +public: + ARMBankConflictHazardRecognizer(const ScheduleDAG *DAG, int64_t DDM, + bool ABC); + HazardType getHazardType(SUnit 
*SU, int Stalls) override; + void Reset() override; + void EmitInstruction(SUnit *SU) override; + void AdvanceCycle() override; + void RecedeCycle() override; + +private: + inline HazardType CheckOffsets(unsigned O0, unsigned O1); +}; + } // end namespace llvm #endif diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.cpp index 598062672a..2daf77fb5e 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.cpp @@ -143,7 +143,7 @@ static cl::opt<unsigned> ConstpoolPromotionMaxTotal( cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128)); -cl::opt<unsigned> +cl::opt<unsigned> MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2)); @@ -289,8 +289,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::CTPOP, VT, Expand); - setOperationAction(ISD::SELECT, VT, Expand); - setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::SELECT, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); // Vector reductions setOperationAction(ISD::VECREDUCE_ADD, VT, Legal); @@ -337,8 +337,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::MLOAD, VT, Custom); setOperationAction(ISD::MSTORE, VT, Legal); - setOperationAction(ISD::SELECT, VT, Expand); - setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::SELECT, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); // Pre and Post inc are supported on loads and stores for (unsigned im = (unsigned)ISD::PRE_INC; @@ -443,9 +443,9 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); - setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Expand); - setOperationAction(ISD::SELECT, VT, Expand); + setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::SELECT, VT, Expand); } } @@ -994,8 +994,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SMAX); setTargetDAGCombine(ISD::UMAX); setTargetDAGCombine(ISD::FP_EXTEND); - setTargetDAGCombine(ISD::SELECT); - setTargetDAGCombine(ISD::SELECT_CC); + setTargetDAGCombine(ISD::SELECT); + setTargetDAGCombine(ISD::SELECT_CC); } if (!Subtarget->hasFP64()) { @@ -1725,11 +1725,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VCVTL: return "ARMISD::VCVTL"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; - case ARMISD::VQDMULH: return "ARMISD::VQDMULH"; + case ARMISD::VQDMULH: return "ARMISD::VQDMULH"; case ARMISD::VADDVs: return "ARMISD::VADDVs"; case ARMISD::VADDVu: return "ARMISD::VADDVu"; - case ARMISD::VADDVps: return "ARMISD::VADDVps"; - case ARMISD::VADDVpu: return "ARMISD::VADDVpu"; + case ARMISD::VADDVps: return "ARMISD::VADDVps"; + case ARMISD::VADDVpu: return "ARMISD::VADDVpu"; case ARMISD::VADDLVs: return "ARMISD::VADDLVs"; case ARMISD::VADDLVu: return "ARMISD::VADDLVu"; case ARMISD::VADDLVAs: return "ARMISD::VADDLVAs"; @@ -1740,20 
+1740,20 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VADDLVApu: return "ARMISD::VADDLVApu"; case ARMISD::VMLAVs: return "ARMISD::VMLAVs"; case ARMISD::VMLAVu: return "ARMISD::VMLAVu"; - case ARMISD::VMLAVps: return "ARMISD::VMLAVps"; - case ARMISD::VMLAVpu: return "ARMISD::VMLAVpu"; + case ARMISD::VMLAVps: return "ARMISD::VMLAVps"; + case ARMISD::VMLAVpu: return "ARMISD::VMLAVpu"; case ARMISD::VMLALVs: return "ARMISD::VMLALVs"; case ARMISD::VMLALVu: return "ARMISD::VMLALVu"; - case ARMISD::VMLALVps: return "ARMISD::VMLALVps"; - case ARMISD::VMLALVpu: return "ARMISD::VMLALVpu"; + case ARMISD::VMLALVps: return "ARMISD::VMLALVps"; + case ARMISD::VMLALVpu: return "ARMISD::VMLALVpu"; case ARMISD::VMLALVAs: return "ARMISD::VMLALVAs"; case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu"; - case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps"; - case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu"; - case ARMISD::VMINVu: return "ARMISD::VMINVu"; - case ARMISD::VMINVs: return "ARMISD::VMINVs"; - case ARMISD::VMAXVu: return "ARMISD::VMAXVu"; - case ARMISD::VMAXVs: return "ARMISD::VMAXVs"; + case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps"; + case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu"; + case ARMISD::VMINVu: return "ARMISD::VMINVu"; + case ARMISD::VMINVs: return "ARMISD::VMINVs"; + case ARMISD::VMAXVu: return "ARMISD::VMAXVu"; + case ARMISD::VMAXVs: return "ARMISD::VMAXVs"; case ARMISD::UMAAL: return "ARMISD::UMAAL"; case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; @@ -1777,7 +1777,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; - case ARMISD::VBSP: return "ARMISD::VBSP"; + case ARMISD::VBSP: return "ARMISD::VBSP"; case ARMISD::MEMCPY: return "ARMISD::MEMCPY"; case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP"; case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; @@ -2531,9 +2531,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(), - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOInvariant); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(), + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && "Windows is the only supported COFF target"); @@ -3342,7 +3342,7 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, SDValue Chain = DAG.getEntryNode(); SDValue FuncTLVGet = DAG.getLoad( MVT::i32, DL, Chain, DescAddr, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4), + MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4), MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); Chain = FuncTLVGet.getValue(1); @@ -3556,7 +3556,7 @@ static bool allUsersAreInFunction(const Value *V, const Function *F) { while (!Worklist.empty()) { auto *U = Worklist.pop_back_val(); if (isa<ConstantExpr>(U)) { - append_range(Worklist, U->users()); + append_range(Worklist, U->users()); continue; } @@ -4443,26 +4443,26 @@ SDValue ARMTargetLowering::LowerFormalArguments( } // varargs - if (isVarArg && MFI.hasVAStart()) { - VarArgStyleRegisters(CCInfo, DAG, dl, 
Chain, CCInfo.getNextStackOffset(), + if (isVarArg && MFI.hasVAStart()) { + VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(), TotalArgRegsSaveSize); - if (AFI->isCmseNSEntryFunction()) { - DiagnosticInfoUnsupported Diag( - DAG.getMachineFunction().getFunction(), - "secure entry function must not be variadic", dl.getDebugLoc()); - DAG.getContext()->diagnose(Diag); - } - } + if (AFI->isCmseNSEntryFunction()) { + DiagnosticInfoUnsupported Diag( + DAG.getMachineFunction().getFunction(), + "secure entry function must not be variadic", dl.getDebugLoc()); + DAG.getContext()->diagnose(Diag); + } + } AFI->setArgumentStackSize(CCInfo.getNextStackOffset()); - if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) { - DiagnosticInfoUnsupported Diag( - DAG.getMachineFunction().getFunction(), - "secure entry function requires arguments on stack", dl.getDebugLoc()); - DAG.getContext()->diagnose(Diag); - } - + if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) { + DiagnosticInfoUnsupported Diag( + DAG.getMachineFunction().getFunction(), + "secure entry function requires arguments on stack", dl.getDebugLoc()); + DAG.getContext()->diagnose(Diag); + } + return Chain; } @@ -5034,68 +5034,68 @@ static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, // x < k ? (x < -k ? -k : x) : k // etc. // -// LLVM canonicalizes these to either a min(max()) or a max(min()) -// pattern. This function tries to match one of these and will return a SSAT -// node if successful. +// LLVM canonicalizes these to either a min(max()) or a max(min()) +// pattern. This function tries to match one of these and will return a SSAT +// node if successful. // -// USAT works similarily to SSAT but bounds on the interval [0, k] where k + 1 -// is a power of 2. -static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - SDValue V1 = Op.getOperand(0); - SDValue K1 = Op.getOperand(1); +// USAT works similarily to SSAT but bounds on the interval [0, k] where k + 1 +// is a power of 2. +static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + SDValue V1 = Op.getOperand(0); + SDValue K1 = Op.getOperand(1); SDValue TrueVal1 = Op.getOperand(2); SDValue FalseVal1 = Op.getOperand(3); ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get(); const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? 
FalseVal1 : TrueVal1; if (Op2.getOpcode() != ISD::SELECT_CC) - return SDValue(); + return SDValue(); - SDValue V2 = Op2.getOperand(0); - SDValue K2 = Op2.getOperand(1); + SDValue V2 = Op2.getOperand(0); + SDValue K2 = Op2.getOperand(1); SDValue TrueVal2 = Op2.getOperand(2); SDValue FalseVal2 = Op2.getOperand(3); ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get(); - SDValue V1Tmp = V1; - SDValue V2Tmp = V2; + SDValue V1Tmp = V1; + SDValue V2Tmp = V2; - // Check that the registers and the constants match a max(min()) or min(max()) - // pattern - if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 || - K2 != FalseVal2 || - !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2)))) - return SDValue(); + // Check that the registers and the constants match a max(min()) or min(max()) + // pattern + if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 || + K2 != FalseVal2 || + !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2)))) + return SDValue(); // Check that the constant in the lower-bound check is // the opposite of the constant in the upper-bound check // in 1's complement. - if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2)) - return SDValue(); - - int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue(); - int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue(); + if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2)) + return SDValue(); + + int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue(); + int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue(); int64_t PosVal = std::max(Val1, Val2); int64_t NegVal = std::min(Val1, Val2); - if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) || - !isPowerOf2_64(PosVal + 1)) - return SDValue(); + if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) || + !isPowerOf2_64(PosVal + 1)) + return SDValue(); - // Handle the difference between USAT (unsigned) and SSAT (signed) - // saturation - // At this point, PosVal is guaranteed to be positive - uint64_t K = PosVal; - SDLoc dl(Op); - if (Val1 == ~Val2) - return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp, - DAG.getConstant(countTrailingOnes(K), dl, VT)); - if (NegVal == 0) - return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp, - DAG.getConstant(countTrailingOnes(K), dl, VT)); + // Handle the difference between USAT (unsigned) and SSAT (signed) + // saturation + // At this point, PosVal is guaranteed to be positive + uint64_t K = PosVal; + SDLoc dl(Op); + if (Val1 == ~Val2) + return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp, + DAG.getConstant(countTrailingOnes(K), dl, VT)); + if (NegVal == 0) + return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp, + DAG.getConstant(countTrailingOnes(K), dl, VT)); - return SDValue(); + return SDValue(); } // Check if a condition of the type x < k ? k : x can be converted into a @@ -5155,9 +5155,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); // Try to convert two saturating conditional selects into a single SSAT - if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) - if (SDValue SatValue = LowerSaturatingConditional(Op, DAG)) - return SatValue; + if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) + if (SDValue SatValue = LowerSaturatingConditional(Op, DAG)) + return SatValue; // Try to convert expressions of the form x < k ? 
k : x (and similar forms) // into more efficient bit operations, which is possible when k is 0 or -1 @@ -5166,7 +5166,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // instructions. // Only allow this transformation on full-width (32-bit) operations SDValue LowerSatConstant; - SDValue SatValue; + SDValue SatValue; if (VT == MVT::i32 && isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) { SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue, @@ -7750,19 +7750,19 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, for (auto &Src : Sources) { EVT SrcVT = Src.ShuffleVec.getValueType(); - uint64_t SrcVTSize = SrcVT.getFixedSizeInBits(); - uint64_t VTSize = VT.getFixedSizeInBits(); - if (SrcVTSize == VTSize) + uint64_t SrcVTSize = SrcVT.getFixedSizeInBits(); + uint64_t VTSize = VT.getFixedSizeInBits(); + if (SrcVTSize == VTSize) continue; // This stage of the search produces a source with the same element type as // the original, but with a total width matching the BUILD_VECTOR output. EVT EltVT = SrcVT.getVectorElementType(); - unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits(); + unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits(); EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts); - if (SrcVTSize < VTSize) { - if (2 * SrcVTSize != VTSize) + if (SrcVTSize < VTSize) { + if (2 * SrcVTSize != VTSize) return SDValue(); // We can pad out the smaller vector for free, so if it's part of a // shuffle... @@ -7772,7 +7772,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, continue; } - if (SrcVTSize != 2 * VTSize) + if (SrcVTSize != 2 * VTSize) return SDValue(); if (Src.MaxElt - Src.MinElt >= NumSrcElts) { @@ -7840,7 +7840,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // trunc. So only std::min(SrcBits, DestBits) actually get defined in this // segment. EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType(); - int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(), + int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(), VT.getScalarSizeInBits()); int LanesDefined = BitsDefined / BitsPerShuffleLane; @@ -8642,23 +8642,23 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, DAG.getConstant(ARMCC::NE, dl, MVT::i32)); } -// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0). -static SDValue LowerTruncatei1(SDValue N, SelectionDAG &DAG, - const ARMSubtarget *ST) { - assert(ST->hasMVEIntegerOps() && "Expected MVE!"); - EVT VT = N.getValueType(); - assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) && - "Expected a vector i1 type!"); - SDValue Op = N.getOperand(0); - EVT FromVT = Op.getValueType(); - SDLoc DL(N); - - SDValue And = - DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT)); - return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT), - DAG.getCondCode(ISD::SETNE)); -} - +// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0). 
+static SDValue LowerTruncatei1(SDValue N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + assert(ST->hasMVEIntegerOps() && "Expected MVE!"); + EVT VT = N.getValueType(); + assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) && + "Expected a vector i1 type!"); + SDValue Op = N.getOperand(0); + EVT FromVT = Op.getValueType(); + SDLoc DL(N); + + SDValue And = + DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT)); + return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT), + DAG.getCondCode(ISD::SETNE)); +} + /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each /// element has been zero/sign-extended, depending on the isSigned parameter, /// from an integer type half its size. @@ -8723,11 +8723,11 @@ static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { return false; } -/// isZeroExtended - Check if a node is a vector value that is zero-extended (or -/// any-extended) or a constant BUILD_VECTOR with zero-extended elements. +/// isZeroExtended - Check if a node is a vector value that is zero-extended (or +/// any-extended) or a constant BUILD_VECTOR with zero-extended elements. static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { - if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND || - ISD::isZEXTLoad(N)) + if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND || + ISD::isZEXTLoad(N)) return true; if (isExtendedBUILD_VECTOR(N, DAG, false)) return true; @@ -8795,14 +8795,14 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { } /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, -/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return -/// the unextended value. The unextended vector should be 64 bits so that it can +/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return +/// the unextended value. The unextended vector should be 64 bits so that it can /// be used as an operand to a VMULL instruction. If the original vector size /// before extension is less than 64 bits we add a an extension to resize /// the vector to 64 bits. static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { - if (N->getOpcode() == ISD::SIGN_EXTEND || - N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND) + if (N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND) return AddRequiredExtensionForVMULL(N->getOperand(0), DAG, N->getOperand(0)->getValueType(0), N->getValueType(0), @@ -9770,7 +9770,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget); - case ISD::TRUNCATE: return LowerTruncatei1(Op, DAG, Subtarget); + case ISD::TRUNCATE: return LowerTruncatei1(Op, DAG, Subtarget); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SDIV: @@ -10403,7 +10403,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, // Remove the landing pad successor from the invoke block and replace it // with the new dispatch block. 
- SmallVector<MachineBasicBlock*, 4> Successors(BB->successors()); + SmallVector<MachineBasicBlock*, 4> Successors(BB->successors()); while (!Successors.empty()) { MachineBasicBlock *SMBB = Successors.pop_back_val(); if (SMBB->isEHPad()) { @@ -10887,7 +10887,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) .addExternalSymbol("__chkstk"); - BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent()))) + BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent()))) .add(predOps(ARMCC::AL)) .addReg(Reg, RegState::Kill) .addReg(ARM::R4, RegState::Implicit | RegState::Kill) @@ -11266,14 +11266,14 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, return EmitLowered__chkstk(MI, BB); case ARM::WIN__DBZCHK: return EmitLowered__dbzchk(MI, BB); - case ARM::t2DoLoopStart: - // We are just here to set a register allocation hint, prefering lr for the - // input register to make it more likely to be movable and removable, later - // in the pipeline. - Register R = MI.getOperand(1).getReg(); - MachineFunction *MF = MI.getParent()->getParent(); - MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0); - return BB; + case ARM::t2DoLoopStart: + // We are just here to set a register allocation hint, prefering lr for the + // input register to make it more likely to be movable and removable, later + // in the pipeline. + Register R = MI.getOperand(1).getReg(); + MachineFunction *MF = MI.getParent()->getParent(); + MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0); + return BB; } } @@ -12115,198 +12115,198 @@ static SDValue PerformAddeSubeCombine(SDNode *N, return SDValue(); } -static SDValue PerformSELECTCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const ARMSubtarget *Subtarget) { - if (!Subtarget->hasMVEIntegerOps()) - return SDValue(); - - SDLoc dl(N); - SDValue SetCC; - SDValue LHS; - SDValue RHS; - ISD::CondCode CC; - SDValue TrueVal; - SDValue FalseVal; - - if (N->getOpcode() == ISD::SELECT && - N->getOperand(0)->getOpcode() == ISD::SETCC) { - SetCC = N->getOperand(0); - LHS = SetCC->getOperand(0); - RHS = SetCC->getOperand(1); - CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); - TrueVal = N->getOperand(1); - FalseVal = N->getOperand(2); - } else if (N->getOpcode() == ISD::SELECT_CC) { - LHS = N->getOperand(0); - RHS = N->getOperand(1); - CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); - TrueVal = N->getOperand(2); - FalseVal = N->getOperand(3); - } else { - return SDValue(); - } - - unsigned int Opcode = 0; - if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN || - FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) && - (CC == ISD::SETULT || CC == ISD::SETUGT)) { - Opcode = ARMISD::VMINVu; - if (CC == ISD::SETUGT) - std::swap(TrueVal, FalseVal); - } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN || - FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) && - (CC == ISD::SETLT || CC == ISD::SETGT)) { - Opcode = ARMISD::VMINVs; - if (CC == ISD::SETGT) - std::swap(TrueVal, FalseVal); - } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX || - FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) && - (CC == ISD::SETUGT || CC == ISD::SETULT)) { - Opcode = ARMISD::VMAXVu; - if (CC == ISD::SETULT) - std::swap(TrueVal, FalseVal); - } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX || - FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) && - (CC == ISD::SETGT || CC == ISD::SETLT)) { - Opcode = ARMISD::VMAXVs; - if (CC == ISD::SETLT) - std::swap(TrueVal, FalseVal); - } else - return SDValue(); - - 
// Normalise to the right hand side being the vector reduction - switch (TrueVal->getOpcode()) { - case ISD::VECREDUCE_UMIN: - case ISD::VECREDUCE_SMIN: - case ISD::VECREDUCE_UMAX: - case ISD::VECREDUCE_SMAX: - std::swap(LHS, RHS); - std::swap(TrueVal, FalseVal); - break; - } - - EVT VectorType = FalseVal->getOperand(0).getValueType(); - - if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 && - VectorType != MVT::v4i32) - return SDValue(); - - EVT VectorScalarType = VectorType.getVectorElementType(); - - // The values being selected must also be the ones being compared - if (TrueVal != LHS || FalseVal != RHS) - return SDValue(); - - EVT LeftType = LHS->getValueType(0); - EVT RightType = RHS->getValueType(0); - - // The types must match the reduced type too - if (LeftType != VectorScalarType || RightType != VectorScalarType) - return SDValue(); - - // Legalise the scalar to an i32 - if (VectorScalarType != MVT::i32) - LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); - - // Generate the reduction as an i32 for legalisation purposes - auto Reduction = - DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0)); - - // The result isn't actually an i32 so truncate it back to its original type - if (VectorScalarType != MVT::i32) - Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction); - - return Reduction; -} - -// A special combine for the vqdmulh family of instructions. This is one of the -// potential set of patterns that could patch this instruction. The base pattern -// you would expect to be min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))). -// This matches the different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))), -// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15))) as -// the max is unnecessary. -static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - SDValue Shft; - ConstantSDNode *Clamp; - - if (N->getOpcode() == ISD::SMIN) { - Shft = N->getOperand(0); - Clamp = isConstOrConstSplat(N->getOperand(1)); - } else if (N->getOpcode() == ISD::VSELECT) { - // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin. 
- SDValue Cmp = N->getOperand(0); - if (Cmp.getOpcode() != ISD::SETCC || - cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT || - Cmp.getOperand(0) != N->getOperand(1) || - Cmp.getOperand(1) != N->getOperand(2)) - return SDValue(); - Shft = N->getOperand(1); - Clamp = isConstOrConstSplat(N->getOperand(2)); - } else - return SDValue(); - - if (!Clamp) - return SDValue(); - - MVT ScalarType; - int ShftAmt = 0; - switch (Clamp->getSExtValue()) { - case (1 << 7) - 1: - ScalarType = MVT::i8; - ShftAmt = 7; - break; - case (1 << 15) - 1: - ScalarType = MVT::i16; - ShftAmt = 15; - break; - case (1ULL << 31) - 1: - ScalarType = MVT::i32; - ShftAmt = 31; - break; - default: - return SDValue(); - } - - if (Shft.getOpcode() != ISD::SRA) - return SDValue(); - ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1)); - if (!N1 || N1->getSExtValue() != ShftAmt) - return SDValue(); - - SDValue Mul = Shft.getOperand(0); - if (Mul.getOpcode() != ISD::MUL) - return SDValue(); - - SDValue Ext0 = Mul.getOperand(0); - SDValue Ext1 = Mul.getOperand(1); - if (Ext0.getOpcode() != ISD::SIGN_EXTEND || - Ext1.getOpcode() != ISD::SIGN_EXTEND) - return SDValue(); - EVT VecVT = Ext0.getOperand(0).getValueType(); - if (VecVT != MVT::v4i32 && VecVT != MVT::v8i16 && VecVT != MVT::v16i8) - return SDValue(); - if (Ext1.getOperand(0).getValueType() != VecVT || - VecVT.getScalarType() != ScalarType || - VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2) - return SDValue(); - - SDLoc DL(Mul); - SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, VecVT, Ext0.getOperand(0), - Ext1.getOperand(0)); - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, VQDMULH); -} - +static SDValue PerformSELECTCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + if (!Subtarget->hasMVEIntegerOps()) + return SDValue(); + + SDLoc dl(N); + SDValue SetCC; + SDValue LHS; + SDValue RHS; + ISD::CondCode CC; + SDValue TrueVal; + SDValue FalseVal; + + if (N->getOpcode() == ISD::SELECT && + N->getOperand(0)->getOpcode() == ISD::SETCC) { + SetCC = N->getOperand(0); + LHS = SetCC->getOperand(0); + RHS = SetCC->getOperand(1); + CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); + TrueVal = N->getOperand(1); + FalseVal = N->getOperand(2); + } else if (N->getOpcode() == ISD::SELECT_CC) { + LHS = N->getOperand(0); + RHS = N->getOperand(1); + CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); + TrueVal = N->getOperand(2); + FalseVal = N->getOperand(3); + } else { + return SDValue(); + } + + unsigned int Opcode = 0; + if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN || + FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) && + (CC == ISD::SETULT || CC == ISD::SETUGT)) { + Opcode = ARMISD::VMINVu; + if (CC == ISD::SETUGT) + std::swap(TrueVal, FalseVal); + } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN || + FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) && + (CC == ISD::SETLT || CC == ISD::SETGT)) { + Opcode = ARMISD::VMINVs; + if (CC == ISD::SETGT) + std::swap(TrueVal, FalseVal); + } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX || + FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) && + (CC == ISD::SETUGT || CC == ISD::SETULT)) { + Opcode = ARMISD::VMAXVu; + if (CC == ISD::SETULT) + std::swap(TrueVal, FalseVal); + } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX || + FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) && + (CC == ISD::SETGT || CC == ISD::SETLT)) { + Opcode = ARMISD::VMAXVs; + if (CC == ISD::SETLT) + std::swap(TrueVal, FalseVal); + } else + return SDValue(); + + // 
Normalise to the right hand side being the vector reduction + switch (TrueVal->getOpcode()) { + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_SMAX: + std::swap(LHS, RHS); + std::swap(TrueVal, FalseVal); + break; + } + + EVT VectorType = FalseVal->getOperand(0).getValueType(); + + if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 && + VectorType != MVT::v4i32) + return SDValue(); + + EVT VectorScalarType = VectorType.getVectorElementType(); + + // The values being selected must also be the ones being compared + if (TrueVal != LHS || FalseVal != RHS) + return SDValue(); + + EVT LeftType = LHS->getValueType(0); + EVT RightType = RHS->getValueType(0); + + // The types must match the reduced type too + if (LeftType != VectorScalarType || RightType != VectorScalarType) + return SDValue(); + + // Legalise the scalar to an i32 + if (VectorScalarType != MVT::i32) + LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); + + // Generate the reduction as an i32 for legalisation purposes + auto Reduction = + DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0)); + + // The result isn't actually an i32 so truncate it back to its original type + if (VectorScalarType != MVT::i32) + Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction); + + return Reduction; +} + +// A special combine for the vqdmulh family of instructions. This is one of the +// potential set of patterns that could patch this instruction. The base pattern +// you would expect to be min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))). +// This matches the different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))), +// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15))) as +// the max is unnecessary. +static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + SDValue Shft; + ConstantSDNode *Clamp; + + if (N->getOpcode() == ISD::SMIN) { + Shft = N->getOperand(0); + Clamp = isConstOrConstSplat(N->getOperand(1)); + } else if (N->getOpcode() == ISD::VSELECT) { + // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin. 
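The comment above sketches the pattern PerformVQDMULHCombine matches. For 16-bit lanes, smin(ashr(mul(sext(a), sext(b)), 15), 32767) computes exactly a saturating doubling multiply returning the high half, and only the upper clamp can ever fire. A standalone C++ sketch of that equivalence, assuming arithmetic right shifts and the usual sat16((2*a*b) >> 16) definition (the helper name qdmulh_s16 is hypothetical, not from the patch):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Scalar analogue of one vqdmulh.s16 lane under the assumptions above.
static int16_t qdmulh_s16(int16_t a, int16_t b) {
  int64_t prod2 = 2 * int64_t(a) * int64_t(b);       // doubling multiply
  int32_t hi = int32_t(prod2 >> 16);                 // return the high half
  return int16_t(std::min(hi, int32_t(INT16_MAX)));  // only the upper clamp can fire
}

int main() {
  // The DAG shape being matched: smin(ashr(mul(sext(a), sext(b)), 15), 32767).
  auto pattern = [](int16_t a, int16_t b) {
    int32_t shifted = (int32_t(a) * int32_t(b)) >> 15;  // arithmetic shift assumed
    return int16_t(std::min(shifted, int32_t(INT16_MAX)));
  };
  for (int a = -32768; a <= 32767; a += 257)
    for (int b = -32768; b <= 32767; b += 263)
      assert(pattern(int16_t(a), int16_t(b)) == qdmulh_s16(int16_t(a), int16_t(b)));
  // The lower clamp is redundant: the only out-of-range product, (-32768)^2,
  // overflows upwards, which is why the combine only needs to see the smin.
  assert(qdmulh_s16(INT16_MIN, INT16_MIN) == INT16_MAX);
  return 0;
}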
+ SDValue Cmp = N->getOperand(0); + if (Cmp.getOpcode() != ISD::SETCC || + cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT || + Cmp.getOperand(0) != N->getOperand(1) || + Cmp.getOperand(1) != N->getOperand(2)) + return SDValue(); + Shft = N->getOperand(1); + Clamp = isConstOrConstSplat(N->getOperand(2)); + } else + return SDValue(); + + if (!Clamp) + return SDValue(); + + MVT ScalarType; + int ShftAmt = 0; + switch (Clamp->getSExtValue()) { + case (1 << 7) - 1: + ScalarType = MVT::i8; + ShftAmt = 7; + break; + case (1 << 15) - 1: + ScalarType = MVT::i16; + ShftAmt = 15; + break; + case (1ULL << 31) - 1: + ScalarType = MVT::i32; + ShftAmt = 31; + break; + default: + return SDValue(); + } + + if (Shft.getOpcode() != ISD::SRA) + return SDValue(); + ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1)); + if (!N1 || N1->getSExtValue() != ShftAmt) + return SDValue(); + + SDValue Mul = Shft.getOperand(0); + if (Mul.getOpcode() != ISD::MUL) + return SDValue(); + + SDValue Ext0 = Mul.getOperand(0); + SDValue Ext1 = Mul.getOperand(1); + if (Ext0.getOpcode() != ISD::SIGN_EXTEND || + Ext1.getOpcode() != ISD::SIGN_EXTEND) + return SDValue(); + EVT VecVT = Ext0.getOperand(0).getValueType(); + if (VecVT != MVT::v4i32 && VecVT != MVT::v8i16 && VecVT != MVT::v16i8) + return SDValue(); + if (Ext1.getOperand(0).getValueType() != VecVT || + VecVT.getScalarType() != ScalarType || + VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2) + return SDValue(); + + SDLoc DL(Mul); + SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, VecVT, Ext0.getOperand(0), + Ext1.getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, VQDMULH); +} + static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - if (!Subtarget->hasMVEIntegerOps()) - return SDValue(); - - if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG)) - return V; - + if (!Subtarget->hasMVEIntegerOps()) + return SDValue(); + + if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG)) + return V; + // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs). // // We need to re-implement this optimization here as the implementation in the @@ -12456,14 +12456,14 @@ static SDValue PerformADDVecReduce(SDNode *N, return M; if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0)) return M; - if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1)) - return M; - if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1)) - return M; - if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0)) - return M; - if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0)) - return M; + if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1)) + return M; + if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1)) + return M; + if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0)) + return M; + if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0)) + return M; return SDValue(); } @@ -13358,7 +13358,7 @@ static SDValue PerformORCombine(SDNode *N, // Canonicalize the vector type to make instruction selection // simpler. EVT CanonicalVT = VT.is128BitVector() ? 
MVT::v4i32 : MVT::v2i32; - SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT, + SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT, N0->getOperand(1), N0->getOperand(0), N1->getOperand(0)); @@ -13669,12 +13669,12 @@ static SDValue PerformVMOVrhCombine(SDNode *N, SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (VMOVrh (fpconst x)) -> const x - if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) { - APFloat V = C->getValueAPF(); - return DCI.DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT); - } - + // fold (VMOVrh (fpconst x)) -> const x + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) { + APFloat V = C->getValueAPF(); + return DCI.DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT); + } + // fold (VMOVrh (load x)) -> (zextload (i16*)x) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -13849,23 +13849,23 @@ PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0)); } - // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce - // more VPNOT which might get folded as else predicates. - if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) { - SDValue X = - DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0)); - SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, - DCI.DAG.getConstant(65535, dl, MVT::i32)); - return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C); - } - - // Only the bottom 16 bits of the source register are used. - if (Op.getValueType() == MVT::i32) { - APInt DemandedMask = APInt::getLowBitsSet(32, 16); - const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo(); - if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI)) - return SDValue(N, 0); - } + // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce + // more VPNOT which might get folded as else predicates. + if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) { + SDValue X = + DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0)); + SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, + DCI.DAG.getConstant(65535, dl, MVT::i32)); + return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C); + } + + // Only the bottom 16 bits of the source register are used. + if (Op.getValueType() == MVT::i32) { + APInt DemandedMask = APInt::getLowBitsSet(32, 16); + const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo(); + if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI)) + return SDValue(N, 0); + } return SDValue(); } @@ -14078,13 +14078,13 @@ static SDValue CombineBaseUpdate(SDNode *N, NumVecs = 3; break; case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; NumVecs = 4; break; - case Intrinsic::arm_neon_vld1x2: - case Intrinsic::arm_neon_vld1x3: - case Intrinsic::arm_neon_vld1x4: + case Intrinsic::arm_neon_vld1x2: + case Intrinsic::arm_neon_vld1x3: + case Intrinsic::arm_neon_vld1x4: case Intrinsic::arm_neon_vld2dup: case Intrinsic::arm_neon_vld3dup: case Intrinsic::arm_neon_vld4dup: - // TODO: Support updating VLD1x and VLDxDUP nodes. For now, we just skip + // TODO: Support updating VLD1x and VLDxDUP nodes. For now, we just skip // combining base updates for such intrinsics. continue; case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; @@ -14676,39 +14676,39 @@ static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, // use the VMOVN over splitting the store. 
We are looking for patterns of: // !rev: 0 N 1 N+1 2 N+2 ... // rev: N 0 N+1 1 N+2 2 ... - // The shuffle may either be a single source (in which case N = NumElts/2) or - // two inputs extended with concat to the same size (in which case N = - // NumElts). - auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) { - ArrayRef<int> M = SVN->getMask(); + // The shuffle may either be a single source (in which case N = NumElts/2) or + // two inputs extended with concat to the same size (in which case N = + // NumElts). + auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) { + ArrayRef<int> M = SVN->getMask(); unsigned NumElts = ToVT.getVectorNumElements(); - if (SVN->getOperand(1).isUndef()) - NumElts /= 2; + if (SVN->getOperand(1).isUndef()) + NumElts /= 2; - unsigned Off0 = Rev ? NumElts : 0; - unsigned Off1 = Rev ? 0 : NumElts; + unsigned Off0 = Rev ? NumElts : 0; + unsigned Off1 = Rev ? 0 : NumElts; - for (unsigned I = 0; I < NumElts; I += 2) { - if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2)) + for (unsigned I = 0; I < NumElts; I += 2) { + if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2)) return false; - if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2)) + if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2)) return false; } return true; }; - // It may be preferable to keep the store unsplit as the trunc may end up - // being removed. Check that here. - if (Trunc.getOperand(0).getOpcode() == ISD::SMIN) { - if (SDValue U = PerformVQDMULHCombine(Trunc.getOperand(0).getNode(), DAG)) { - DAG.ReplaceAllUsesWith(Trunc.getOperand(0), U); - return SDValue(); - } - } - if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0))) - if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true)) + // It may be preferable to keep the store unsplit as the trunc may end up + // being removed. Check that here. + if (Trunc.getOperand(0).getOpcode() == ISD::SMIN) { + if (SDValue U = PerformVQDMULHCombine(Trunc.getOperand(0).getNode(), DAG)) { + DAG.ReplaceAllUsesWith(Trunc.getOperand(0), U); return SDValue(); + } + } + if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0))) + if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true)) + return SDValue(); LLVMContext &C = *DAG.getContext(); SDLoc DL(St); @@ -14728,8 +14728,8 @@ static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, SmallVector<SDValue, 4> Stores; for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) { unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8; - SDValue NewPtr = - DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset)); + SDValue NewPtr = + DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset)); SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0), @@ -14782,15 +14782,15 @@ static SDValue PerformSTORECombine(SDNode *N, SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore( St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0), - BasePtr, St->getPointerInfo(), St->getOriginalAlign(), + BasePtr, St->getPointerInfo(), St->getOriginalAlign(), St->getMemOperand()->getFlags()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, DL, MVT::i32)); return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(isBigEndian ? 
0 : 1), - OffsetPtr, St->getPointerInfo().getWithOffset(4), - St->getOriginalAlign(), + OffsetPtr, St->getPointerInfo().getWithOffset(4), + St->getOriginalAlign(), St->getMemOperand()->getFlags()); } @@ -14964,107 +14964,107 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, // VADDLV u/s 32 // VMLALV u/s 16/32 - // If the input vector is smaller than legal (v4i8/v4i16 for example) we can - // extend it and use v4i32 instead. - auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) { - EVT AVT = A.getValueType(); - if (!AVT.is128BitVector()) - A = DAG.getNode(ExtendCode, dl, - AVT.changeVectorElementType(MVT::getIntegerVT( - 128 / AVT.getVectorMinNumElements())), - A); - return A; - }; + // If the input vector is smaller than legal (v4i8/v4i16 for example) we can + // extend it and use v4i32 instead. + auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) { + EVT AVT = A.getValueType(); + if (!AVT.is128BitVector()) + A = DAG.getNode(ExtendCode, dl, + AVT.changeVectorElementType(MVT::getIntegerVT( + 128 / AVT.getVectorMinNumElements())), + A); + return A; + }; auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) { if (ResVT != RetTy || N0->getOpcode() != ExtendCode) return SDValue(); SDValue A = N0->getOperand(0); if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; })) - return ExtendIfNeeded(A, ExtendCode); - return SDValue(); - }; - auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode, - ArrayRef<MVT> ExtTypes, SDValue &Mask) { - if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT || - !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode())) - return SDValue(); - Mask = N0->getOperand(0); - SDValue Ext = N0->getOperand(1); - if (Ext->getOpcode() != ExtendCode) - return SDValue(); - SDValue A = Ext->getOperand(0); - if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; })) - return ExtendIfNeeded(A, ExtendCode); + return ExtendIfNeeded(A, ExtendCode); return SDValue(); }; + auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode, + ArrayRef<MVT> ExtTypes, SDValue &Mask) { + if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT || + !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode())) + return SDValue(); + Mask = N0->getOperand(0); + SDValue Ext = N0->getOperand(1); + if (Ext->getOpcode() != ExtendCode) + return SDValue(); + SDValue A = Ext->getOperand(0); + if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; })) + return ExtendIfNeeded(A, ExtendCode); + return SDValue(); + }; auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes, SDValue &A, SDValue &B) { - // For a vmla we are trying to match a larger pattern: - // ExtA = sext/zext A - // ExtB = sext/zext B - // Mul = mul ExtA, ExtB - // vecreduce.add Mul - // There might also be en extra extend between the mul and the addreduce, so - // long as the bitwidth is high enough to make them equivalent (for example - // original v8i16 might be mul at v8i32 and the reduce happens at v8i64). - if (ResVT != RetTy) + // For a vmla we are trying to match a larger pattern: + // ExtA = sext/zext A + // ExtB = sext/zext B + // Mul = mul ExtA, ExtB + // vecreduce.add Mul + // There might also be en extra extend between the mul and the addreduce, so + // long as the bitwidth is high enough to make them equivalent (for example + // original v8i16 might be mul at v8i32 and the reduce happens at v8i64). 
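A minimal scalar model of the vecreduce.add(mul(sext(A), sext(B))) shape that IsVMLAV matches, including the extra intermediate extend the comment above allows (standalone C++ sketch; the helper name vmlalv_s16 is hypothetical, not from the patch):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Widening dot product: the value a single VMLALVs computes for i16 inputs.
static int64_t vmlalv_s16(const std::vector<int16_t> &a,
                          const std::vector<int16_t> &b) {
  int64_t acc = 0;
  for (std::size_t i = 0; i < a.size(); ++i)
    acc += int64_t(a[i]) * int64_t(b[i]);  // sext both inputs, multiply, accumulate
  return acc;
}

int main() {
  std::vector<int16_t> a = {100, -200, 300, -400, 500, -600, 700, -800};
  std::vector<int16_t> b = {-1, 2, -3, 4, -5, 6, -7, 8};

  // The same sum with an extra intermediate extend (mul at i32, reduce at i64):
  // as long as the intermediate width holds the products exactly, the result
  // is unchanged, which is the equivalence the bitwidth check above verifies.
  int64_t viaI32 = 0;
  for (std::size_t i = 0; i < a.size(); ++i)
    viaI32 += int64_t(int32_t(a[i]) * int32_t(b[i]));

  assert(viaI32 == vmlalv_s16(a, b));
  return 0;
}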
+ if (ResVT != RetTy) return false; - SDValue Mul = N0; - if (Mul->getOpcode() == ExtendCode && - Mul->getOperand(0).getScalarValueSizeInBits() * 2 >= - ResVT.getScalarSizeInBits()) - Mul = Mul->getOperand(0); - if (Mul->getOpcode() != ISD::MUL) - return false; - SDValue ExtA = Mul->getOperand(0); - SDValue ExtB = Mul->getOperand(1); + SDValue Mul = N0; + if (Mul->getOpcode() == ExtendCode && + Mul->getOperand(0).getScalarValueSizeInBits() * 2 >= + ResVT.getScalarSizeInBits()) + Mul = Mul->getOperand(0); + if (Mul->getOpcode() != ISD::MUL) + return false; + SDValue ExtA = Mul->getOperand(0); + SDValue ExtB = Mul->getOperand(1); if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode) return false; A = ExtA->getOperand(0); B = ExtB->getOperand(0); if (A.getValueType() == B.getValueType() && - llvm::any_of(ExtTypes, - [&A](MVT Ty) { return A.getValueType() == Ty; })) { - A = ExtendIfNeeded(A, ExtendCode); - B = ExtendIfNeeded(B, ExtendCode); + llvm::any_of(ExtTypes, + [&A](MVT Ty) { return A.getValueType() == Ty; })) { + A = ExtendIfNeeded(A, ExtendCode); + B = ExtendIfNeeded(B, ExtendCode); return true; - } - return false; - }; - auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes, - SDValue &A, SDValue &B, SDValue &Mask) { - // Same as the pattern above with a select for the zero predicated lanes - // ExtA = sext/zext A - // ExtB = sext/zext B - // Mul = mul ExtA, ExtB - // N0 = select Mask, Mul, 0 - // vecreduce.add N0 - if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT || - !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode())) - return false; - Mask = N0->getOperand(0); - SDValue Mul = N0->getOperand(1); - if (Mul->getOpcode() == ExtendCode && - Mul->getOperand(0).getScalarValueSizeInBits() * 2 >= - ResVT.getScalarSizeInBits()) - Mul = Mul->getOperand(0); - if (Mul->getOpcode() != ISD::MUL) - return false; - SDValue ExtA = Mul->getOperand(0); - SDValue ExtB = Mul->getOperand(1); - if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode) - return false; - A = ExtA->getOperand(0); - B = ExtB->getOperand(0); - if (A.getValueType() == B.getValueType() && - llvm::any_of(ExtTypes, - [&A](MVT Ty) { return A.getValueType() == Ty; })) { - A = ExtendIfNeeded(A, ExtendCode); - B = ExtendIfNeeded(B, ExtendCode); - return true; - } + } return false; }; + auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes, + SDValue &A, SDValue &B, SDValue &Mask) { + // Same as the pattern above with a select for the zero predicated lanes + // ExtA = sext/zext A + // ExtB = sext/zext B + // Mul = mul ExtA, ExtB + // N0 = select Mask, Mul, 0 + // vecreduce.add N0 + if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT || + !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode())) + return false; + Mask = N0->getOperand(0); + SDValue Mul = N0->getOperand(1); + if (Mul->getOpcode() == ExtendCode && + Mul->getOperand(0).getScalarValueSizeInBits() * 2 >= + ResVT.getScalarSizeInBits()) + Mul = Mul->getOperand(0); + if (Mul->getOpcode() != ISD::MUL) + return false; + SDValue ExtA = Mul->getOperand(0); + SDValue ExtB = Mul->getOperand(1); + if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode) + return false; + A = ExtA->getOperand(0); + B = ExtB->getOperand(0); + if (A.getValueType() == B.getValueType() && + llvm::any_of(ExtTypes, + [&A](MVT Ty) { return A.getValueType() == Ty; })) { + A = ExtendIfNeeded(A, ExtendCode); + B = ExtendIfNeeded(B, ExtendCode); + return true; + } + return false; + }; auto Create64bitNode = 
[&](unsigned Opcode, ArrayRef<SDValue> Ops) { SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops); return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node, @@ -15075,93 +15075,93 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A); if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8})) return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A); - if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, - {MVT::v4i8, MVT::v4i16, MVT::v4i32})) + if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, + {MVT::v4i8, MVT::v4i16, MVT::v4i32})) return Create64bitNode(ARMISD::VADDLVs, {A}); - if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, - {MVT::v4i8, MVT::v4i16, MVT::v4i32})) + if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, + {MVT::v4i8, MVT::v4i16, MVT::v4i32})) return Create64bitNode(ARMISD::VADDLVu, {A}); - if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8})) - return DAG.getNode(ISD::TRUNCATE, dl, ResVT, - DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A)); - if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8})) - return DAG.getNode(ISD::TRUNCATE, dl, ResVT, - DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A)); - - SDValue Mask; - if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask)) - return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask); - if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask)) - return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask); - if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, - {MVT::v4i8, MVT::v4i16, MVT::v4i32}, Mask)) - return Create64bitNode(ARMISD::VADDLVps, {A, Mask}); - if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, - {MVT::v4i8, MVT::v4i16, MVT::v4i32}, Mask)) - return Create64bitNode(ARMISD::VADDLVpu, {A, Mask}); - if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask)) - return DAG.getNode(ISD::TRUNCATE, dl, ResVT, - DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask)); - if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask)) - return DAG.getNode(ISD::TRUNCATE, dl, ResVT, - DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask)); - + if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8})) + return DAG.getNode(ISD::TRUNCATE, dl, ResVT, + DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A)); + if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8})) + return DAG.getNode(ISD::TRUNCATE, dl, ResVT, + DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A)); + + SDValue Mask; + if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask)) + return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask); + if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask)) + return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask); + if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, + {MVT::v4i8, MVT::v4i16, MVT::v4i32}, Mask)) + return Create64bitNode(ARMISD::VADDLVps, {A, Mask}); + if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, + {MVT::v4i8, MVT::v4i16, MVT::v4i32}, Mask)) + return Create64bitNode(ARMISD::VADDLVpu, {A, Mask}); + if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask)) + return DAG.getNode(ISD::TRUNCATE, dl, ResVT, + DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask)); + if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask)) + return DAG.getNode(ISD::TRUNCATE, dl, ResVT, + DAG.getNode(ARMISD::VADDVpu, 
dl, MVT::i32, A, Mask)); + SDValue A, B; if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B)) return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B); if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B)) return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B); - if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, - {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, B)) + if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, + {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, B)) return Create64bitNode(ARMISD::VMLALVs, {A, B}); - if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, - {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, B)) + if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, + {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, B)) return Create64bitNode(ARMISD::VMLALVu, {A, B}); - if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B)) - return DAG.getNode(ISD::TRUNCATE, dl, ResVT, - DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B)); - if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B)) - return DAG.getNode(ISD::TRUNCATE, dl, ResVT, - DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B)); - - if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B, Mask)) - return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask); - if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B, Mask)) - return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask); - if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, - {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, - B, Mask)) - return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask}); - if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, - {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, - B, Mask)) - return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask}); - if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask)) - return DAG.getNode(ISD::TRUNCATE, dl, ResVT, - DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask)); - if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask)) - return DAG.getNode(ISD::TRUNCATE, dl, ResVT, - DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask)); - - // Some complications. We can get a case where the two inputs of the mul are - // the same, then the output sext will have been helpfully converted to a - // zext. Turn it back. 
- SDValue Op = N0; - if (Op->getOpcode() == ISD::VSELECT) - Op = Op->getOperand(1); - if (Op->getOpcode() == ISD::ZERO_EXTEND && - Op->getOperand(0)->getOpcode() == ISD::MUL) { - SDValue Mul = Op->getOperand(0); - if (Mul->getOperand(0) == Mul->getOperand(1) && - Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) { - SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul); - if (Op != N0) - Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0), - N0->getOperand(0), Ext, N0->getOperand(2)); - return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext); - } - } - + if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B)) + return DAG.getNode(ISD::TRUNCATE, dl, ResVT, + DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B)); + if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B)) + return DAG.getNode(ISD::TRUNCATE, dl, ResVT, + DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B)); + + if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B, Mask)) + return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask); + if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B, Mask)) + return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask); + if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, + {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, + B, Mask)) + return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask}); + if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, + {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, + B, Mask)) + return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask}); + if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask)) + return DAG.getNode(ISD::TRUNCATE, dl, ResVT, + DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask)); + if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask)) + return DAG.getNode(ISD::TRUNCATE, dl, ResVT, + DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask)); + + // Some complications. We can get a case where the two inputs of the mul are + // the same, then the output sext will have been helpfully converted to a + // zext. Turn it back. 
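The "turn the zext back into a sext" step described above works because the product of two identical sign-extended operands is never negative, so either extension of that product yields the same value. A small standalone C++ check of the identity (illustrative only, not from the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x <= 127; ++x) {
    int8_t v = int8_t(x);
    int16_t prod = int16_t(int16_t(v) * int16_t(v));      // sext(x) * sext(x) at i16
    assert(prod >= 0);                                    // squaring never goes negative
    int64_t viaSext = int64_t(prod);                      // sign-extend the product
    int64_t viaZext = int64_t(uint64_t(uint16_t(prod)));  // zero-extend the product
    assert(viaSext == viaZext);                           // so zext and sext agree
  }
  return 0;
}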
+ SDValue Op = N0; + if (Op->getOpcode() == ISD::VSELECT) + Op = Op->getOperand(1); + if (Op->getOpcode() == ISD::ZERO_EXTEND && + Op->getOperand(0)->getOpcode() == ISD::MUL) { + SDValue Mul = Op->getOperand(0); + if (Mul->getOperand(0) == Mul->getOperand(1) && + Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) { + SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul); + if (Op != N0) + Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0), + N0->getOperand(0), Ext, N0->getOperand(2)); + return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext); + } + } + return SDValue(); } @@ -15613,13 +15613,13 @@ static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) { SmallVector<SDValue, 4> Chains; for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) { unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8; - SDValue NewPtr = - DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset)); + SDValue NewPtr = + DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset)); SDValue NewLoad = DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset, LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT, - Alignment, MMOFlags, AAInfo); + Alignment, MMOFlags, AAInfo); Loads.push_back(NewLoad); Chains.push_back(SDValue(NewLoad.getNode(), 1)); } @@ -15707,9 +15707,9 @@ static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, if (!ST->hasMVEIntegerOps()) return SDValue(); - if (SDValue V = PerformVQDMULHCombine(N, DAG)) - return V; - + if (SDValue V = PerformVQDMULHCombine(N, DAG)) + return V; + if (VT != MVT::v4i32 && VT != MVT::v8i16) return SDValue(); @@ -16317,8 +16317,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; - case ISD::SELECT_CC: - case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget); + case ISD::SELECT_CC: + case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget); case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget); case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget); case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); @@ -16735,19 +16735,19 @@ bool ARMTargetLowering::shouldSinkOperands(Instruction *I, switch (II->getIntrinsicID()) { case Intrinsic::fma: return !IsFMS(I); - case Intrinsic::arm_mve_add_predicated: - case Intrinsic::arm_mve_mul_predicated: - case Intrinsic::arm_mve_qadd_predicated: - case Intrinsic::arm_mve_hadd_predicated: - case Intrinsic::arm_mve_vqdmull_predicated: - case Intrinsic::arm_mve_qdmulh_predicated: - case Intrinsic::arm_mve_qrdmulh_predicated: - case Intrinsic::arm_mve_fma_predicated: - return true; - case Intrinsic::arm_mve_sub_predicated: - case Intrinsic::arm_mve_qsub_predicated: - case Intrinsic::arm_mve_hsub_predicated: - return Operand == 1; + case Intrinsic::arm_mve_add_predicated: + case Intrinsic::arm_mve_mul_predicated: + case Intrinsic::arm_mve_qadd_predicated: + case Intrinsic::arm_mve_hadd_predicated: + case Intrinsic::arm_mve_vqdmull_predicated: + case Intrinsic::arm_mve_qdmulh_predicated: + case Intrinsic::arm_mve_qrdmulh_predicated: + case Intrinsic::arm_mve_fma_predicated: + return true; + case Intrinsic::arm_mve_sub_predicated: + case Intrinsic::arm_mve_qsub_predicated: + case Intrinsic::arm_mve_hsub_predicated: + return Operand == 1; default: return false; } @@ -17476,7 +17476,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, return; KnownBits KnownRHS = 
DAG.computeKnownBits(Op.getOperand(1), Depth+1); - Known = KnownBits::commonBits(Known, KnownRHS); + Known = KnownBits::commonBits(Known, KnownRHS); return; } case ISD::INTRINSIC_W_CHAIN: { @@ -18349,9 +18349,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, return false; if (VT == MVT::f16 && Subtarget->hasFullFP16()) return ARM_AM::getFP16Imm(Imm) != -1; - if (VT == MVT::f32 && Subtarget->hasFullFP16() && - ARM_AM::getFP32FP16Imm(Imm) != -1) - return true; + if (VT == MVT::f32 && Subtarget->hasFullFP16() && + ARM_AM::getFP32FP16Imm(Imm) != -1) + return true; if (VT == MVT::f32) return ARM_AM::getFP32Imm(Imm) != -1; if (VT == MVT::f64 && Subtarget->hasFP64()) @@ -18661,8 +18661,8 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { : AtomicExpansionKind::None; } -// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32 -// bits, and up to 64 bits on the non-M profiles. +// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32 +// bits, and up to 64 bits on the non-M profiles. TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { // At -O0, fast-regalloc cannot cope with the live vregs necessary to @@ -18670,11 +18670,11 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { // on the stack and close enough to the spill slot, this can lead to a // situation where the monitor always gets cleared and the atomic operation // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. - unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits(); + unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits(); bool HasAtomicCmpXchg = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); - if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg && - Size <= (Subtarget->isMClass() ? 32U : 64U)) + if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg && + Size <= (Subtarget->isMClass() ? 
32U : 64U)) return AtomicExpansionKind::LLSC; return AtomicExpansionKind::None; } @@ -19129,7 +19129,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, SmallVector<Value *, 6> Ops; Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr)); - append_range(Ops, Shuffles); + append_range(Ops, Shuffles); Ops.push_back(Builder.getInt32(SI->getAlignment())); Builder.CreateCall(VstNFunc, Ops); } else { @@ -19145,7 +19145,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, SmallVector<Value *, 6> Ops; Ops.push_back(Builder.CreateBitCast(BaseAddr, EltPtrTy)); - append_range(Ops, Shuffles); + append_range(Ops, Shuffles); for (unsigned F = 0; F < Factor; F++) { Ops.push_back(Builder.getInt32(F)); Builder.CreateCall(VstNFunc, Ops); diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.h b/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.h index 61a127af07..5b04ba8c6d 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMISelLowering.h @@ -216,37 +216,37 @@ class VectorType; VMULLs, // ...signed VMULLu, // ...unsigned - VQDMULH, // MVE vqdmulh instruction - + VQDMULH, // MVE vqdmulh instruction + // MVE reductions VADDVs, // sign- or zero-extend the elements of a vector to i32, VADDVu, // add them all together, and return an i32 of their sum - VADDVps, // Same as VADDV[su] but with a v4i1 predicate mask - VADDVpu, + VADDVps, // Same as VADDV[su] but with a v4i1 predicate mask + VADDVpu, VADDLVs, // sign- or zero-extend elements to i64 and sum, returning VADDLVu, // the low and high 32-bit halves of the sum - VADDLVAs, // Same as VADDLV[su] but also add an input accumulator + VADDLVAs, // Same as VADDLV[su] but also add an input accumulator VADDLVAu, // provided as low and high halves - VADDLVps, // Same as VADDLV[su] but with a v4i1 predicate mask - VADDLVpu, - VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask - VADDLVApu, - VMLAVs, // sign- or zero-extend the elements of two vectors to i32, multiply them - VMLAVu, // and add the results together, returning an i32 of their sum - VMLAVps, // Same as VMLAV[su] with a v4i1 predicate mask - VMLAVpu, - VMLALVs, // Same as VMLAV but with i64, returning the low and - VMLALVu, // high 32-bit halves of the sum - VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask - VMLALVpu, - VMLALVAs, // Same as VMLALV but also add an input accumulator - VMLALVAu, // provided as low and high halves - VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask - VMLALVApu, - VMINVu, // Find minimum unsigned value of a vector and register - VMINVs, // Find minimum signed value of a vector and register - VMAXVu, // Find maximum unsigned value of a vector and register - VMAXVs, // Find maximum signed value of a vector and register + VADDLVps, // Same as VADDLV[su] but with a v4i1 predicate mask + VADDLVpu, + VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask + VADDLVApu, + VMLAVs, // sign- or zero-extend the elements of two vectors to i32, multiply them + VMLAVu, // and add the results together, returning an i32 of their sum + VMLAVps, // Same as VMLAV[su] with a v4i1 predicate mask + VMLAVpu, + VMLALVs, // Same as VMLAV but with i64, returning the low and + VMLALVu, // high 32-bit halves of the sum + VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask + VMLALVpu, + VMLALVAs, // Same as VMLALV but also add an input accumulator + VMLALVAu, // provided as low and high halves + VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask + 
VMLALVApu, + VMINVu, // Find minimum unsigned value of a vector and register + VMINVs, // Find minimum signed value of a vector and register + VMAXVu, // Find maximum unsigned value of a vector and register + VMAXVs, // Find maximum signed value of a vector and register SMULWB, // Signed multiply word by half word, bottom SMULWT, // Signed multiply word by half word, top @@ -285,8 +285,8 @@ class VectorType; // Vector AND with NOT of immediate VBICIMM, - // Pseudo vector bitwise select - VBSP, + // Pseudo vector bitwise select + VBSP, // Pseudo-instruction representing a memory copy using ldm/stm // instructions. diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrFormats.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrFormats.td index 85da7c5a53..7937353678 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrFormats.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrFormats.td @@ -403,9 +403,9 @@ class InstTemplate<AddrMode am, int sz, IndexMode im, bit isUnaryDataProc = 0; bit canXformTo16Bit = 0; // The instruction is a 16-bit flag setting Thumb instruction. Used - // by the parser and if-converter to determine whether to require the 'S' - // suffix on the mnemonic (when not in an IT block) or preclude it (when - // in an IT block). + // by the parser and if-converter to determine whether to require the 'S' + // suffix on the mnemonic (when not in an IT block) or preclude it (when + // in an IT block). bit thumbArithFlagSetting = 0; bit validForTailPredication = 0; diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrInfo.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrInfo.td index 8dcb319923..2fe8cbc613 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrInfo.td @@ -162,9 +162,9 @@ def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; def ARMsubs : SDNode<"ARMISD::SUBS", SDTIntBinOp, [SDNPOutGlue]>; -def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; +def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; -def ARMusat : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>; +def ARMusat : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>; def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; @@ -371,11 +371,11 @@ def imm_not_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), SDLoc(N), MVT::i32); }]>; -// asr_imm_XFORM - Returns a shift immediate with bit {5} set to 1 -def asr_imm_XFORM : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(0x20 | N->getZExtValue(), SDLoc(N), MVT:: i32); -}]>; - +// asr_imm_XFORM - Returns a shift immediate with bit {5} set to 1 +def asr_imm_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(0x20 | N->getZExtValue(), SDLoc(N), MVT:: i32); +}]>; + /// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. 
def imm16_31 : ImmLeaf<i32, [{ return (int32_t)Imm >= 16 && (int32_t)Imm < 32; @@ -442,8 +442,8 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>; def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>; -def asr_imm : ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }], asr_imm_XFORM>; - +def asr_imm : ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }], asr_imm_XFORM>; + //===----------------------------------------------------------------------===// // NEON/MVE pattern fragments // @@ -496,19 +496,19 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{ }]>; -def ARMimmAllZerosV: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 0))))>; -def ARMimmAllZerosD: PatLeaf<(bitconvert (v2i32 (ARMvmovImm (i32 0))))>; -def ARMimmAllOnesV: PatLeaf<(bitconvert (v16i8 (ARMvmovImm (i32 0xEFF))))>; -def ARMimmAllOnesD: PatLeaf<(bitconvert (v8i8 (ARMvmovImm (i32 0xEFF))))>; - -def ARMimmOneV: PatLeaf<(ARMvmovImm (i32 timm)), [{ - ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); - unsigned EltBits = 0; - uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits); - return (EltBits == N->getValueType(0).getScalarSizeInBits() && EltVal == 0x01); -}]>; - +def ARMimmAllZerosV: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 0))))>; +def ARMimmAllZerosD: PatLeaf<(bitconvert (v2i32 (ARMvmovImm (i32 0))))>; +def ARMimmAllOnesV: PatLeaf<(bitconvert (v16i8 (ARMvmovImm (i32 0xEFF))))>; +def ARMimmAllOnesD: PatLeaf<(bitconvert (v8i8 (ARMvmovImm (i32 0xEFF))))>; +def ARMimmOneV: PatLeaf<(ARMvmovImm (i32 timm)), [{ + ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); + unsigned EltBits = 0; + uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits); + return (EltBits == N->getValueType(0).getScalarSizeInBits() && EltVal == 0x01); +}]>; + + //===----------------------------------------------------------------------===// // Operand Definitions. // @@ -822,9 +822,9 @@ def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{ def arm_i32imm : IntImmLeaf<i32, [{ if (Subtarget->useMovt()) return true; - if (ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue())) - return true; - return ARM_AM::isSOImmTwoPartValNeg(Imm.getZExtValue()); + if (ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue())) + return true; + return ARM_AM::isSOImmTwoPartValNeg(Imm.getZExtValue()); }]>; /// imm0_1 predicate - Immediate in the range [0,1]. 
@@ -2492,30 +2492,30 @@ let isCall = 1, } // ARMv5T and above - def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx\t$func", []>, + def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx\t$func", []>, Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; let Inst{3-0} = func; } - def BLX_noip : ARMPseudoExpand<(outs), (ins GPRnoip:$func), - 4, IIC_Br, [], (BLX GPR:$func)>, - Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]>; - + def BLX_noip : ARMPseudoExpand<(outs), (ins GPRnoip:$func), + 4, IIC_Br, [], (BLX GPR:$func)>, + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]>; + def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm, - IIC_Br, "blx", "\t$func", []>, + IIC_Br, "blx", "\t$func", []>, Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { bits<4> func; let Inst{27-4} = 0b000100101111111111110011; let Inst{3-0} = func; } - def BLX_pred_noip : ARMPseudoExpand<(outs), (ins GPRnoip:$func), - 4, IIC_Br, [], - (BLX_pred GPR:$func, (ops 14, zero_reg))>, - Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]>; - + def BLX_pred_noip : ARMPseudoExpand<(outs), (ins GPRnoip:$func), + 4, IIC_Br, [], + (BLX_pred GPR:$func, (ops 14, zero_reg))>, + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]>; + // ARMv4T // Note: Restrict $func to the tGPR regclass to prevent it being in LR. def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), @@ -2540,16 +2540,16 @@ let isCall = 1, Requires<[IsARM]>, Sched<[WriteBr]>; } -def : ARMPat<(ARMcall GPR:$func), (BLX $func)>, - Requires<[IsARM, HasV5T, NoSLSBLRMitigation]>; -def : ARMPat<(ARMcall GPRnoip:$func), (BLX_noip $func)>, - Requires<[IsARM, HasV5T, SLSBLRMitigation]>; -def : ARMPat<(ARMcall_pred GPR:$func), (BLX_pred $func)>, - Requires<[IsARM, HasV5T, NoSLSBLRMitigation]>; -def : ARMPat<(ARMcall_pred GPRnoip:$func), (BLX_pred_noip $func)>, - Requires<[IsARM, HasV5T, SLSBLRMitigation]>; - - +def : ARMPat<(ARMcall GPR:$func), (BLX $func)>, + Requires<[IsARM, HasV5T, NoSLSBLRMitigation]>; +def : ARMPat<(ARMcall GPRnoip:$func), (BLX_noip $func)>, + Requires<[IsARM, HasV5T, SLSBLRMitigation]>; +def : ARMPat<(ARMcall_pred GPR:$func), (BLX_pred $func)>, + Requires<[IsARM, HasV5T, NoSLSBLRMitigation]>; +def : ARMPat<(ARMcall_pred GPRnoip:$func), (BLX_pred_noip $func)>, + Requires<[IsARM, HasV5T, SLSBLRMitigation]>; + + let isBranch = 1, isTerminator = 1 in { // FIXME: should be able to write a pattern for ARMBrcond, but can't use // a two-value operand where a dag node expects two operands. 
:( @@ -4089,32 +4089,32 @@ def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos), (SSAT imm1_32:$pos, GPRnopc:$a, 0)>; def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos), (USAT imm0_31:$pos, GPRnopc:$a, 0)>; -def : ARMPat<(ARMssat GPRnopc:$Rn, imm0_31:$imm), +def : ARMPat<(ARMssat GPRnopc:$Rn, imm0_31:$imm), (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; -def : ARMPat<(ARMusat GPRnopc:$Rn, imm0_31:$imm), +def : ARMPat<(ARMusat GPRnopc:$Rn, imm0_31:$imm), (USAT imm0_31:$imm, GPRnopc:$Rn, 0)>; def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos), (SSAT16 imm1_16:$pos, GPRnopc:$a)>; def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos), (USAT16 imm0_15:$pos, GPRnopc:$a)>; -def : ARMV6Pat<(int_arm_ssat (shl GPRnopc:$a, imm0_31:$shft), imm1_32:$pos), - (SSAT imm1_32:$pos, GPRnopc:$a, imm0_31:$shft)>; -def : ARMV6Pat<(int_arm_ssat (sra GPRnopc:$a, asr_imm:$shft), imm1_32:$pos), - (SSAT imm1_32:$pos, GPRnopc:$a, asr_imm:$shft)>; -def : ARMV6Pat<(int_arm_usat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos), - (USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>; -def : ARMV6Pat<(int_arm_usat (sra GPRnopc:$a, asr_imm:$shft), imm0_31:$pos), - (USAT imm0_31:$pos, GPRnopc:$a, asr_imm:$shft)>; -def : ARMPat<(ARMssat (shl GPRnopc:$Rn, imm0_31:$shft), imm0_31:$pos), - (SSAT imm0_31:$pos, GPRnopc:$Rn, imm0_31:$shft)>; -def : ARMPat<(ARMssat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos), - (SSAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>; -def : ARMPat<(ARMusat (shl GPRnopc:$Rn, imm0_31:$shft), imm0_31:$pos), - (USAT imm0_31:$pos, GPRnopc:$Rn, imm0_31:$shft)>; -def : ARMPat<(ARMusat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos), - (USAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>; - - +def : ARMV6Pat<(int_arm_ssat (shl GPRnopc:$a, imm0_31:$shft), imm1_32:$pos), + (SSAT imm1_32:$pos, GPRnopc:$a, imm0_31:$shft)>; +def : ARMV6Pat<(int_arm_ssat (sra GPRnopc:$a, asr_imm:$shft), imm1_32:$pos), + (SSAT imm1_32:$pos, GPRnopc:$a, asr_imm:$shft)>; +def : ARMV6Pat<(int_arm_usat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos), + (USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>; +def : ARMV6Pat<(int_arm_usat (sra GPRnopc:$a, asr_imm:$shft), imm0_31:$pos), + (USAT imm0_31:$pos, GPRnopc:$a, asr_imm:$shft)>; +def : ARMPat<(ARMssat (shl GPRnopc:$Rn, imm0_31:$shft), imm0_31:$pos), + (SSAT imm0_31:$pos, GPRnopc:$Rn, imm0_31:$shft)>; +def : ARMPat<(ARMssat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos), + (SSAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>; +def : ARMPat<(ARMusat (shl GPRnopc:$Rn, imm0_31:$shft), imm0_31:$pos), + (USAT imm0_31:$pos, GPRnopc:$Rn, imm0_31:$shft)>; +def : ARMPat<(ARMusat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos), + (USAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>; + + //===----------------------------------------------------------------------===// // Bitwise Instructions. // @@ -6381,15 +6381,15 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), NoItinerary, [(set GPR:$Rd, (int_arm_space timm:$size, GPR:$Rn))]>; -// SpeculationBarrierEndBB must only be used after an unconditional control -// flow, i.e. after a terminator for which isBarrier is True. -let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in { - def SpeculationBarrierISBDSBEndBB - : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; - def SpeculationBarrierSBEndBB - : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; -} - +// SpeculationBarrierEndBB must only be used after an unconditional control +// flow, i.e. after a terminator for which isBarrier is True. 
+let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in { + def SpeculationBarrierISBDSBEndBB + : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; + def SpeculationBarrierSBEndBB + : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; +} + //===---------------------------------- // Atomic cmpxchg for -O0 //===---------------------------------- diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrMVE.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrMVE.td index 0dfea68887..64cef5d967 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrMVE.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrMVE.td @@ -318,78 +318,78 @@ def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, ?, 0b11, "f", ?>; def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b11, "p", 0b0>; def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b11, "p", 0b1>; -multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt, - dag PredOperands, Instruction Inst, - SDPatternOperator IdentityVec = null_frag> { - // Unpredicated - def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; - - // Predicated with select - if !ne(VTI.Size, 0b11) then { - def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask), - (VTI.Vec (Op (VTI.Vec MQPR:$Qm), - (VTI.Vec MQPR:$Qn))), - (VTI.Vec MQPR:$inactive))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - ARMVCCThen, (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$inactive)))>; - - // Optionally with the select folded through the op - def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), - (VTI.Vec (vselect (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$Qn), - (VTI.Vec IdentityVec))))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - ARMVCCThen, (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$Qm)))>; - } - - // Predicated with intrinsic - def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), - PredOperands, - (? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - ARMVCCThen, (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$inactive)))>; -} - -multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt, - dag PredOperands, Instruction Inst, - SDPatternOperator IdentityVec = null_frag> { - // Unpredicated - def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn)))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn))>; - - // Predicated with select - if !ne(VTI.Size, 0b11) then { - def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask), - (VTI.Vec (Op (VTI.Vec MQPR:$Qm), - (VTI.Vec (ARMvdup rGPR:$Rn)))), - (VTI.Vec MQPR:$inactive))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn, - ARMVCCThen, (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$inactive)))>; - - // Optionally with the select folded through the op - def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), - (VTI.Vec (vselect (VTI.Pred VCCR:$mask), - (ARMvdup rGPR:$Rn), - (VTI.Vec IdentityVec))))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn, - ARMVCCThen, (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$Qm)))>; - } - - // Predicated with intrinsic - def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn))), - PredOperands, - (? 
(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn, - ARMVCCThen, (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$inactive)))>; -} - +multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt, + dag PredOperands, Instruction Inst, + SDPatternOperator IdentityVec = null_frag> { + // Unpredicated + def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + + // Predicated with select + if !ne(VTI.Size, 0b11) then { + def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask), + (VTI.Vec (Op (VTI.Vec MQPR:$Qm), + (VTI.Vec MQPR:$Qn))), + (VTI.Vec MQPR:$inactive))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + + // Optionally with the select folded through the op + def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), + (VTI.Vec (vselect (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$Qn), + (VTI.Vec IdentityVec))))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$Qm)))>; + } + + // Predicated with intrinsic + def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), + PredOperands, + (? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; +} + +multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, PatFrag Op, Intrinsic PredInt, + dag PredOperands, Instruction Inst, + SDPatternOperator IdentityVec = null_frag> { + // Unpredicated + def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn)))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn))>; + + // Predicated with select + if !ne(VTI.Size, 0b11) then { + def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$mask), + (VTI.Vec (Op (VTI.Vec MQPR:$Qm), + (VTI.Vec (ARMvdup rGPR:$Rn)))), + (VTI.Vec MQPR:$inactive))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn, + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + + // Optionally with the select folded through the op + def : Pat<(VTI.Vec (Op (VTI.Vec MQPR:$Qm), + (VTI.Vec (vselect (VTI.Pred VCCR:$mask), + (ARMvdup rGPR:$Rn), + (VTI.Vec IdentityVec))))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn, + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$Qm)))>; + } + + // Predicated with intrinsic + def : Pat<(VTI.Vec !con((PredInt (VTI.Vec MQPR:$Qm), (VTI.Vec (ARMvdup rGPR:$Rn))), + PredOperands, + (? 
(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn, + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; +} + // --------- Start of base classes for the instructions themselves class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm, @@ -450,7 +450,7 @@ class MVE_ScalarShift<string iname, dag oops, dag iops, string asm, string cstr, : MVE_MI_with_pred<oops, iops, NoItinerary, iname, asm, cstr, pattern> { let Inst{31-20} = 0b111010100101; let Inst{8} = 0b1; - let validForTailPredication=1; + let validForTailPredication=1; } class MVE_ScalarShiftSingleReg<string iname, dag iops, string asm, string cstr, @@ -684,13 +684,13 @@ class MVE_VADDV<string iname, string suffix, dag iops, string cstr, let validForTailPredication = 1; } -def SDTVecReduceP : SDTypeProfile<1, 2, [ // VADDLVp - SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2> -]>; +def SDTVecReduceP : SDTypeProfile<1, 2, [ // VADDLVp + SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2> +]>; def ARMVADDVs : SDNode<"ARMISD::VADDVs", SDTVecReduce>; def ARMVADDVu : SDNode<"ARMISD::VADDVu", SDTVecReduce>; -def ARMVADDVps : SDNode<"ARMISD::VADDVps", SDTVecReduceP>; -def ARMVADDVpu : SDNode<"ARMISD::VADDVpu", SDTVecReduceP>; +def ARMVADDVps : SDNode<"ARMISD::VADDVps", SDTVecReduceP>; +def ARMVADDVpu : SDNode<"ARMISD::VADDVpu", SDTVecReduceP>; multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> { def acc : MVE_VADDV<"vaddva", VTI.Suffix, @@ -707,39 +707,39 @@ multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> { if VTI.Unsigned then { def : Pat<(i32 (vecreduce_add (VTI.Vec MQPR:$vec))), (i32 (InstN $vec))>; - def : Pat<(i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred), - (VTI.Vec MQPR:$vec), - (VTI.Vec ARMimmAllZerosV))))), - (i32 (InstN $vec, ARMVCCThen, $pred))>; + def : Pat<(i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred), + (VTI.Vec MQPR:$vec), + (VTI.Vec ARMimmAllZerosV))))), + (i32 (InstN $vec, ARMVCCThen, $pred))>; def : Pat<(i32 (ARMVADDVu (VTI.Vec MQPR:$vec))), (i32 (InstN $vec))>; - def : Pat<(i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))), - (i32 (InstN $vec, ARMVCCThen, $pred))>; + def : Pat<(i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))), + (i32 (InstN $vec, ARMVCCThen, $pred))>; def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec MQPR:$vec))), (i32 tGPREven:$acc))), (i32 (InstA $acc, $vec))>; - def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred), - (VTI.Vec MQPR:$vec), - (VTI.Vec ARMimmAllZerosV))))), - (i32 tGPREven:$acc))), - (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>; + def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred), + (VTI.Vec MQPR:$vec), + (VTI.Vec ARMimmAllZerosV))))), + (i32 tGPREven:$acc))), + (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>; def : Pat<(i32 (add (i32 (ARMVADDVu (VTI.Vec MQPR:$vec))), (i32 tGPREven:$acc))), (i32 (InstA $acc, $vec))>; - def : Pat<(i32 (add (i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))), - (i32 tGPREven:$acc))), - (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>; + def : Pat<(i32 (add (i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))), + (i32 tGPREven:$acc))), + (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>; } else { def : Pat<(i32 (ARMVADDVs (VTI.Vec MQPR:$vec))), (i32 (InstN $vec))>; def : Pat<(i32 (add (i32 (ARMVADDVs (VTI.Vec MQPR:$vec))), (i32 tGPREven:$acc))), (i32 (InstA $acc, $vec))>; - def : Pat<(i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))), - (i32 (InstN $vec, ARMVCCThen, $pred))>; - def : Pat<(i32 (add (i32 
(ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))), - (i32 tGPREven:$acc))), - (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>; + def : Pat<(i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))), + (i32 (InstN $vec, ARMVCCThen, $pred))>; + def : Pat<(i32 (add (i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))), + (i32 tGPREven:$acc))), + (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>; } def : Pat<(i32 (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec), @@ -944,14 +944,14 @@ multiclass MVE_VMINMAXV_ty<string iname, bit isMin, string intrBaseName> { defm u32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4u32, intrBaseName>; } -def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer - SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2> -]>; -def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>; -def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>; -def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>; -def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>; - +def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer + SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2> +]>; +def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>; +def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>; +def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>; +def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>; + defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">; defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">; @@ -982,32 +982,32 @@ let Predicates = [HasMVEInt] in { def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))), (i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>; - def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v16i8 MQPR:$src))), - (i32 (MVE_VMINVu8 $x, $src))>; - def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v8i16 MQPR:$src))), - (i32 (MVE_VMINVu16 $x, $src))>; - def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v4i32 MQPR:$src))), - (i32 (MVE_VMINVu32 $x, $src))>; - def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v16i8 MQPR:$src))), - (i32 (MVE_VMINVs8 $x, $src))>; - def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v8i16 MQPR:$src))), - (i32 (MVE_VMINVs16 $x, $src))>; - def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v4i32 MQPR:$src))), - (i32 (MVE_VMINVs32 $x, $src))>; - - def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v16i8 MQPR:$src))), - (i32 (MVE_VMAXVu8 $x, $src))>; - def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v8i16 MQPR:$src))), - (i32 (MVE_VMAXVu16 $x, $src))>; - def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v4i32 MQPR:$src))), - (i32 (MVE_VMAXVu32 $x, $src))>; - def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v16i8 MQPR:$src))), - (i32 (MVE_VMAXVs8 $x, $src))>; - def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v8i16 MQPR:$src))), - (i32 (MVE_VMAXVs16 $x, $src))>; - def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v4i32 MQPR:$src))), - (i32 (MVE_VMAXVs32 $x, $src))>; - + def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v16i8 MQPR:$src))), + (i32 (MVE_VMINVu8 $x, $src))>; + def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v8i16 MQPR:$src))), + (i32 (MVE_VMINVu16 $x, $src))>; + def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v4i32 MQPR:$src))), + (i32 (MVE_VMINVu32 $x, $src))>; + def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v16i8 MQPR:$src))), + (i32 (MVE_VMINVs8 $x, $src))>; + def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v8i16 MQPR:$src))), + (i32 (MVE_VMINVs16 $x, $src))>; + def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v4i32 MQPR:$src))), + (i32 (MVE_VMINVs32 $x, $src))>; + + def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v16i8 MQPR:$src))), + (i32 (MVE_VMAXVu8 $x, $src))>; 
+ def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v8i16 MQPR:$src))), + (i32 (MVE_VMAXVu16 $x, $src))>; + def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v4i32 MQPR:$src))), + (i32 (MVE_VMAXVu32 $x, $src))>; + def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v16i8 MQPR:$src))), + (i32 (MVE_VMAXVs8 $x, $src))>; + def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v8i16 MQPR:$src))), + (i32 (MVE_VMAXVs16 $x, $src))>; + def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v4i32 MQPR:$src))), + (i32 (MVE_VMAXVs32 $x, $src))>; + } multiclass MVE_VMINMAXAV_ty<string iname, bit isMin, string intrBaseName> { @@ -1139,28 +1139,28 @@ def SDTVecReduce2LA : SDTypeProfile<2, 4, [ // VMLALVA SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>, SDTCisVec<4>, SDTCisVec<5> ]>; -def SDTVecReduce2P : SDTypeProfile<1, 3, [ // VMLAV - SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3> -]>; -def SDTVecReduce2LP : SDTypeProfile<2, 3, [ // VMLALV - SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4> -]>; -def SDTVecReduce2LAP : SDTypeProfile<2, 5, [ // VMLALVA - SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>, - SDTCisVec<4>, SDTCisVec<5>, SDTCisVec<6> -]>; +def SDTVecReduce2P : SDTypeProfile<1, 3, [ // VMLAV + SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3> +]>; +def SDTVecReduce2LP : SDTypeProfile<2, 3, [ // VMLALV + SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4> +]>; +def SDTVecReduce2LAP : SDTypeProfile<2, 5, [ // VMLALVA + SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>, + SDTCisVec<4>, SDTCisVec<5>, SDTCisVec<6> +]>; def ARMVMLAVs : SDNode<"ARMISD::VMLAVs", SDTVecReduce2>; def ARMVMLAVu : SDNode<"ARMISD::VMLAVu", SDTVecReduce2>; def ARMVMLALVs : SDNode<"ARMISD::VMLALVs", SDTVecReduce2L>; def ARMVMLALVu : SDNode<"ARMISD::VMLALVu", SDTVecReduce2L>; -def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>; -def ARMVMLALVAu : SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>; -def ARMVMLAVps : SDNode<"ARMISD::VMLAVps", SDTVecReduce2P>; -def ARMVMLAVpu : SDNode<"ARMISD::VMLAVpu", SDTVecReduce2P>; -def ARMVMLALVps : SDNode<"ARMISD::VMLALVps", SDTVecReduce2LP>; -def ARMVMLALVpu : SDNode<"ARMISD::VMLALVpu", SDTVecReduce2LP>; -def ARMVMLALVAps : SDNode<"ARMISD::VMLALVAps", SDTVecReduce2LAP>; -def ARMVMLALVApu : SDNode<"ARMISD::VMLALVApu", SDTVecReduce2LAP>; +def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>; +def ARMVMLALVAu : SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>; +def ARMVMLAVps : SDNode<"ARMISD::VMLAVps", SDTVecReduce2P>; +def ARMVMLAVpu : SDNode<"ARMISD::VMLAVpu", SDTVecReduce2P>; +def ARMVMLALVps : SDNode<"ARMISD::VMLALVps", SDTVecReduce2LP>; +def ARMVMLALVpu : SDNode<"ARMISD::VMLALVpu", SDTVecReduce2LP>; +def ARMVMLALVAps : SDNode<"ARMISD::VMLALVAps", SDTVecReduce2LAP>; +def ARMVMLALVApu : SDNode<"ARMISD::VMLALVApu", SDTVecReduce2LAP>; let Predicates = [HasMVEInt] in { def : Pat<(i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))), @@ -1179,68 +1179,68 @@ let Predicates = [HasMVEInt] in { (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; def : Pat<(i32 (add (i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))), - (i32 tGPREven:$src3))), + (i32 tGPREven:$src3))), (i32 (MVE_VMLADAVau32 $src3, $src1, $src2))>; def : Pat<(i32 (add (i32 (vecreduce_add (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)))), - (i32 tGPREven:$src3))), + (i32 tGPREven:$src3))), (i32 (MVE_VMLADAVau16 $src3, $src1, $src2))>; def : Pat<(i32 (add (ARMVMLAVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)), (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 
MQPR:$val1), (v8i16 MQPR:$val2)))>; def : Pat<(i32 (add (ARMVMLAVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)), (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; def : Pat<(i32 (add (i32 (vecreduce_add (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)))), - (i32 tGPREven:$src3))), + (i32 tGPREven:$src3))), (i32 (MVE_VMLADAVau8 $src3, $src1, $src2))>; def : Pat<(i32 (add (ARMVMLAVs (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)), (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; def : Pat<(i32 (add (ARMVMLAVu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)), (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - - // Predicated - def : Pat<(i32 (vecreduce_add (vselect (v4i1 VCCR:$pred), - (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)), - (v4i32 ARMimmAllZerosV)))), - (i32 (MVE_VMLADAVu32 $src1, $src2, ARMVCCThen, $pred))>; - def : Pat<(i32 (vecreduce_add (vselect (v8i1 VCCR:$pred), - (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)), - (v8i16 ARMimmAllZerosV)))), - (i32 (MVE_VMLADAVu16 $src1, $src2, ARMVCCThen, $pred))>; - def : Pat<(i32 (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))), - (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>; - def : Pat<(i32 (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))), - (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>; - def : Pat<(i32 (vecreduce_add (vselect (v16i1 VCCR:$pred), - (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)), - (v16i8 ARMimmAllZerosV)))), - (i32 (MVE_VMLADAVu8 $src1, $src2, ARMVCCThen, $pred))>; - def : Pat<(i32 (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))), - (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>; - def : Pat<(i32 (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))), - (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>; - - def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v4i1 VCCR:$pred), - (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)), - (v4i32 ARMimmAllZerosV)))), - (i32 tGPREven:$src3))), - (i32 (MVE_VMLADAVau32 $src3, $src1, $src2, ARMVCCThen, $pred))>; - def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v8i1 VCCR:$pred), - (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)), - (v8i16 ARMimmAllZerosV)))), - (i32 tGPREven:$src3))), - (i32 (MVE_VMLADAVau16 $src3, $src1, $src2, ARMVCCThen, $pred))>; - def : Pat<(i32 (add (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)), - (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>; - def : Pat<(i32 (add (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)), - (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>; - def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v16i1 VCCR:$pred), - (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)), - (v16i8 ARMimmAllZerosV)))), - (i32 tGPREven:$src3))), - (i32 (MVE_VMLADAVau8 $src3, $src1, $src2, ARMVCCThen, $pred))>; - def : Pat<(i32 (add (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)), - (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>; - def : Pat<(i32 (add (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)), - (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), 
(v16i8 MQPR:$val2), ARMVCCThen, $pred))>; + + // Predicated + def : Pat<(i32 (vecreduce_add (vselect (v4i1 VCCR:$pred), + (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)), + (v4i32 ARMimmAllZerosV)))), + (i32 (MVE_VMLADAVu32 $src1, $src2, ARMVCCThen, $pred))>; + def : Pat<(i32 (vecreduce_add (vselect (v8i1 VCCR:$pred), + (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)), + (v8i16 ARMimmAllZerosV)))), + (i32 (MVE_VMLADAVu16 $src1, $src2, ARMVCCThen, $pred))>; + def : Pat<(i32 (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))), + (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>; + def : Pat<(i32 (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))), + (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>; + def : Pat<(i32 (vecreduce_add (vselect (v16i1 VCCR:$pred), + (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)), + (v16i8 ARMimmAllZerosV)))), + (i32 (MVE_VMLADAVu8 $src1, $src2, ARMVCCThen, $pred))>; + def : Pat<(i32 (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))), + (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>; + def : Pat<(i32 (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))), + (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>; + + def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v4i1 VCCR:$pred), + (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)), + (v4i32 ARMimmAllZerosV)))), + (i32 tGPREven:$src3))), + (i32 (MVE_VMLADAVau32 $src3, $src1, $src2, ARMVCCThen, $pred))>; + def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v8i1 VCCR:$pred), + (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)), + (v8i16 ARMimmAllZerosV)))), + (i32 tGPREven:$src3))), + (i32 (MVE_VMLADAVau16 $src3, $src1, $src2, ARMVCCThen, $pred))>; + def : Pat<(i32 (add (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)), + (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>; + def : Pat<(i32 (add (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)), + (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>; + def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v16i1 VCCR:$pred), + (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)), + (v16i8 ARMimmAllZerosV)))), + (i32 tGPREven:$src3))), + (i32 (MVE_VMLADAVau8 $src3, $src1, $src2, ARMVCCThen, $pred))>; + def : Pat<(i32 (add (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)), + (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>; + def : Pat<(i32 (add (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)), + (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>; } // vmlav aliases vmladav @@ -1360,25 +1360,25 @@ let Predicates = [HasMVEInt] in { (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>; def : Pat<(ARMVMLALVAu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>; - - // Predicated - def : Pat<(ARMVMLALVps (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)), - (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>; - def : Pat<(ARMVMLALVpu (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)), - 
(MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>; - def : Pat<(ARMVMLALVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), - (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>; - def : Pat<(ARMVMLALVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), - (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>; - - def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)), - (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>; - def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)), - (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>; - def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), - (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>; - def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), - (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>; + + // Predicated + def : Pat<(ARMVMLALVps (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)), + (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>; + def : Pat<(ARMVMLALVpu (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)), + (MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>; + def : Pat<(ARMVMLALVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), + (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>; + def : Pat<(ARMVMLALVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), + (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>; + + def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)), + (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>; + def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)), + (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>; + def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), + (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>; + def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), + (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>; } // vmlalv aliases vmlaldav @@ -1426,7 +1426,7 @@ class MVE_comp<InstrItinClass itin, string iname, string suffix, } class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21, - list<dag> pattern=[]> + list<dag> pattern=[]> : MVE_comp<NoItinerary, iname, suffix, "", pattern> { let Inst{28} = 0b1; @@ -1442,18 +1442,18 @@ class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21, let Predicates = [HasMVEFloat]; } -multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode Op, Intrinsic PredInt> { - def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size{0}, bit_4>; +multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, 
MVEVectorVTInfo VTI, SDNode Op, Intrinsic PredInt> { + def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size{0}, bit_4>; - let Predicates = [HasMVEFloat] in { - defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 0)), !cast<Instruction>(NAME)>; - } + let Predicates = [HasMVEFloat] in { + defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 0)), !cast<Instruction>(NAME)>; + } } -defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, fmaxnum, int_arm_mve_max_predicated>; -defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, fmaxnum, int_arm_mve_max_predicated>; -defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, fminnum, int_arm_mve_min_predicated>; -defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_mve_min_predicated>; +defm MVE_VMAXNMf32 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v4f32, fmaxnum, int_arm_mve_max_predicated>; +defm MVE_VMAXNMf16 : MVE_VMINMAXNM_m<"vmaxnm", 0b0, MVE_v8f16, fmaxnum, int_arm_mve_max_predicated>; +defm MVE_VMINNMf32 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v4f32, fminnum, int_arm_mve_min_predicated>; +defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_mve_min_predicated>; class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size, @@ -1472,11 +1472,11 @@ class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size, } multiclass MVE_VMINMAX_m<string iname, bit bit_4, MVEVectorVTInfo VTI, - SDNode Op, Intrinsic PredInt> { + SDNode Op, Intrinsic PredInt> { def "" : MVE_VMINMAX<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, bit_4>; let Predicates = [HasMVEInt] in { - defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>; + defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>; } } @@ -1649,39 +1649,39 @@ foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>; } -let Predicates = [HasMVEInt] in { - defm : MVE_TwoOpPattern<MVE_v16i8, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>; - defm : MVE_TwoOpPattern<MVE_v8i16, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>; - defm : MVE_TwoOpPattern<MVE_v4i32, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>; - defm : MVE_TwoOpPattern<MVE_v2i64, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>; - - defm : MVE_TwoOpPattern<MVE_v16i8, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>; - defm : MVE_TwoOpPattern<MVE_v8i16, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>; - defm : MVE_TwoOpPattern<MVE_v4i32, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>; - defm : MVE_TwoOpPattern<MVE_v2i64, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>; - - defm : MVE_TwoOpPattern<MVE_v16i8, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>; - defm : MVE_TwoOpPattern<MVE_v8i16, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>; - defm : MVE_TwoOpPattern<MVE_v4i32, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>; - defm : MVE_TwoOpPattern<MVE_v2i64, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>; - - defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>, - int_arm_mve_bic_predicated, (? ), MVE_VBIC>; - defm : MVE_TwoOpPattern<MVE_v8i16, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>, - int_arm_mve_bic_predicated, (? 
), MVE_VBIC>; - defm : MVE_TwoOpPattern<MVE_v4i32, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>, - int_arm_mve_bic_predicated, (? ), MVE_VBIC>; - defm : MVE_TwoOpPattern<MVE_v2i64, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>, - int_arm_mve_bic_predicated, (? ), MVE_VBIC>; - - defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>, - int_arm_mve_orn_predicated, (? ), MVE_VORN>; - defm : MVE_TwoOpPattern<MVE_v8i16, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>, - int_arm_mve_orn_predicated, (? ), MVE_VORN>; - defm : MVE_TwoOpPattern<MVE_v4i32, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>, - int_arm_mve_orn_predicated, (? ), MVE_VORN>; - defm : MVE_TwoOpPattern<MVE_v2i64, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>, - int_arm_mve_orn_predicated, (? ), MVE_VORN>; +let Predicates = [HasMVEInt] in { + defm : MVE_TwoOpPattern<MVE_v16i8, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>; + defm : MVE_TwoOpPattern<MVE_v8i16, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>; + defm : MVE_TwoOpPattern<MVE_v4i32, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>; + defm : MVE_TwoOpPattern<MVE_v2i64, and, int_arm_mve_and_predicated, (? ), MVE_VAND, ARMimmAllOnesV>; + + defm : MVE_TwoOpPattern<MVE_v16i8, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>; + defm : MVE_TwoOpPattern<MVE_v8i16, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>; + defm : MVE_TwoOpPattern<MVE_v4i32, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>; + defm : MVE_TwoOpPattern<MVE_v2i64, or, int_arm_mve_orr_predicated, (? ), MVE_VORR, ARMimmAllZerosV>; + + defm : MVE_TwoOpPattern<MVE_v16i8, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>; + defm : MVE_TwoOpPattern<MVE_v8i16, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>; + defm : MVE_TwoOpPattern<MVE_v4i32, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>; + defm : MVE_TwoOpPattern<MVE_v2i64, xor, int_arm_mve_eor_predicated, (? ), MVE_VEOR, ARMimmAllZerosV>; + + defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>, + int_arm_mve_bic_predicated, (? ), MVE_VBIC>; + defm : MVE_TwoOpPattern<MVE_v8i16, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>, + int_arm_mve_bic_predicated, (? ), MVE_VBIC>; + defm : MVE_TwoOpPattern<MVE_v4i32, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>, + int_arm_mve_bic_predicated, (? ), MVE_VBIC>; + defm : MVE_TwoOpPattern<MVE_v2i64, BinOpFrag<(and node:$LHS, (vnotq node:$RHS))>, + int_arm_mve_bic_predicated, (? ), MVE_VBIC>; + + defm : MVE_TwoOpPattern<MVE_v16i8, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>, + int_arm_mve_orn_predicated, (? ), MVE_VORN>; + defm : MVE_TwoOpPattern<MVE_v8i16, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>, + int_arm_mve_orn_predicated, (? ), MVE_VORN>; + defm : MVE_TwoOpPattern<MVE_v4i32, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>, + int_arm_mve_orn_predicated, (? ), MVE_VORN>; + defm : MVE_TwoOpPattern<MVE_v2i64, BinOpFrag<(or node:$LHS, (vnotq node:$RHS))>, + int_arm_mve_orn_predicated, (? 
), MVE_VORN>; } class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps> @@ -1718,8 +1718,8 @@ multiclass MVE_bit_cmode_p<string iname, bit opcode, defvar UnpredPat = (VTI.Vec (op (VTI.Vec MQPR:$src), timm:$simm)); let Predicates = [HasMVEInt] in { - def : Pat<UnpredPat, - (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm))>; + def : Pat<UnpredPat, + (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm))>; def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), UnpredPat, (VTI.Vec MQPR:$src))), (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm, @@ -1929,18 +1929,18 @@ class MVE_VMULt1<string iname, string suffix, bits<2> size, let validForTailPredication = 1; } -multiclass MVE_VMUL_m<MVEVectorVTInfo VTI> { - def "" : MVE_VMULt1<"vmul", VTI.Suffix, VTI.Size>; +multiclass MVE_VMUL_m<MVEVectorVTInfo VTI> { + def "" : MVE_VMULt1<"vmul", VTI.Suffix, VTI.Size>; let Predicates = [HasMVEInt] in { - defm : MVE_TwoOpPattern<VTI, mul, int_arm_mve_mul_predicated, (? ), - !cast<Instruction>(NAME), ARMimmOneV>; + defm : MVE_TwoOpPattern<VTI, mul, int_arm_mve_mul_predicated, (? ), + !cast<Instruction>(NAME), ARMimmOneV>; } } -defm MVE_VMULi8 : MVE_VMUL_m<MVE_v16i8>; -defm MVE_VMULi16 : MVE_VMUL_m<MVE_v8i16>; -defm MVE_VMULi32 : MVE_VMUL_m<MVE_v4i32>; +defm MVE_VMULi8 : MVE_VMUL_m<MVE_v16i8>; +defm MVE_VMULi16 : MVE_VMUL_m<MVE_v8i16>; +defm MVE_VMULi32 : MVE_VMUL_m<MVE_v4i32>; class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding, list<dag> pattern=[]> @@ -1952,30 +1952,30 @@ class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding, let Inst{12-8} = 0b01011; let Inst{4} = 0b0; let Inst{0} = 0b0; - let validForTailPredication = 1; + let validForTailPredication = 1; } -def MVEvqdmulh : SDNode<"ARMISD::VQDMULH", SDTIntBinOp>; - +def MVEvqdmulh : SDNode<"ARMISD::VQDMULH", SDTIntBinOp>; + multiclass MVE_VQxDMULH_m<string iname, MVEVectorVTInfo VTI, - SDNode Op, Intrinsic unpred_int, Intrinsic pred_int, + SDNode Op, Intrinsic unpred_int, Intrinsic pred_int, bit rounding> { def "" : MVE_VQxDMULH_Base<iname, VTI.Suffix, VTI.Size, rounding>; defvar Inst = !cast<Instruction>(NAME); let Predicates = [HasMVEInt] in { - defm : MVE_TwoOpPattern<VTI, Op, pred_int, (? ), Inst>; - - // Extra unpredicated multiply intrinsic patterns - def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + defm : MVE_TwoOpPattern<VTI, Op, pred_int, (? ), Inst>; + + // Extra unpredicated multiply intrinsic patterns + def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; } } multiclass MVE_VQxDMULH<string iname, MVEVectorVTInfo VTI, bit rounding> - : MVE_VQxDMULH_m<iname, VTI, !if(rounding, null_frag, - MVEvqdmulh), - !if(rounding, int_arm_mve_vqrdmulh, + : MVE_VQxDMULH_m<iname, VTI, !if(rounding, null_frag, + MVEvqdmulh), + !if(rounding, int_arm_mve_vqrdmulh, int_arm_mve_vqdmulh), !if(rounding, int_arm_mve_qrdmulh_predicated, int_arm_mve_qdmulh_predicated), @@ -2003,12 +2003,12 @@ class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract, } multiclass MVE_VADDSUB_m<string iname, MVEVectorVTInfo VTI, bit subtract, - SDNode Op, Intrinsic PredInt> { + SDNode Op, Intrinsic PredInt> { def "" : MVE_VADDSUB<iname, VTI.Suffix, VTI.Size, subtract>; defvar Inst = !cast<Instruction>(NAME); let Predicates = [HasMVEInt] in { - defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>; + defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? 
), !cast<Instruction>(NAME), ARMimmAllZerosV>; } } @@ -2046,13 +2046,13 @@ class MVE_VQSUB_<string suffix, bit U, bits<2> size> : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size>; multiclass MVE_VQADD_m<MVEVectorVTInfo VTI, - SDNode Op, Intrinsic PredInt> { + SDNode Op, Intrinsic PredInt> { def "" : MVE_VQADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>; defvar Inst = !cast<Instruction>(NAME); let Predicates = [HasMVEInt] in { - defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), - !cast<Instruction>(NAME)>; + defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), + !cast<Instruction>(NAME)>; } } @@ -2067,13 +2067,13 @@ defm MVE_VQADDu16 : MVE_VQADD<MVE_v8u16, uaddsat>; defm MVE_VQADDu32 : MVE_VQADD<MVE_v4u32, uaddsat>; multiclass MVE_VQSUB_m<MVEVectorVTInfo VTI, - SDNode Op, Intrinsic PredInt> { + SDNode Op, Intrinsic PredInt> { def "" : MVE_VQSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>; defvar Inst = !cast<Instruction>(NAME); let Predicates = [HasMVEInt] in { - defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), - !cast<Instruction>(NAME)>; + defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), + !cast<Instruction>(NAME)>; } } @@ -2199,32 +2199,32 @@ defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>; // modelling that here with these patterns, but we're using no wrap forms of // add to ensure that the extra bit of information is not needed for the // arithmetic or the rounding. -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)), - (v16i8 (ARMvmovImm (i32 3585)))), - (i32 1))), - (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>; - def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)), - (v8i16 (ARMvmovImm (i32 2049)))), - (i32 1))), - (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>; - def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)), - (v4i32 (ARMvmovImm (i32 1)))), - (i32 1))), - (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>; - def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)), - (v16i8 (ARMvmovImm (i32 3585)))), - (i32 1))), - (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>; - def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)), - (v8i16 (ARMvmovImm (i32 2049)))), - (i32 1))), - (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>; - def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)), - (v4i32 (ARMvmovImm (i32 1)))), - (i32 1))), - (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>; -} +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)), + (v16i8 (ARMvmovImm (i32 3585)))), + (i32 1))), + (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>; + def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)), + (v8i16 (ARMvmovImm (i32 2049)))), + (i32 1))), + (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>; + def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)), + (v4i32 (ARMvmovImm (i32 1)))), + (i32 1))), + (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>; + def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)), + (v16i8 (ARMvmovImm (i32 3585)))), + (i32 1))), + (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>; + def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)), + (v8i16 (ARMvmovImm (i32 2049)))), + (i32 1))), + (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>; + def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)), + (v4i32 (ARMvmovImm (i32 1)))), + (i32 1))), + (MVE_VRHADDu32 MQPR:$Qm, 
MQPR:$Qn)>; +} class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract, @@ -2473,9 +2473,9 @@ multiclass MVE_VABSNEG_int_m<string iname, bit negate, bit saturate, let Predicates = [HasMVEInt] in { // VQABS and VQNEG have more difficult isel patterns defined elsewhere - if !not(saturate) then { - def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), - (VTI.Vec (Inst $v))>; + if !not(saturate) then { + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), + (VTI.Vec (Inst $v))>; } def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask), @@ -3032,7 +3032,7 @@ multiclass MVE_VSHRN_patterns<MVE_shift_imm_partial inst, defvar outparams = (inst (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm), (imm:$imm)); - def : Pat<(OutVTI.Vec !setdagop(inparams, int_arm_mve_vshrn)), + def : Pat<(OutVTI.Vec !setdagop(inparams, int_arm_mve_vshrn)), (OutVTI.Vec outparams)>; def : Pat<(OutVTI.Vec !con(inparams, (int_arm_mve_vshrn_predicated (InVTI.Pred VCCR:$pred)))), @@ -3234,7 +3234,7 @@ multiclass MVE_VSxI_patterns<MVE_VSxI_imm inst, string name, defvar unpred_int = !cast<Intrinsic>("int_arm_mve_" # name); defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # name # "_predicated"); - def : Pat<(VTI.Vec !setdagop(inparams, unpred_int)), + def : Pat<(VTI.Vec !setdagop(inparams, unpred_int)), (VTI.Vec outparams)>; def : Pat<(VTI.Vec !con(inparams, (pred_int (VTI.Pred VCCR:$pred)))), (VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>; @@ -3586,12 +3586,12 @@ class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]> } multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI, - SDNode Op, Intrinsic PredInt> { + SDNode Op, Intrinsic PredInt> { def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size{0}>; defvar Inst = !cast<Instruction>(NAME); let Predicates = [HasMVEFloat] in { - defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>; + defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? 
), !cast<Instruction>(NAME)>; } } @@ -3682,23 +3682,23 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> { let Predicates = [HasMVEFloat] in { if fms then { - def : Pat<(VTI.Vec (fma (fneg m1), m2, add)), - (Inst $add, $m1, $m2)>; - def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), - (VTI.Vec (fma (fneg m1), m2, add)), - add)), - (Inst $add, $m1, $m2, ARMVCCThen, $pred)>; + def : Pat<(VTI.Vec (fma (fneg m1), m2, add)), + (Inst $add, $m1, $m2)>; + def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), + (VTI.Vec (fma (fneg m1), m2, add)), + add)), + (Inst $add, $m1, $m2, ARMVCCThen, $pred)>; def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)), (Inst $add, $m1, $m2, ARMVCCThen, $pred)>; def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)), (Inst $add, $m1, $m2, ARMVCCThen, $pred)>; } else { - def : Pat<(VTI.Vec (fma m1, m2, add)), - (Inst $add, $m1, $m2)>; - def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), - (VTI.Vec (fma m1, m2, add)), - add)), - (Inst $add, $m1, $m2, ARMVCCThen, $pred)>; + def : Pat<(VTI.Vec (fma m1, m2, add)), + (Inst $add, $m1, $m2)>; + def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), + (VTI.Vec (fma m1, m2, add)), + add)), + (Inst $add, $m1, $m2, ARMVCCThen, $pred)>; def : Pat<(VTI.Vec (pred_int m1, m2, add, pred)), (Inst $add, $m1, $m2, ARMVCCThen, $pred)>; } @@ -3711,14 +3711,14 @@ defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>; defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>; multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI, - SDNode Op, Intrinsic PredInt> { + SDNode Op, Intrinsic PredInt> { def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> { let validForTailPredication = 1; } defvar Inst = !cast<Instruction>(NAME); let Predicates = [HasMVEFloat] in { - defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>; + defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? 
), !cast<Instruction>(NAME)>; } } @@ -3820,15 +3820,15 @@ multiclass MVE_VABD_fp_m<MVEVectorVTInfo VTI> : MVE_VABDT_fp_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>; defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>; -defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>; - -let Predicates = [HasMVEFloat] in { - def : Pat<(v8f16 (fabs (fsub (v8f16 MQPR:$Qm), (v8f16 MQPR:$Qn)))), - (MVE_VABDf16 MQPR:$Qm, MQPR:$Qn)>; - def : Pat<(v4f32 (fabs (fsub (v4f32 MQPR:$Qm), (v4f32 MQPR:$Qn)))), - (MVE_VABDf32 MQPR:$Qm, MQPR:$Qn)>; -} - +defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>; + +let Predicates = [HasMVEFloat] in { + def : Pat<(v8f16 (fabs (fsub (v8f16 MQPR:$Qm), (v8f16 MQPR:$Qn)))), + (MVE_VABDf16 MQPR:$Qm, MQPR:$Qn)>; + def : Pat<(v4f32 (fabs (fsub (v4f32 MQPR:$Qm), (v4f32 MQPR:$Qn)))), + (MVE_VABDf32 MQPR:$Qm, MQPR:$Qn)>; +} + class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op, Operand imm_operand_type> : MVE_float<"vcvt", suffix, @@ -4047,8 +4047,8 @@ multiclass MVE_VABSNEG_fp_m<string iname, SDNode unpred_op, Intrinsic pred_int, defvar Inst = !cast<Instruction>(NAME); let Predicates = [HasMVEInt] in { - def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), - (VTI.Vec (Inst $v))>; + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), + (VTI.Vec (Inst $v))>; def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>; @@ -4083,8 +4083,8 @@ class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12, let Inst{4} = 0b0; let Inst{3-1} = Qm{2-0}; let Inst{0} = 0b1; - - let isCommutable = 1; + + let isCommutable = 1; } multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI, @@ -4410,10 +4410,10 @@ let Predicates = [HasMVEInt] in { // vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles. def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>; -def load_align4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return cast<LoadSDNode>(N)->getAlignment() >= 4; -}]>; - +def load_align4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; + let Predicates = [HasMVEInt] in { foreach VT = [ v4i1, v8i1, v16i1 ] in { def : Pat<(i32 (predicate_cast (VT VCCR:$src))), @@ -4426,13 +4426,13 @@ let Predicates = [HasMVEInt] in { (VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>; } - // If we happen to be casting from a load we can convert that straight - // into a predicate load, so long as the load is of the correct type. - foreach VT = [ v4i1, v8i1, v16i1 ] in { - def : Pat<(VT (predicate_cast (i32 (load_align4 taddrmode_imm7<2>:$addr)))), - (VT (VLDR_P0_off taddrmode_imm7<2>:$addr))>; - } - + // If we happen to be casting from a load we can convert that straight + // into a predicate load, so long as the load is of the correct type. + foreach VT = [ v4i1, v8i1, v16i1 ] in { + def : Pat<(VT (predicate_cast (i32 (load_align4 taddrmode_imm7<2>:$addr)))), + (VT (VLDR_P0_off taddrmode_imm7<2>:$addr))>; + } + // Here we match the specific SDNode type 'ARMVectorRegCastImpl' // rather than the more general 'ARMVectorRegCast' which would also // match some bitconverts. 
If we use the latter in cases where the @@ -4441,8 +4441,8 @@ let Predicates = [HasMVEInt] in { foreach VT = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in foreach VT2 = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in - def : Pat<(VT (ARMVectorRegCastImpl (VT2 MQPR:$src))), - (VT MQPR:$src)>; + def : Pat<(VT (ARMVectorRegCastImpl (VT2 MQPR:$src))), + (VT MQPR:$src)>; } // end of MVE compares @@ -4770,7 +4770,7 @@ class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17, let Inst{16} = 0b1; let Inst{12} = T; let Inst{8} = 0b0; - let Inst{7} = !not(bit_17); + let Inst{7} = !not(bit_17); let Inst{0} = 0b1; let validForTailPredication = 1; let retainsPreviousHalfElement = 1; @@ -4801,7 +4801,7 @@ multiclass MVE_VMOVN_p<Instruction Inst, bit top, (VTI.Vec MQPR:$Qm), (i32 top))), (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>; - if !not(top) then { + if !not(top) then { // If we see MVEvmovn(a,ARMvrev(b),1), that wants to overwrite the odd // lanes of a with the odd lanes of b. In other words, the lanes we're // _keeping_ from a are the even ones. So we can flip it round and say that @@ -5173,11 +5173,11 @@ class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size, // Vector-scalar add/sub multiclass MVE_VADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract, - SDNode Op, Intrinsic PredInt> { + SDNode Op, Intrinsic PredInt> { def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b0, subtract, 0b1, 0b0>; - let Predicates = [HasMVEInt] in { - defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>; - } + let Predicates = [HasMVEInt] in { + defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), ARMimmAllZerosV>; + } } multiclass MVE_VADD_qr_m<MVEVectorVTInfo VTI> @@ -5196,35 +5196,35 @@ defm MVE_VSUB_qr_i32 : MVE_VSUB_qr_m<MVE_v4i32>; // Vector-scalar saturating add/sub multiclass MVE_VQADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract, - SDNode Op, Intrinsic PredInt> { + SDNode Op, Intrinsic PredInt> { def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b1, subtract, 0b0, VTI.Unsigned>; - - let Predicates = [HasMVEInt] in { - defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), - !cast<Instruction>(NAME)>; - } + + let Predicates = [HasMVEInt] in { + defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? 
(i32 VTI.Unsigned)), + !cast<Instruction>(NAME)>; + } } -multiclass MVE_VQADD_qr_m<MVEVectorVTInfo VTI, SDNode Op> - : MVE_VQADDSUB_qr_m<"vqadd", VTI, 0b0, Op, int_arm_mve_qadd_predicated>; +multiclass MVE_VQADD_qr_m<MVEVectorVTInfo VTI, SDNode Op> + : MVE_VQADDSUB_qr_m<"vqadd", VTI, 0b0, Op, int_arm_mve_qadd_predicated>; -multiclass MVE_VQSUB_qr_m<MVEVectorVTInfo VTI, SDNode Op> - : MVE_VQADDSUB_qr_m<"vqsub", VTI, 0b1, Op, int_arm_mve_qsub_predicated>; +multiclass MVE_VQSUB_qr_m<MVEVectorVTInfo VTI, SDNode Op> + : MVE_VQADDSUB_qr_m<"vqsub", VTI, 0b1, Op, int_arm_mve_qsub_predicated>; -defm MVE_VQADD_qr_s8 : MVE_VQADD_qr_m<MVE_v16s8, saddsat>; -defm MVE_VQADD_qr_s16 : MVE_VQADD_qr_m<MVE_v8s16, saddsat>; -defm MVE_VQADD_qr_s32 : MVE_VQADD_qr_m<MVE_v4s32, saddsat>; -defm MVE_VQADD_qr_u8 : MVE_VQADD_qr_m<MVE_v16u8, uaddsat>; -defm MVE_VQADD_qr_u16 : MVE_VQADD_qr_m<MVE_v8u16, uaddsat>; -defm MVE_VQADD_qr_u32 : MVE_VQADD_qr_m<MVE_v4u32, uaddsat>; +defm MVE_VQADD_qr_s8 : MVE_VQADD_qr_m<MVE_v16s8, saddsat>; +defm MVE_VQADD_qr_s16 : MVE_VQADD_qr_m<MVE_v8s16, saddsat>; +defm MVE_VQADD_qr_s32 : MVE_VQADD_qr_m<MVE_v4s32, saddsat>; +defm MVE_VQADD_qr_u8 : MVE_VQADD_qr_m<MVE_v16u8, uaddsat>; +defm MVE_VQADD_qr_u16 : MVE_VQADD_qr_m<MVE_v8u16, uaddsat>; +defm MVE_VQADD_qr_u32 : MVE_VQADD_qr_m<MVE_v4u32, uaddsat>; -defm MVE_VQSUB_qr_s8 : MVE_VQSUB_qr_m<MVE_v16s8, ssubsat>; -defm MVE_VQSUB_qr_s16 : MVE_VQSUB_qr_m<MVE_v8s16, ssubsat>; -defm MVE_VQSUB_qr_s32 : MVE_VQSUB_qr_m<MVE_v4s32, ssubsat>; -defm MVE_VQSUB_qr_u8 : MVE_VQSUB_qr_m<MVE_v16u8, usubsat>; -defm MVE_VQSUB_qr_u16 : MVE_VQSUB_qr_m<MVE_v8u16, usubsat>; -defm MVE_VQSUB_qr_u32 : MVE_VQSUB_qr_m<MVE_v4u32, usubsat>; +defm MVE_VQSUB_qr_s8 : MVE_VQSUB_qr_m<MVE_v16s8, ssubsat>; +defm MVE_VQSUB_qr_s16 : MVE_VQSUB_qr_m<MVE_v8s16, ssubsat>; +defm MVE_VQSUB_qr_s32 : MVE_VQSUB_qr_m<MVE_v4s32, ssubsat>; +defm MVE_VQSUB_qr_u8 : MVE_VQSUB_qr_m<MVE_v16u8, usubsat>; +defm MVE_VQSUB_qr_u16 : MVE_VQSUB_qr_m<MVE_v8u16, usubsat>; +defm MVE_VQSUB_qr_u32 : MVE_VQSUB_qr_m<MVE_v4u32, usubsat>; class MVE_VQDMULL_qr<string iname, string suffix, bit size, bit T, string cstr="", list<dag> pattern=[]> @@ -5315,23 +5315,23 @@ defm MVE_VHSUB_qr_u8 : MVE_VHSUB_qr_m<MVE_v16u8>; defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16>; defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32>; -multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract, - SDNode Op, Intrinsic PredInt> { - def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract>; - defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ), - !cast<Instruction>(NAME)>; -} - +multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract, + SDNode Op, Intrinsic PredInt> { + def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract>; + defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? 
), + !cast<Instruction>(NAME)>; +} + let Predicates = [HasMVEFloat] in { - defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd, - int_arm_mve_add_predicated>; - defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd, - int_arm_mve_add_predicated>; + defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd, + int_arm_mve_add_predicated>; + defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd, + int_arm_mve_add_predicated>; - defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub, - int_arm_mve_sub_predicated>; - defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub, - int_arm_mve_sub_predicated>; + defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub, + int_arm_mve_sub_predicated>; + defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub, + int_arm_mve_sub_predicated>; } class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size, @@ -5461,10 +5461,10 @@ class MVE_VMUL_qr_int<string iname, string suffix, bits<2> size> multiclass MVE_VMUL_qr_int_m<MVEVectorVTInfo VTI> { def "" : MVE_VMUL_qr_int<"vmul", VTI.Suffix, VTI.Size>; - let Predicates = [HasMVEInt] in { - defm : MVE_TwoOpPatternDup<VTI, mul, int_arm_mve_mul_predicated, (? ), - !cast<Instruction>(NAME), ARMimmOneV>; - } + let Predicates = [HasMVEInt] in { + defm : MVE_TwoOpPatternDup<VTI, mul, int_arm_mve_mul_predicated, (? ), + !cast<Instruction>(NAME), ARMimmOneV>; + } } defm MVE_VMUL_qr_i8 : MVE_VMUL_qr_int_m<MVE_v16i8>; @@ -5481,25 +5481,25 @@ class MVE_VxxMUL_qr<string iname, string suffix, let Inst{12} = 0b0; let Inst{8} = 0b0; let Inst{5} = 0b1; - let validForTailPredication = 1; + let validForTailPredication = 1; } multiclass MVE_VxxMUL_qr_m<string iname, MVEVectorVTInfo VTI, bit bit_28, - PatFrag Op, Intrinsic int_unpred, Intrinsic int_pred> { + PatFrag Op, Intrinsic int_unpred, Intrinsic int_pred> { def "" : MVE_VxxMUL_qr<iname, VTI.Suffix, bit_28, VTI.Size>; - - let Predicates = [HasMVEInt] in { - defm : MVE_TwoOpPatternDup<VTI, Op, int_pred, (? ), !cast<Instruction>(NAME)>; - } - defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI, int_unpred, int_pred>; + + let Predicates = [HasMVEInt] in { + defm : MVE_TwoOpPatternDup<VTI, Op, int_pred, (? ), !cast<Instruction>(NAME)>; + } + defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI, int_unpred, int_pred>; } multiclass MVE_VQDMULH_qr_m<MVEVectorVTInfo VTI> : - MVE_VxxMUL_qr_m<"vqdmulh", VTI, 0b0, MVEvqdmulh, + MVE_VxxMUL_qr_m<"vqdmulh", VTI, 0b0, MVEvqdmulh, int_arm_mve_vqdmulh, int_arm_mve_qdmulh_predicated>; multiclass MVE_VQRDMULH_qr_m<MVEVectorVTInfo VTI> : - MVE_VxxMUL_qr_m<"vqrdmulh", VTI, 0b1, null_frag, + MVE_VxxMUL_qr_m<"vqrdmulh", VTI, 0b1, null_frag, int_arm_mve_vqrdmulh, int_arm_mve_qrdmulh_predicated>; defm MVE_VQDMULH_qr_s8 : MVE_VQDMULH_qr_m<MVE_v16s8>; @@ -5510,17 +5510,17 @@ defm MVE_VQRDMULH_qr_s8 : MVE_VQRDMULH_qr_m<MVE_v16s8>; defm MVE_VQRDMULH_qr_s16 : MVE_VQRDMULH_qr_m<MVE_v8s16>; defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>; -multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI> { - let validForTailPredication = 1 in - def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11>; - defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ), - !cast<Instruction>(NAME)>; +multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI> { + let validForTailPredication = 1 in + def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11>; + defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? 
), + !cast<Instruction>(NAME)>; } -let Predicates = [HasMVEFloat] in { - defm MVE_VMUL_qr_f16 : MVE_VxxMUL_qr_f_m<MVE_v8f16>; - defm MVE_VMUL_qr_f32 : MVE_VxxMUL_qr_f_m<MVE_v4f32>; -} +let Predicates = [HasMVEFloat] in { + defm MVE_VMUL_qr_f16 : MVE_VxxMUL_qr_f_m<MVE_v8f16>; + defm MVE_VMUL_qr_f32 : MVE_VxxMUL_qr_f_m<MVE_v4f32>; +} class MVE_VFMAMLA_qr<string iname, string suffix, bit bit_28, bits<2> bits_21_20, bit S, @@ -5595,10 +5595,10 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI, if scalar_addend then { def : Pat<(VTI.Vec (fma v1, v2, vs)), (VTI.Vec (Inst v1, v2, is))>; - def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), - (VTI.Vec (fma v1, v2, vs)), - v1)), - (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>; + def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), + (VTI.Vec (fma v1, v2, vs)), + v1)), + (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>; def : Pat<(VTI.Vec (pred_int v1, v2, vs, pred)), (VTI.Vec (Inst v1, v2, is, ARMVCCThen, pred))>; } else { @@ -5606,14 +5606,14 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI, (VTI.Vec (Inst v2, v1, is))>; def : Pat<(VTI.Vec (fma vs, v1, v2)), (VTI.Vec (Inst v2, v1, is))>; - def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), - (VTI.Vec (fma vs, v2, v1)), - v1)), - (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>; - def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), - (VTI.Vec (fma v2, vs, v1)), - v1)), - (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>; + def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), + (VTI.Vec (fma vs, v2, v1)), + v1)), + (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>; + def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), + (VTI.Vec (fma v2, vs, v1)), + v1)), + (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>; def : Pat<(VTI.Vec (pred_int v1, vs, v2, pred)), (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>; def : Pat<(VTI.Vec (pred_int vs, v1, v2, pred)), @@ -5742,7 +5742,7 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>; def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>; def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>; -let isReMaterializable = 1 in +let isReMaterializable = 1 in class MVE_VCTPInst<string suffix, bits<2> size, list<dag> pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, "$Rn", vpred_n, "", pattern> { @@ -5766,8 +5766,8 @@ multiclass MVE_VCTP<MVEVectorVTInfo VTI, Intrinsic intr> { defvar Inst = !cast<Instruction>(NAME); let Predicates = [HasMVEInt] in { - def : Pat<(intr rGPR:$Rn), - (VTI.Pred (Inst rGPR:$Rn))>; + def : Pat<(intr rGPR:$Rn), + (VTI.Pred (Inst rGPR:$Rn))>; def : Pat<(and (intr rGPR:$Rn), (VTI.Pred VCCR:$mask)), (VTI.Pred (Inst rGPR:$Rn, ARMVCCThen, VCCR:$mask))>; } @@ -5845,41 +5845,41 @@ def MVE_VMOV_rr_q : MVE_VMOV_64bit<(outs rGPR:$Rt, rGPR:$Rt2), (ins MQPR:$Qd), let AsmMatchConverter = "cvtMVEVMOVQtoDReg"; } -let Predicates = [HasMVEInt] in { - // Double lane moves. There are a number of patterns here. We know that the - // insertelt's will be in descending order by index, and need to match the 5 - // patterns that might contain 2-0 or 3-1 pairs. These are: - // 3 2 1 0 -> vmovqrr 31; vmovqrr 20 - // 3 2 1 -> vmovqrr 31; vmov 2 - // 3 1 -> vmovqrr 31 - // 2 1 0 -> vmovqrr 20; vmov 1 - // 2 0 -> vmovqrr 20 - // The other potential patterns will be handled by single lane inserts. 
- def : Pat<(insertelt (insertelt (insertelt (insertelt (v4i32 MQPR:$src1), - rGPR:$srcA, (i32 0)), - rGPR:$srcB, (i32 1)), - rGPR:$srcC, (i32 2)), - rGPR:$srcD, (i32 3)), - (MVE_VMOV_q_rr (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcC, (i32 2), (i32 0)), - rGPR:$srcB, rGPR:$srcD, (i32 3), (i32 1))>; - def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1), - rGPR:$srcB, (i32 1)), - rGPR:$srcC, (i32 2)), - rGPR:$srcD, (i32 3)), - (MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 2)), - rGPR:$srcB, rGPR:$srcD, (i32 3), (i32 1))>; - def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 1)), rGPR:$srcB, (i32 3)), - (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcB, (i32 3), (i32 1))>; - def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1), - rGPR:$srcB, (i32 0)), - rGPR:$srcC, (i32 1)), - rGPR:$srcD, (i32 2)), - (MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 1)), - rGPR:$srcB, rGPR:$srcD, (i32 2), (i32 0))>; - def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 0)), rGPR:$srcB, (i32 2)), - (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcB, (i32 2), (i32 0))>; -} - +let Predicates = [HasMVEInt] in { + // Double lane moves. There are a number of patterns here. We know that the + // insertelt's will be in descending order by index, and need to match the 5 + // patterns that might contain 2-0 or 3-1 pairs. These are: + // 3 2 1 0 -> vmovqrr 31; vmovqrr 20 + // 3 2 1 -> vmovqrr 31; vmov 2 + // 3 1 -> vmovqrr 31 + // 2 1 0 -> vmovqrr 20; vmov 1 + // 2 0 -> vmovqrr 20 + // The other potential patterns will be handled by single lane inserts. + def : Pat<(insertelt (insertelt (insertelt (insertelt (v4i32 MQPR:$src1), + rGPR:$srcA, (i32 0)), + rGPR:$srcB, (i32 1)), + rGPR:$srcC, (i32 2)), + rGPR:$srcD, (i32 3)), + (MVE_VMOV_q_rr (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcC, (i32 2), (i32 0)), + rGPR:$srcB, rGPR:$srcD, (i32 3), (i32 1))>; + def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1), + rGPR:$srcB, (i32 1)), + rGPR:$srcC, (i32 2)), + rGPR:$srcD, (i32 3)), + (MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 2)), + rGPR:$srcB, rGPR:$srcD, (i32 3), (i32 1))>; + def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 1)), rGPR:$srcB, (i32 3)), + (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcB, (i32 3), (i32 1))>; + def : Pat<(insertelt (insertelt (insertelt (v4i32 MQPR:$src1), + rGPR:$srcB, (i32 0)), + rGPR:$srcC, (i32 1)), + rGPR:$srcD, (i32 2)), + (MVE_VMOV_q_rr (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$srcC, (i32 1)), + rGPR:$srcB, rGPR:$srcD, (i32 2), (i32 0))>; + def : Pat<(insertelt (insertelt (v4i32 MQPR:$src1), rGPR:$srcA, (i32 0)), rGPR:$srcB, (i32 2)), + (MVE_VMOV_q_rr MQPR:$src1, rGPR:$srcA, rGPR:$srcB, (i32 2), (i32 0))>; +} + // end of coproc mov // start of MVE interleaving load/store @@ -5908,7 +5908,7 @@ class MVE_vldst24_base<bit writeback, bit fourregs, bits<2> stage, bits<2> size, let mayLoad = load; let mayStore = !eq(load,0); let hasSideEffects = 0; - let validForTailPredication = load; + let validForTailPredication = load; } // A parameter class used to encapsulate all the ways the writeback @@ -6518,7 +6518,7 @@ class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> patte let Inst{4} = 0b0; let Defs = [VPR]; - let validForTailPredication=1; + let validForTailPredication=1; } class MVE_VPTt1<string suffix, bits<2> size, dag iops> @@ -6631,7 +6631,7 @@ class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern= let 
Defs = [VPR]; let Predicates = [HasMVEFloat]; - let validForTailPredication=1; + let validForTailPredication=1; } class MVE_VPTft1<string suffix, bit size> @@ -7107,7 +7107,7 @@ class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst, class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst, PatFrag LoadKind, int shift> - : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty (ARMvmovImm (i32 0))))), + : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty (ARMvmovImm (i32 0))))), (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred))>; multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind, @@ -7274,11 +7274,11 @@ multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>; // Masked ext loads - def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))), + def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))), (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>; - def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))), + def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))), (VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>; - def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))), + def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))), (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>; } diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td index a8c0d05d91..0f5d53b57d 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td @@ -509,7 +509,7 @@ def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>; def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>; def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>; -def NEONvbsp : SDNode<"ARMISD::VBSP", +def NEONvbsp : SDNode<"ARMISD::VBSP", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, @@ -4197,10 +4197,10 @@ def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16", defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, "vaddl", "s", add, sext, 1>; defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "u", add, zanyext, 1>; + "vaddl", "u", add, zanyext, 1>; // VADDW : Vector Add Wide (Q = Q + D) defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; -defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>; +defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>; // VHADD : Vector Halving Add defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, @@ -4512,9 +4512,9 @@ let Predicates = [HasNEON, HasV8_1a] in { (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), - (v4i32 (int_arm_neon_vqrdmulh + (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src2), - (v4i32 (ARMvduplane (v4i32 QPR:$src3), + (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane)))))), (v4i32 (VQRDMLAHslv4i32 (v4i32 
QPR:$src1), (v4i32 QPR:$src2), @@ -4565,17 +4565,17 @@ let Predicates = [HasNEON, HasV8_1a] in { (v2i32 DPR:$Vn), (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), imm:$lane)))))), - (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane))>; def : Pat<(v8i16 (ssubsat (v8i16 QPR:$src1), (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src2), - (v8i16 (ARMvduplane (v8i16 QPR:$src3), + (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane)))))), (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), - (v4i16 (EXTRACT_SUBREG + (v4i16 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; @@ -4587,7 +4587,7 @@ let Predicates = [HasNEON, HasV8_1a] in { imm:$lane)))))), (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), - (v2i32 (EXTRACT_SUBREG + (v2i32 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; @@ -5045,10 +5045,10 @@ def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16", defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, "vsubl", "s", sub, sext, 0>; defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "u", sub, zanyext, 0>; + "vsubl", "u", sub, zanyext, 0>; // VSUBW : Vector Subtract Wide (Q = Q - D) defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; -defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>; +defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>; // VHSUB : Vector Halving Subtract defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, @@ -5259,9 +5259,9 @@ def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm", // Vector Bitwise Operations. def vnotd : PatFrag<(ops node:$in), - (xor node:$in, ARMimmAllOnesD)>; + (xor node:$in, ARMimmAllOnesD)>; def vnotq : PatFrag<(ops node:$in), - (xor node:$in, ARMimmAllOnesV)>; + (xor node:$in, ARMimmAllOnesV)>; // VAND : Vector Bitwise AND @@ -5428,84 +5428,84 @@ def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; } -// The TwoAddress pass will not go looking for equivalent operations -// with different register constraints; it just inserts copies. -// That is why pseudo VBSP implemented. Is is expanded later into -// VBIT/VBIF/VBSL taking into account register constraints to avoid copies. -def VBSPd - : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), - IIC_VBINiD, "", - [(set DPR:$Vd, - (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; +// The TwoAddress pass will not go looking for equivalent operations +// with different register constraints; it just inserts copies. +// That is why pseudo VBSP implemented. Is is expanded later into +// VBIT/VBIF/VBSL taking into account register constraints to avoid copies. 
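For readers skimming these patterns, a minimal scalar sketch in C++ of the select semantics that VBSP/VBSL implement, matching the (or (and Vn, Vd), (and Vm, (vnot Vd))) patterns below; the helper name and the 32-bit width are illustrative assumptions, not anything defined in this patch:

    #include <cstdint>

    // Per-bit select: take the bit from n where the mask (the original Vd)
    // is 1, and from m where it is 0. The VBSP pseudo is expanded later into
    // VBIT/VBIF/VBSL according to register constraints, but all of them
    // compute this same function.
    inline uint32_t bitwiseSelect(uint32_t mask, uint32_t n, uint32_t m) {
      return (n & mask) | (m & ~mask);
    }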
+def VBSPd + : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + IIC_VBINiD, "", + [(set DPR:$Vd, + (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; let Predicates = [HasNEON] in { def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), - (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), - (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), - (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), - (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), - (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; } -def VBSPq - : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), - IIC_VBINiQ, "", - [(set QPR:$Vd, - (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; +def VBSPq + : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + IIC_VBINiQ, "", + [(set QPR:$Vd, + (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; let Predicates = [HasNEON] in { def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), - (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), - (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), - (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), - (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), - (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; -} - -// VBSL : Vector Bitwise Select -def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), - (ins DPR:$src1, DPR:$Vn, DPR:$Vm), - N3RegFrm, IIC_VBINiD, - "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - []>; - -def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), - (ins QPR:$src1, QPR:$Vn, QPR:$Vm), - N3RegFrm, IIC_VBINiQ, - "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - []>; - + (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; +} + +// VBSL : Vector Bitwise Select +def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + N3RegFrm, IIC_VBINiD, + "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", + []>; + +def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), + (ins 
QPR:$src1, QPR:$Vn, QPR:$Vm), + N3RegFrm, IIC_VBINiQ, + "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", + []>; + // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, @@ -6040,9 +6040,9 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, // Vector Negate. def vnegd : PatFrag<(ops node:$in), - (sub ARMimmAllZerosD, node:$in)>; + (sub ARMimmAllZerosD, node:$in)>; def vnegq : PatFrag<(ops node:$in), - (sub ARMimmAllZerosV, node:$in)>; + (sub ARMimmAllZerosV, node:$in)>; class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm), @@ -6256,11 +6256,11 @@ defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32, let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in { def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm, - [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))], + [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))], (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>, Requires<[HasZCZ]>; def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm, - [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))], + [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))], (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>, Requires<[HasZCZ]>; } @@ -7946,7 +7946,7 @@ let Predicates = [HasNEON,IsLE] in { (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; } -// The following patterns are basically a copy of the patterns above, +// The following patterns are basically a copy of the patterns above, // however with an additional VREV16d instruction to convert data // loaded by VLD1LN into proper vector format in big endian mode. let Predicates = [HasNEON,IsBE] in { @@ -9079,11 +9079,11 @@ multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy, (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>; } -def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>; -def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>; +def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>; +def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>; -defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>; -defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; +defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>; +defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; class BF16MM<bit Q, RegisterClass RegTy, string opc> @@ -9091,8 +9091,8 @@ class BF16MM<bit Q, RegisterClass RegTy, (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD, "", "", [(set (v4f32 QPR:$dst), (int_arm_neon_bfmmla (v4f32 QPR:$Vd), - (v8bf16 QPR:$Vn), - (v8bf16 QPR:$Vm)))]> { + (v8bf16 QPR:$Vn), + (v8bf16 QPR:$Vm)))]> { let Constraints = "$dst = $Vd"; let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm"); let DecoderNamespace = "VFPV8"; @@ -9106,8 +9106,8 @@ class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode> NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "", [(set (v4f32 QPR:$dst), (OpNode (v4f32 QPR:$Vd), - (v8bf16 QPR:$Vn), - (v8bf16 QPR:$Vm)))]> { + (v8bf16 QPR:$Vn), + (v8bf16 QPR:$Vm)))]> { let Constraints = "$dst = $Vd"; let DecoderNamespace = "VFPV8"; } @@ -9128,9 +9128,9 @@ multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> { def : Pat< (v4f32 (OpNode (v4f32 
QPR:$Vd), - (v8bf16 QPR:$Vn), - (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm), - VectorIndex16:$lane)))), + (v8bf16 QPR:$Vn), + (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm), + VectorIndex16:$lane)))), (!cast<Instruction>(NAME) QPR:$Vd, QPR:$Vn, (EXTRACT_SUBREG QPR:$Vm, diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb.td index 3a33dfeecd..0b0c510102 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb.td @@ -548,19 +548,19 @@ let isCall = 1, // Also used for Thumb2 def tBLXr : TI<(outs), (ins pred:$p, GPR:$func), IIC_Br, - "blx${p}\t$func", []>, + "blx${p}\t$func", []>, Requires<[IsThumb, HasV5T]>, T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24; bits<4> func; let Inst{6-3} = func; let Inst{2-0} = 0b000; } - def tBLXr_noip : ARMPseudoExpand<(outs), (ins pred:$p, GPRnoip:$func), - 2, IIC_Br, [], (tBLXr pred:$p, GPR:$func)>, - Requires<[IsThumb, HasV5T]>, - Sched<[WriteBrL]>; - + def tBLXr_noip : ARMPseudoExpand<(outs), (ins pred:$p, GPRnoip:$func), + 2, IIC_Br, [], (tBLXr pred:$p, GPR:$func)>, + Requires<[IsThumb, HasV5T]>, + Sched<[WriteBrL]>; + // ARMv8-M Security Extensions def tBLXNSr : TI<(outs), (ins pred:$p, GPRnopc:$func), IIC_Br, "blxns${p}\t$func", []>, @@ -590,11 +590,11 @@ let isCall = 1, Requires<[IsThumb]>, Sched<[WriteBr]>; } -def : ARMPat<(ARMcall GPR:$func), (tBLXr $func)>, - Requires<[IsThumb, HasV5T, NoSLSBLRMitigation]>; -def : ARMPat<(ARMcall GPRnoip:$func), (tBLXr_noip $func)>, - Requires<[IsThumb, HasV5T, SLSBLRMitigation]>; - +def : ARMPat<(ARMcall GPR:$func), (tBLXr $func)>, + Requires<[IsThumb, HasV5T, NoSLSBLRMitigation]>; +def : ARMPat<(ARMcall GPRnoip:$func), (tBLXr_noip $func)>, + Requires<[IsThumb, HasV5T, SLSBLRMitigation]>; + let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br, diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb2.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb2.td index 5642cab32e..b79212a48b 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrThumb2.td @@ -1724,7 +1724,7 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), // only. 
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 class T2IstT<bits<2> type, string opc, InstrItinClass ii> - : T2Ii8<(outs), (ins rGPR:$Rt, t2addrmode_imm8:$addr), ii, opc, + : T2Ii8<(outs), (ins rGPR:$Rt, t2addrmode_imm8:$addr), ii, opc, "\t$Rt, $addr", []>, Sched<[WriteST]> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -2623,9 +2623,9 @@ def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn), let Inst{4} = 0; } -def : T2Pat<(ARMssat GPRnopc:$Rn, imm0_31:$imm), +def : T2Pat<(ARMssat GPRnopc:$Rn, imm0_31:$imm), (t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; -def : T2Pat<(ARMusat GPRnopc:$Rn, imm0_31:$imm), +def : T2Pat<(ARMusat GPRnopc:$Rn, imm0_31:$imm), (t2USAT imm0_31:$imm, GPRnopc:$Rn, 0)>; def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>; @@ -2635,24 +2635,24 @@ def : T2Pat<(int_arm_ssat16 GPR:$a, imm1_16:$pos), (t2SSAT16 imm1_16:$pos, GPR:$a)>; def : T2Pat<(int_arm_usat16 GPR:$a, imm0_15:$pos), (t2USAT16 imm0_15:$pos, GPR:$a)>; -def : T2Pat<(int_arm_ssat (shl GPRnopc:$a, imm0_31:$shft), imm1_32:$pos), - (t2SSAT imm1_32:$pos, GPRnopc:$a, imm0_31:$shft)>; -def : T2Pat<(int_arm_ssat (sra GPRnopc:$a, asr_imm:$shft), imm1_32:$pos), - (t2SSAT imm1_32:$pos, GPRnopc:$a, asr_imm:$shft)>; -def : T2Pat<(int_arm_usat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos), - (t2USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>; -def : T2Pat<(int_arm_usat (sra GPRnopc:$a, asr_imm:$shft), imm0_31:$pos), - (t2USAT imm0_31:$pos, GPRnopc:$a, asr_imm:$shft)>; -def : T2Pat<(ARMssat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos), - (t2SSAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>; -def : T2Pat<(ARMssat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos), - (t2SSAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>; -def : T2Pat<(ARMusat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos), - (t2USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>; -def : T2Pat<(ARMusat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos), - (t2USAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>; - - +def : T2Pat<(int_arm_ssat (shl GPRnopc:$a, imm0_31:$shft), imm1_32:$pos), + (t2SSAT imm1_32:$pos, GPRnopc:$a, imm0_31:$shft)>; +def : T2Pat<(int_arm_ssat (sra GPRnopc:$a, asr_imm:$shft), imm1_32:$pos), + (t2SSAT imm1_32:$pos, GPRnopc:$a, asr_imm:$shft)>; +def : T2Pat<(int_arm_usat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos), + (t2USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>; +def : T2Pat<(int_arm_usat (sra GPRnopc:$a, asr_imm:$shft), imm0_31:$pos), + (t2USAT imm0_31:$pos, GPRnopc:$a, asr_imm:$shft)>; +def : T2Pat<(ARMssat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos), + (t2SSAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>; +def : T2Pat<(ARMssat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos), + (t2SSAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>; +def : T2Pat<(ARMusat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos), + (t2USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>; +def : T2Pat<(ARMusat (sra GPRnopc:$Rn, asr_imm:$shft), imm0_31:$pos), + (t2USAT imm0_31:$pos, GPRnopc:$Rn, asr_imm:$shft)>; + + //===----------------------------------------------------------------------===// // Shift and rotate Instructions. // @@ -4935,15 +4935,15 @@ def : InstAlias<"pssbb", (t2DSB 0x4, 14, 0), 1>, Requires<[HasDB, IsThumb2]>; // Armv8-R 'Data Full Barrier' def : InstAlias<"dfb${p}", (t2DSB 0xc, pred:$p), 1>, Requires<[HasDFB]>; -// SpeculationBarrierEndBB must only be used after an unconditional control -// flow, i.e. after a terminator for which isBarrier is True. 
-let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in { - def t2SpeculationBarrierISBDSBEndBB - : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; - def t2SpeculationBarrierSBEndBB - : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; -} - +// SpeculationBarrierEndBB must only be used after an unconditional control +// flow, i.e. after a terminator for which isBarrier is True. +let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in { + def t2SpeculationBarrierISBDSBEndBB + : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; + def t2SpeculationBarrierSBEndBB + : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>; +} + // Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional // width specifier. def : t2InstAlias<"ldr${p} $Rt, $addr", @@ -5429,17 +5429,17 @@ def t2LE : t2LOL<(outs ), (ins lelabel_u11:$label), "le", "$label"> { let isTerminator = 1; } -let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB] in { - -let usesCustomInserter = 1 in +let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB] in { + +let usesCustomInserter = 1 in def t2DoLoopStart : - t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts), 4, IIC_Br, - [(set GPRlr:$X, (int_start_loop_iterations rGPR:$elts))]>; - -let isTerminator = 1, hasSideEffects = 1 in -def t2DoLoopStartTP : - t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts, rGPR:$count), 4, IIC_Br, []>; + t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts), 4, IIC_Br, + [(set GPRlr:$X, (int_start_loop_iterations rGPR:$elts))]>; +let isTerminator = 1, hasSideEffects = 1 in +def t2DoLoopStartTP : + t2PseudoInst<(outs GPRlr:$X), (ins rGPR:$elts, rGPR:$count), 4, IIC_Br, []>; + let hasSideEffects = 0 in def t2LoopDec : t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), @@ -5458,14 +5458,14 @@ def t2LoopEnd : t2PseudoInst<(outs), (ins GPRlr:$elts, brtarget:$target), 8, IIC_Br, []>, Sched<[WriteBr]>; -def t2LoopEndDec : - t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$elts, brtarget:$target), - 8, IIC_Br, []>, Sched<[WriteBr]>; - +def t2LoopEndDec : + t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$elts, brtarget:$target), + 8, IIC_Br, []>, Sched<[WriteBr]>; + } // end isBranch, isTerminator, hasSideEffects -} - +} + } // end isNotDuplicable class CS<string iname, bits<4> opcode, list<dag> pattern=[]> @@ -5484,7 +5484,7 @@ class CS<string iname, bits<4> opcode, list<dag> pattern=[]> let Inst{3-0} = Rm{3-0}; let Uses = [CPSR]; - let hasSideEffects = 0; + let hasSideEffects = 0; } def t2CSEL : CS<"csel", 0b1000>; diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrVFP.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrVFP.td index 2be58d7a0e..9034b35ded 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrVFP.td @@ -54,16 +54,16 @@ def vfp_f16imm : Operand<f16>, let ParserMatchClass = FPImmOperand; } -def vfp_f32f16imm_xform : SDNodeXForm<fpimm, [{ - APFloat InVal = N->getValueAPF(); - uint32_t enc = ARM_AM::getFP32FP16Imm(InVal); - return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); - }]>; - -def vfp_f32f16imm : PatLeaf<(f32 fpimm), [{ - return ARM_AM::getFP32FP16Imm(N->getValueAPF()) != -1; - }], vfp_f32f16imm_xform>; - +def vfp_f32f16imm_xform : SDNodeXForm<fpimm, [{ + APFloat InVal = N->getValueAPF(); + uint32_t enc = ARM_AM::getFP32FP16Imm(InVal); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>; + +def vfp_f32f16imm : PatLeaf<(f32 fpimm), [{ + return ARM_AM::getFP32FP16Imm(N->getValueAPF()) != -1; + }], 
vfp_f32f16imm_xform>; + def vfp_f32imm_xform : SDNodeXForm<fpimm, [{ APFloat InVal = N->getValueAPF(); uint32_t enc = ARM_AM::getFP32Imm(InVal); @@ -1561,8 +1561,8 @@ class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{5} = Sm{0}; let Inst{15-12} = Sd{4-1}; let Inst{22} = Sd{0}; - - let hasSideEffects = 0; + + let hasSideEffects = 0; } class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, @@ -2626,11 +2626,11 @@ def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm), } } -def : Pat<(f32 (vfp_f32f16imm:$imm)), - (f32 (COPY_TO_REGCLASS (f16 (FCONSTH (vfp_f32f16imm_xform (f32 $imm)))), SPR))> { - let Predicates = [HasFullFP16]; -} - +def : Pat<(f32 (vfp_f32f16imm:$imm)), + (f32 (COPY_TO_REGCLASS (f16 (FCONSTH (vfp_f32f16imm_xform (f32 $imm)))), SPR))> { + let Predicates = [HasFullFP16]; +} + //===----------------------------------------------------------------------===// // Assembler aliases. // @@ -2846,12 +2846,12 @@ let Predicates = [HasV8_1MMainline, HasMVEInt] in { } defm VSTR_P0 : vfp_vstrldr_sysreg<0b0,0b1101, "p0", (outs), (ins VCCR:$P0)>; - - let Defs = [VPR] in { - defm VLDR_VPR : vfp_vstrldr_sysreg<0b1,0b1100, "vpr">; - } - defm VLDR_P0 : vfp_vstrldr_sysreg<0b1,0b1101, "p0", - (outs VCCR:$P0), (ins)>; + + let Defs = [VPR] in { + defm VLDR_VPR : vfp_vstrldr_sysreg<0b1,0b1100, "vpr">; + } + defm VLDR_P0 : vfp_vstrldr_sysreg<0b1,0b1101, "p0", + (outs VCCR:$P0), (ins)>; } let Uses = [FPSCR] in { diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMLegalizerInfo.cpp index d9b60f4c4e..92b7dd5047 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -88,7 +88,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR}) .legalFor({s32}) - .clampScalar(0, s32, s32); + .clampScalar(0, s32, s32); if (ST.hasNEON()) getActionDefinitionsBuilder({G_ADD, G_SUB}) diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index aa1fe4e4ff..e264726f91 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1268,7 +1268,7 @@ findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg, bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { // Thumb1 is already using updating loads/stores. if (isThumb1) return false; - LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI); + LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI); const MachineOperand &BaseOP = MI->getOperand(0); Register Base = BaseOP.getReg(); @@ -1320,10 +1320,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { return false; } } - if (MergeInstr != MBB.end()) { - LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr); + if (MergeInstr != MBB.end()) { + LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr); MBB.erase(MergeInstr); - } + } unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) @@ -1338,7 +1338,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { // Transfer memoperands. 
MIB.setMemRefs(MI->memoperands()); - LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB); + LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB); MBB.erase(MBBI); return true; } @@ -1386,27 +1386,27 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_POST; - case ARM::t2LDRBi8: - case ARM::t2LDRBi12: - return ARM::t2LDRB_POST; - case ARM::t2LDRSBi8: - case ARM::t2LDRSBi12: - return ARM::t2LDRSB_POST; - case ARM::t2LDRHi8: - case ARM::t2LDRHi12: - return ARM::t2LDRH_POST; - case ARM::t2LDRSHi8: - case ARM::t2LDRSHi12: - return ARM::t2LDRSH_POST; + case ARM::t2LDRBi8: + case ARM::t2LDRBi12: + return ARM::t2LDRB_POST; + case ARM::t2LDRSBi8: + case ARM::t2LDRSBi12: + return ARM::t2LDRSB_POST; + case ARM::t2LDRHi8: + case ARM::t2LDRHi12: + return ARM::t2LDRH_POST; + case ARM::t2LDRSHi8: + case ARM::t2LDRSHi12: + return ARM::t2LDRSH_POST; case ARM::t2STRi8: case ARM::t2STRi12: return ARM::t2STR_POST; - case ARM::t2STRBi8: - case ARM::t2STRBi12: - return ARM::t2STRB_POST; - case ARM::t2STRHi8: - case ARM::t2STRHi12: - return ARM::t2STRH_POST; + case ARM::t2STRBi8: + case ARM::t2STRBi12: + return ARM::t2STRB_POST; + case ARM::t2STRHi8: + case ARM::t2STRHi12: + return ARM::t2STRH_POST; case ARM::MVE_VLDRBS16: return ARM::MVE_VLDRBS16_post; @@ -1449,7 +1449,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { // Thumb1 doesn't have updating LDR/STR. // FIXME: Use LDM/STM with single register instead. if (isThumb1) return false; - LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI); + LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI); Register Base = getLoadStoreBaseOp(*MI).getReg(); bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); @@ -1491,7 +1491,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { } else return false; } - LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr); + LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr); MBB.erase(MergeInstr); ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add; @@ -1503,54 +1503,54 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { // updating load/store-multiple instructions can be used with only one // register.) MachineOperand &MO = MI->getOperand(0); - auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) - .addReg(Base, getDefRegState(true)) // WB base register - .addReg(Base, getKillRegState(isLd ? BaseKill : false)) - .addImm(Pred) - .addReg(PredReg) - .addReg(MO.getReg(), (isLd ? getDefRegState(true) - : getKillRegState(MO.isKill()))) - .cloneMemRefs(*MI); - (void)MIB; - LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); + auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) + .addReg(Base, getDefRegState(true)) // WB base register + .addReg(Base, getKillRegState(isLd ? BaseKill : false)) + .addImm(Pred) + .addReg(PredReg) + .addReg(MO.getReg(), (isLd ? 
getDefRegState(true) + : getKillRegState(MO.isKill()))) + .cloneMemRefs(*MI); + (void)MIB; + LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); } else if (isLd) { if (isAM2) { // LDR_PRE, LDR_POST if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { - auto MIB = - BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) - .addReg(Base, RegState::Define) - .addReg(Base) - .addImm(Offset) - .addImm(Pred) - .addReg(PredReg) - .cloneMemRefs(*MI); - (void)MIB; - LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); + auto MIB = + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, RegState::Define) + .addReg(Base) + .addImm(Offset) + .addImm(Pred) + .addReg(PredReg) + .cloneMemRefs(*MI); + (void)MIB; + LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); } else { int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); - auto MIB = - BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) - .addReg(Base, RegState::Define) - .addReg(Base) - .addReg(0) - .addImm(Imm) - .add(predOps(Pred, PredReg)) - .cloneMemRefs(*MI); - (void)MIB; - LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); + auto MIB = + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, RegState::Define) + .addReg(Base) + .addReg(0) + .addImm(Imm) + .add(predOps(Pred, PredReg)) + .cloneMemRefs(*MI); + (void)MIB; + LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); } } else { // t2LDR_PRE, t2LDR_POST - auto MIB = - BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) - .addReg(Base, RegState::Define) - .addReg(Base) - .addImm(Offset) - .add(predOps(Pred, PredReg)) - .cloneMemRefs(*MI); - (void)MIB; - LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); + auto MIB = + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, RegState::Define) + .addReg(Base) + .addImm(Offset) + .add(predOps(Pred, PredReg)) + .cloneMemRefs(*MI); + (void)MIB; + LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); } } else { MachineOperand &MO = MI->getOperand(0); @@ -1560,25 +1560,25 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { if (isAM2 && NewOpc == ARM::STR_POST_IMM) { int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); // STR_PRE, STR_POST - auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) - .addReg(MO.getReg(), getKillRegState(MO.isKill())) - .addReg(Base) - .addReg(0) - .addImm(Imm) - .add(predOps(Pred, PredReg)) - .cloneMemRefs(*MI); - (void)MIB; - LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); + auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) + .addReg(MO.getReg(), getKillRegState(MO.isKill())) + .addReg(Base) + .addReg(0) + .addImm(Imm) + .add(predOps(Pred, PredReg)) + .cloneMemRefs(*MI); + (void)MIB; + LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); } else { // t2STR_PRE, t2STR_POST - auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) - .addReg(MO.getReg(), getKillRegState(MO.isKill())) - .addReg(Base) - .addImm(Offset) - .add(predOps(Pred, PredReg)) - .cloneMemRefs(*MI); - (void)MIB; - LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); + auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) + .addReg(MO.getReg(), getKillRegState(MO.isKill())) + .addReg(Base) + .addImm(Offset) + .add(predOps(Pred, PredReg)) + .cloneMemRefs(*MI); + (void)MIB; + LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB); } } MBB.erase(MBBI); @@ -1592,7 +1592,7 @@ bool 
ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { "Must have t2STRDi8 or t2LDRDi8"); if (MI.getOperand(3).getImm() != 0) return false; - LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI); + LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI); // Behaviour for writeback is undefined if base register is the same as one // of the others. @@ -1620,7 +1620,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { } else return false; } - LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr); + LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr); MBB.erase(MergeInstr); DebugLoc DL = MI.getDebugLoc(); @@ -1642,7 +1642,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { MIB.add(MO); MIB.cloneMemRefs(MI); - LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB); + LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB); MBB.erase(MBBI); return true; } @@ -2585,169 +2585,169 @@ static int getBaseOperandIndex(MachineInstr &MI) { case ARM::MVE_VSTRBU8: case ARM::MVE_VSTRHU16: case ARM::MVE_VSTRWU32: - case ARM::t2LDRHi8: - case ARM::t2LDRHi12: - case ARM::t2LDRSHi8: - case ARM::t2LDRSHi12: - case ARM::t2LDRBi8: - case ARM::t2LDRBi12: - case ARM::t2LDRSBi8: - case ARM::t2LDRSBi12: - case ARM::t2STRBi8: - case ARM::t2STRBi12: - case ARM::t2STRHi8: - case ARM::t2STRHi12: + case ARM::t2LDRHi8: + case ARM::t2LDRHi12: + case ARM::t2LDRSHi8: + case ARM::t2LDRSHi12: + case ARM::t2LDRBi8: + case ARM::t2LDRBi12: + case ARM::t2LDRSBi8: + case ARM::t2LDRSBi12: + case ARM::t2STRBi8: + case ARM::t2STRBi12: + case ARM::t2STRHi8: + case ARM::t2STRHi12: return 1; - case ARM::MVE_VLDRBS16_post: - case ARM::MVE_VLDRBS32_post: - case ARM::MVE_VLDRBU16_post: - case ARM::MVE_VLDRBU32_post: - case ARM::MVE_VLDRHS32_post: - case ARM::MVE_VLDRHU32_post: - case ARM::MVE_VLDRBU8_post: - case ARM::MVE_VLDRHU16_post: - case ARM::MVE_VLDRWU32_post: - case ARM::MVE_VSTRB16_post: - case ARM::MVE_VSTRB32_post: - case ARM::MVE_VSTRH32_post: - case ARM::MVE_VSTRBU8_post: - case ARM::MVE_VSTRHU16_post: - case ARM::MVE_VSTRWU32_post: - case ARM::MVE_VLDRBS16_pre: - case ARM::MVE_VLDRBS32_pre: - case ARM::MVE_VLDRBU16_pre: - case ARM::MVE_VLDRBU32_pre: - case ARM::MVE_VLDRHS32_pre: - case ARM::MVE_VLDRHU32_pre: - case ARM::MVE_VLDRBU8_pre: - case ARM::MVE_VLDRHU16_pre: - case ARM::MVE_VLDRWU32_pre: - case ARM::MVE_VSTRB16_pre: - case ARM::MVE_VSTRB32_pre: - case ARM::MVE_VSTRH32_pre: - case ARM::MVE_VSTRBU8_pre: - case ARM::MVE_VSTRHU16_pre: - case ARM::MVE_VSTRWU32_pre: - return 2; + case ARM::MVE_VLDRBS16_post: + case ARM::MVE_VLDRBS32_post: + case ARM::MVE_VLDRBU16_post: + case ARM::MVE_VLDRBU32_post: + case ARM::MVE_VLDRHS32_post: + case ARM::MVE_VLDRHU32_post: + case ARM::MVE_VLDRBU8_post: + case ARM::MVE_VLDRHU16_post: + case ARM::MVE_VLDRWU32_post: + case ARM::MVE_VSTRB16_post: + case ARM::MVE_VSTRB32_post: + case ARM::MVE_VSTRH32_post: + case ARM::MVE_VSTRBU8_post: + case ARM::MVE_VSTRHU16_post: + case ARM::MVE_VSTRWU32_post: + case ARM::MVE_VLDRBS16_pre: + case ARM::MVE_VLDRBS32_pre: + case ARM::MVE_VLDRBU16_pre: + case ARM::MVE_VLDRBU32_pre: + case ARM::MVE_VLDRHS32_pre: + case ARM::MVE_VLDRHU32_pre: + case ARM::MVE_VLDRBU8_pre: + case ARM::MVE_VLDRHU16_pre: + case ARM::MVE_VLDRWU32_pre: + case ARM::MVE_VSTRB16_pre: + case ARM::MVE_VSTRB32_pre: + case ARM::MVE_VSTRH32_pre: + case ARM::MVE_VSTRBU8_pre: + case ARM::MVE_VSTRHU16_pre: + case ARM::MVE_VSTRWU32_pre: + return 2; } return -1; } -static bool isPostIndex(MachineInstr 
&MI) { - switch (MI.getOpcode()) { - case ARM::MVE_VLDRBS16_post: - case ARM::MVE_VLDRBS32_post: - case ARM::MVE_VLDRBU16_post: - case ARM::MVE_VLDRBU32_post: - case ARM::MVE_VLDRHS32_post: - case ARM::MVE_VLDRHU32_post: - case ARM::MVE_VLDRBU8_post: - case ARM::MVE_VLDRHU16_post: - case ARM::MVE_VLDRWU32_post: - case ARM::MVE_VSTRB16_post: - case ARM::MVE_VSTRB32_post: - case ARM::MVE_VSTRH32_post: - case ARM::MVE_VSTRBU8_post: - case ARM::MVE_VSTRHU16_post: - case ARM::MVE_VSTRWU32_post: - return true; - } - return false; -} - -static bool isPreIndex(MachineInstr &MI) { - switch (MI.getOpcode()) { - case ARM::MVE_VLDRBS16_pre: - case ARM::MVE_VLDRBS32_pre: - case ARM::MVE_VLDRBU16_pre: - case ARM::MVE_VLDRBU32_pre: - case ARM::MVE_VLDRHS32_pre: - case ARM::MVE_VLDRHU32_pre: - case ARM::MVE_VLDRBU8_pre: - case ARM::MVE_VLDRHU16_pre: - case ARM::MVE_VLDRWU32_pre: - case ARM::MVE_VSTRB16_pre: - case ARM::MVE_VSTRB32_pre: - case ARM::MVE_VSTRH32_pre: - case ARM::MVE_VSTRBU8_pre: - case ARM::MVE_VSTRHU16_pre: - case ARM::MVE_VSTRWU32_pre: - return true; - } - return false; -} - -// Given a memory access Opcode, check that the give Imm would be a valid Offset -// for this instruction (same as isLegalAddressImm), Or if the instruction -// could be easily converted to one where that was valid. For example converting -// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with -// AdjustBaseAndOffset below. -static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm, - const TargetInstrInfo *TII, - int &CodesizeEstimate) { - if (isLegalAddressImm(Opcode, Imm, TII)) - return true; - - // We can convert AddrModeT2_i12 to AddrModeT2_i8. - const MCInstrDesc &Desc = TII->get(Opcode); - unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - switch (AddrMode) { - case ARMII::AddrModeT2_i12: - CodesizeEstimate += 1; - return std::abs(Imm) < (((1 << 8) * 1) - 1); - } - return false; -} - -// Given an MI adjust its address BaseReg to use NewBaseReg and address offset -// by -Offset. This can either happen in-place or be a replacement as MI is -// converted to another instruction type. 
-static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, - int Offset, const TargetInstrInfo *TII) { - unsigned BaseOp = getBaseOperandIndex(*MI); - MI->getOperand(BaseOp).setReg(NewBaseReg); - int OldOffset = MI->getOperand(BaseOp + 1).getImm(); - if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII)) - MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset); - else { - unsigned ConvOpcode; - switch (MI->getOpcode()) { - case ARM::t2LDRHi12: - ConvOpcode = ARM::t2LDRHi8; - break; - case ARM::t2LDRSHi12: - ConvOpcode = ARM::t2LDRSHi8; - break; - case ARM::t2LDRBi12: - ConvOpcode = ARM::t2LDRBi8; - break; - case ARM::t2LDRSBi12: - ConvOpcode = ARM::t2LDRSBi8; - break; - case ARM::t2STRHi12: - ConvOpcode = ARM::t2STRHi8; - break; - case ARM::t2STRBi12: - ConvOpcode = ARM::t2STRBi8; - break; - default: - llvm_unreachable("Unhandled convertable opcode"); - } - assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) && - "Illegal Address Immediate after convert!"); - - const MCInstrDesc &MCID = TII->get(ConvOpcode); - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) - .add(MI->getOperand(0)) - .add(MI->getOperand(1)) - .addImm(OldOffset - Offset) - .add(MI->getOperand(3)) - .add(MI->getOperand(4)) - .cloneMemRefs(*MI); - MI->eraseFromParent(); - } -} - +static bool isPostIndex(MachineInstr &MI) { + switch (MI.getOpcode()) { + case ARM::MVE_VLDRBS16_post: + case ARM::MVE_VLDRBS32_post: + case ARM::MVE_VLDRBU16_post: + case ARM::MVE_VLDRBU32_post: + case ARM::MVE_VLDRHS32_post: + case ARM::MVE_VLDRHU32_post: + case ARM::MVE_VLDRBU8_post: + case ARM::MVE_VLDRHU16_post: + case ARM::MVE_VLDRWU32_post: + case ARM::MVE_VSTRB16_post: + case ARM::MVE_VSTRB32_post: + case ARM::MVE_VSTRH32_post: + case ARM::MVE_VSTRBU8_post: + case ARM::MVE_VSTRHU16_post: + case ARM::MVE_VSTRWU32_post: + return true; + } + return false; +} + +static bool isPreIndex(MachineInstr &MI) { + switch (MI.getOpcode()) { + case ARM::MVE_VLDRBS16_pre: + case ARM::MVE_VLDRBS32_pre: + case ARM::MVE_VLDRBU16_pre: + case ARM::MVE_VLDRBU32_pre: + case ARM::MVE_VLDRHS32_pre: + case ARM::MVE_VLDRHU32_pre: + case ARM::MVE_VLDRBU8_pre: + case ARM::MVE_VLDRHU16_pre: + case ARM::MVE_VLDRWU32_pre: + case ARM::MVE_VSTRB16_pre: + case ARM::MVE_VSTRB32_pre: + case ARM::MVE_VSTRH32_pre: + case ARM::MVE_VSTRBU8_pre: + case ARM::MVE_VSTRHU16_pre: + case ARM::MVE_VSTRWU32_pre: + return true; + } + return false; +} + +// Given a memory access Opcode, check that the give Imm would be a valid Offset +// for this instruction (same as isLegalAddressImm), Or if the instruction +// could be easily converted to one where that was valid. For example converting +// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with +// AdjustBaseAndOffset below. +static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm, + const TargetInstrInfo *TII, + int &CodesizeEstimate) { + if (isLegalAddressImm(Opcode, Imm, TII)) + return true; + + // We can convert AddrModeT2_i12 to AddrModeT2_i8. + const MCInstrDesc &Desc = TII->get(Opcode); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + switch (AddrMode) { + case ARMII::AddrModeT2_i12: + CodesizeEstimate += 1; + return std::abs(Imm) < (((1 << 8) * 1) - 1); + } + return false; +} + +// Given an MI adjust its address BaseReg to use NewBaseReg and address offset +// by -Offset. This can either happen in-place or be a replacement as MI is +// converted to another instruction type. 
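A rough sketch of the rebasing rule that AdjustBaseAndOffset below applies; the helper names and the 0..4095 / |imm| < 255 encoding ranges are stated here as assumptions about the Thumb-2 i12 and i8 addressing modes rather than taken from this patch:

    #include <cstdlib>

    // Rebase an access by the folded increment; if the result no longer fits
    // the unsigned i12 encoding (for example it went negative), the
    // instruction is rewritten to its i8 form, which accepts small signed
    // offsets.
    int rebaseOffset(int oldOffset, int increment) { return oldOffset - increment; }
    bool fitsT2i12(int imm) { return imm >= 0 && imm < 4096; }
    bool fitsT2i8(int imm) { return std::abs(imm) < 255; }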
+static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, + int Offset, const TargetInstrInfo *TII) { + unsigned BaseOp = getBaseOperandIndex(*MI); + MI->getOperand(BaseOp).setReg(NewBaseReg); + int OldOffset = MI->getOperand(BaseOp + 1).getImm(); + if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII)) + MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset); + else { + unsigned ConvOpcode; + switch (MI->getOpcode()) { + case ARM::t2LDRHi12: + ConvOpcode = ARM::t2LDRHi8; + break; + case ARM::t2LDRSHi12: + ConvOpcode = ARM::t2LDRSHi8; + break; + case ARM::t2LDRBi12: + ConvOpcode = ARM::t2LDRBi8; + break; + case ARM::t2LDRSBi12: + ConvOpcode = ARM::t2LDRSBi8; + break; + case ARM::t2STRHi12: + ConvOpcode = ARM::t2STRHi8; + break; + case ARM::t2STRBi12: + ConvOpcode = ARM::t2STRBi8; + break; + default: + llvm_unreachable("Unhandled convertable opcode"); + } + assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) && + "Illegal Address Immediate after convert!"); + + const MCInstrDesc &MCID = TII->get(ConvOpcode); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .addImm(OldOffset - Offset) + .add(MI->getOperand(3)) + .add(MI->getOperand(4)) + .cloneMemRefs(*MI); + MI->eraseFromParent(); + } +} + static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, @@ -2766,70 +2766,70 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset, TRC = TII->getRegClass(MCID, 2, TRI, *MF); MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC); - unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask); - switch (AddrMode) { - case ARMII::AddrModeT2_i7: - case ARMII::AddrModeT2_i7s2: - case ARMII::AddrModeT2_i7s4: - // Any MVE load/store - return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) - .addReg(NewReg, RegState::Define) - .add(MI->getOperand(0)) - .add(MI->getOperand(1)) - .addImm(Offset) - .add(MI->getOperand(3)) - .add(MI->getOperand(4)) - .cloneMemRefs(*MI); - case ARMII::AddrModeT2_i8: - if (MI->mayLoad()) { - return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) - .add(MI->getOperand(0)) - .addReg(NewReg, RegState::Define) - .add(MI->getOperand(1)) - .addImm(Offset) - .add(MI->getOperand(3)) - .add(MI->getOperand(4)) - .cloneMemRefs(*MI); - } else { - return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) - .addReg(NewReg, RegState::Define) - .add(MI->getOperand(0)) - .add(MI->getOperand(1)) - .addImm(Offset) - .add(MI->getOperand(3)) - .add(MI->getOperand(4)) - .cloneMemRefs(*MI); - } - default: - llvm_unreachable("Unhandled createPostIncLoadStore"); - } + unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask); + switch (AddrMode) { + case ARMII::AddrModeT2_i7: + case ARMII::AddrModeT2_i7s2: + case ARMII::AddrModeT2_i7s4: + // Any MVE load/store + return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) + .addReg(NewReg, RegState::Define) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .addImm(Offset) + .add(MI->getOperand(3)) + .add(MI->getOperand(4)) + .cloneMemRefs(*MI); + case ARMII::AddrModeT2_i8: + if (MI->mayLoad()) { + return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) + .add(MI->getOperand(0)) + .addReg(NewReg, RegState::Define) + .add(MI->getOperand(1)) + .addImm(Offset) + .add(MI->getOperand(3)) + .add(MI->getOperand(4)) + .cloneMemRefs(*MI); + } else { + return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) + .addReg(NewReg, RegState::Define) + 
.add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .addImm(Offset) + .add(MI->getOperand(3)) + .add(MI->getOperand(4)) + .cloneMemRefs(*MI); + } + default: + llvm_unreachable("Unhandled createPostIncLoadStore"); + } } // Given a Base Register, optimise the load/store uses to attempt to create more -// post-inc accesses and less register moves. We do this by taking zero offset -// loads/stores with an add, and convert them to a postinc load/store of the -// same type. Any subsequent accesses will be adjusted to use and account for -// the post-inc value. +// post-inc accesses and less register moves. We do this by taking zero offset +// loads/stores with an add, and convert them to a postinc load/store of the +// same type. Any subsequent accesses will be adjusted to use and account for +// the post-inc value. // For example: // LDR #0 LDR_POSTINC #16 // LDR #4 LDR #-12 // LDR #8 LDR #-8 // LDR #12 LDR #-4 // ADD #16 -// -// At the same time if we do not find an increment but do find an existing -// pre/post inc instruction, we can still adjust the offsets of subsequent -// instructions to save the register move that would otherwise be needed for the -// in-place increment. +// +// At the same time if we do not find an increment but do find an existing +// pre/post inc instruction, we can still adjust the offsets of subsequent +// instructions to save the register move that would otherwise be needed for the +// in-place increment. bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) { // We are looking for: // One zero offset load/store that can become postinc MachineInstr *BaseAccess = nullptr; - MachineInstr *PrePostInc = nullptr; + MachineInstr *PrePostInc = nullptr; // An increment that can be folded in MachineInstr *Increment = nullptr; // Other accesses after BaseAccess that will need to be updated to use the - // postinc value. + // postinc value. SmallPtrSet<MachineInstr *, 8> OtherAccesses; for (auto &Use : MRI->use_nodbg_instructions(Base)) { if (!Increment && getAddSubImmediate(Use) != 0) { @@ -2844,81 +2844,81 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) { if (!Use.getOperand(BaseOp).isReg() || Use.getOperand(BaseOp).getReg() != Base) return false; - if (isPreIndex(Use) || isPostIndex(Use)) - PrePostInc = &Use; - else if (Use.getOperand(BaseOp + 1).getImm() == 0) + if (isPreIndex(Use) || isPostIndex(Use)) + PrePostInc = &Use; + else if (Use.getOperand(BaseOp + 1).getImm() == 0) BaseAccess = &Use; else OtherAccesses.insert(&Use); } - int IncrementOffset; - Register NewBaseReg; - if (BaseAccess && Increment) { - if (PrePostInc || BaseAccess->getParent() != Increment->getParent()) - return false; - Register PredReg; - if (Increment->definesRegister(ARM::CPSR) || - getInstrPredicate(*Increment, PredReg) != ARMCC::AL) - return false; - - LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg " - << Base.virtRegIndex() << "\n"); - - // Make sure that Increment has no uses before BaseAccess. - for (MachineInstr &Use : - MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) { - if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) { - LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n"); - return false; - } - } - - // Make sure that Increment can be folded into Base - IncrementOffset = getAddSubImmediate(*Increment); - unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode( - BaseAccess->getOpcode(), IncrementOffset > 0 ? 
ARM_AM::add : ARM_AM::sub); - if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) { - LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n"); + int IncrementOffset; + Register NewBaseReg; + if (BaseAccess && Increment) { + if (PrePostInc || BaseAccess->getParent() != Increment->getParent()) + return false; + Register PredReg; + if (Increment->definesRegister(ARM::CPSR) || + getInstrPredicate(*Increment, PredReg) != ARMCC::AL) + return false; + + LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg " + << Base.virtRegIndex() << "\n"); + + // Make sure that Increment has no uses before BaseAccess. + for (MachineInstr &Use : + MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) { + if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) { + LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n"); + return false; + } + } + + // Make sure that Increment can be folded into Base + IncrementOffset = getAddSubImmediate(*Increment); + unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode( + BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub); + if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) { + LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n"); return false; } } - else if (PrePostInc) { - // If we already have a pre/post index load/store then set BaseAccess, - // IncrementOffset and NewBaseReg to the values it already produces, - // allowing us to update and subsequent uses of BaseOp reg with the - // incremented value. - if (Increment) - return false; - - LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already " - << "indexed VirtualReg " << Base.virtRegIndex() << "\n"); - int BaseOp = getBaseOperandIndex(*PrePostInc); - IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm(); - BaseAccess = PrePostInc; - NewBaseReg = PrePostInc->getOperand(0).getReg(); - } - else + else if (PrePostInc) { + // If we already have a pre/post index load/store then set BaseAccess, + // IncrementOffset and NewBaseReg to the values it already produces, + // allowing us to update and subsequent uses of BaseOp reg with the + // incremented value. + if (Increment) + return false; + + LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already " + << "indexed VirtualReg " << Base.virtRegIndex() << "\n"); + int BaseOp = getBaseOperandIndex(*PrePostInc); + IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm(); + BaseAccess = PrePostInc; + NewBaseReg = PrePostInc->getOperand(0).getReg(); + } + else return false; // And make sure that the negative value of increment can be added to all // other offsets after the BaseAccess. We rely on either // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess) // to keep things simple. - // This also adds a simple codesize metric, to detect if an instruction (like - // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi) - // cannot because it is converted to something else (t2LDRBi8). We start this - // at -1 for the gain from removing the increment. + // This also adds a simple codesize metric, to detect if an instruction (like + // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi) + // cannot because it is converted to something else (t2LDRBi8). We start this + // at -1 for the gain from removing the increment. 
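A compact restatement of the size heuristic tracked below, using an illustrative helper name that is not part of the function's real interface:

    // Start the estimate at -1 (the separate increment instruction goes away),
    // then add +1 for every use whose rebased offset only fits after an
    // i12 -> i8 conversion (e.g. t2LDRBi12 to t2LDRBi8), since the i8 form can
    // no longer shrink to a 16-bit Thumb-1 encoding such as tLDRBi.
    bool profitableUnderMinSize(int numConvertedUses) {
      int codesizeEstimate = -1 + numConvertedUses;
      return codesizeEstimate <= 0; // mirrors the hasMinSize() bail-out below
    }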
SmallPtrSet<MachineInstr *, 4> SuccessorAccesses; - int CodesizeEstimate = -1; + int CodesizeEstimate = -1; for (auto *Use : OtherAccesses) { if (DT->dominates(BaseAccess, Use)) { SuccessorAccesses.insert(Use); unsigned BaseOp = getBaseOperandIndex(*Use); - if (!isLegalOrConvertableAddressImm(Use->getOpcode(), - Use->getOperand(BaseOp + 1).getImm() - - IncrementOffset, - TII, CodesizeEstimate)) { + if (!isLegalOrConvertableAddressImm(Use->getOpcode(), + Use->getOperand(BaseOp + 1).getImm() - + IncrementOffset, + TII, CodesizeEstimate)) { LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n"); return false; } @@ -2928,27 +2928,27 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) { return false; } } - if (STI->hasMinSize() && CodesizeEstimate > 0) { - LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n"); - return false; - } - - if (!PrePostInc) { - // Replace BaseAccess with a post inc - LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump()); - LLVM_DEBUG(dbgs() << " And : "; Increment->dump()); - NewBaseReg = Increment->getOperand(0).getReg(); - MachineInstr *BaseAccessPost = - createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI); - BaseAccess->eraseFromParent(); - Increment->eraseFromParent(); - (void)BaseAccessPost; - LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump()); - } + if (STI->hasMinSize() && CodesizeEstimate > 0) { + LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n"); + return false; + } + + if (!PrePostInc) { + // Replace BaseAccess with a post inc + LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump()); + LLVM_DEBUG(dbgs() << " And : "; Increment->dump()); + NewBaseReg = Increment->getOperand(0).getReg(); + MachineInstr *BaseAccessPost = + createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI); + BaseAccess->eraseFromParent(); + Increment->eraseFromParent(); + (void)BaseAccessPost; + LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump()); + } for (auto *Use : SuccessorAccesses) { LLVM_DEBUG(dbgs() << "Changing: "; Use->dump()); - AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII); + AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII); LLVM_DEBUG(dbgs() << " To : "; Use->dump()); } diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMLowOverheadLoops.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMLowOverheadLoops.cpp index 8dc5320584..144e845550 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -56,7 +56,7 @@ #include "ARMBaseRegisterInfo.h" #include "ARMBasicBlockInfo.h" #include "ARMSubtarget.h" -#include "MVETailPredUtils.h" +#include "MVETailPredUtils.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallSet.h" @@ -74,37 +74,37 @@ using namespace llvm; #define DEBUG_TYPE "arm-low-overhead-loops" #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass" -static cl::opt<bool> -DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, - cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), - cl::init(false)); - -static bool isVectorPredicated(MachineInstr *MI) { - int PIdx = llvm::findFirstVPTPredOperandIdx(*MI); - return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR; -} - -static bool isVectorPredicate(MachineInstr *MI) { - return MI->findRegisterDefOperandIdx(ARM::VPR) != -1; -} - -static bool hasVPRUse(MachineInstr &MI) { - return MI.findRegisterUseOperandIdx(ARM::VPR) != -1; -} - 
-static bool isDomainMVE(MachineInstr *MI) { - uint64_t Domain = MI->getDesc().TSFlags & ARMII::DomainMask; - return Domain == ARMII::DomainMVE; -} - -static bool shouldInspect(MachineInstr &MI) { - return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI); -} - -static bool isDo(MachineInstr *MI) { - return MI->getOpcode() != ARM::t2WhileLoopStart; -} - +static cl::opt<bool> +DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, + cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), + cl::init(false)); + +static bool isVectorPredicated(MachineInstr *MI) { + int PIdx = llvm::findFirstVPTPredOperandIdx(*MI); + return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR; +} + +static bool isVectorPredicate(MachineInstr *MI) { + return MI->findRegisterDefOperandIdx(ARM::VPR) != -1; +} + +static bool hasVPRUse(MachineInstr &MI) { + return MI.findRegisterUseOperandIdx(ARM::VPR) != -1; +} + +static bool isDomainMVE(MachineInstr *MI) { + uint64_t Domain = MI->getDesc().TSFlags & ARMII::DomainMask; + return Domain == ARMII::DomainMVE; +} + +static bool shouldInspect(MachineInstr &MI) { + return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI); +} + +static bool isDo(MachineInstr *MI) { + return MI->getOpcode() != ARM::t2WhileLoopStart; +} + namespace { using InstSet = SmallPtrSetImpl<MachineInstr *>; @@ -143,7 +143,7 @@ namespace { // Insert exit blocks. SmallVector<MachineBasicBlock*, 2> ExitBlocks; ML.getExitBlocks(ExitBlocks); - append_range(Order, ExitBlocks); + append_range(Order, ExitBlocks); // Then add the loop body. Search(ML.getHeader()); @@ -174,187 +174,187 @@ namespace { } }; - // Represent the current state of the VPR and hold all instances which - // represent a VPT block, which is a list of instructions that begins with a - // VPT/VPST and has a maximum of four proceeding instructions. All - // instructions within the block are predicated upon the vpr and we allow - // instructions to define the vpr within in the block too. - class VPTState { - friend struct LowOverheadLoop; - - SmallVector<MachineInstr *, 4> Insts; - - static SmallVector<VPTState, 4> Blocks; - static SetVector<MachineInstr *> CurrentPredicates; - static std::map<MachineInstr *, - std::unique_ptr<PredicatedMI>> PredicatedInsts; - - static void CreateVPTBlock(MachineInstr *MI) { - assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR)) - && "Can't begin VPT without predicate"); - Blocks.emplace_back(MI); - // The execution of MI is predicated upon the current set of instructions - // that are AND'ed together to form the VPR predicate value. In the case - // that MI is a VPT, CurrentPredicates will also just be MI. - PredicatedInsts.emplace( - MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates)); - } - - static void reset() { - Blocks.clear(); - PredicatedInsts.clear(); - CurrentPredicates.clear(); - } - - static void addInst(MachineInstr *MI) { - Blocks.back().insert(MI); - PredicatedInsts.emplace( - MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates)); - } - - static void addPredicate(MachineInstr *MI) { - LLVM_DEBUG(dbgs() << "ARM Loops: Adding VPT Predicate: " << *MI); - CurrentPredicates.insert(MI); + // Represent the current state of the VPR and hold all instances which + // represent a VPT block, which is a list of instructions that begins with a + // VPT/VPST and has a maximum of four proceeding instructions. 
All + // instructions within the block are predicated upon the vpr and we allow + // instructions to define the vpr within in the block too. + class VPTState { + friend struct LowOverheadLoop; + + SmallVector<MachineInstr *, 4> Insts; + + static SmallVector<VPTState, 4> Blocks; + static SetVector<MachineInstr *> CurrentPredicates; + static std::map<MachineInstr *, + std::unique_ptr<PredicatedMI>> PredicatedInsts; + + static void CreateVPTBlock(MachineInstr *MI) { + assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR)) + && "Can't begin VPT without predicate"); + Blocks.emplace_back(MI); + // The execution of MI is predicated upon the current set of instructions + // that are AND'ed together to form the VPR predicate value. In the case + // that MI is a VPT, CurrentPredicates will also just be MI. + PredicatedInsts.emplace( + MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates)); } - static void resetPredicate(MachineInstr *MI) { - LLVM_DEBUG(dbgs() << "ARM Loops: Resetting VPT Predicate: " << *MI); - CurrentPredicates.clear(); - CurrentPredicates.insert(MI); + static void reset() { + Blocks.clear(); + PredicatedInsts.clear(); + CurrentPredicates.clear(); } - public: + static void addInst(MachineInstr *MI) { + Blocks.back().insert(MI); + PredicatedInsts.emplace( + MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates)); + } + + static void addPredicate(MachineInstr *MI) { + LLVM_DEBUG(dbgs() << "ARM Loops: Adding VPT Predicate: " << *MI); + CurrentPredicates.insert(MI); + } + + static void resetPredicate(MachineInstr *MI) { + LLVM_DEBUG(dbgs() << "ARM Loops: Resetting VPT Predicate: " << *MI); + CurrentPredicates.clear(); + CurrentPredicates.insert(MI); + } + + public: // Have we found an instruction within the block which defines the vpr? If // so, not all the instructions in the block will have the same predicate. - static bool hasUniformPredicate(VPTState &Block) { - return getDivergent(Block) == nullptr; + static bool hasUniformPredicate(VPTState &Block) { + return getDivergent(Block) == nullptr; } - // If it exists, return the first internal instruction which modifies the - // VPR. - static MachineInstr *getDivergent(VPTState &Block) { - SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts(); - for (unsigned i = 1; i < Insts.size(); ++i) { - MachineInstr *Next = Insts[i]; - if (isVectorPredicate(Next)) - return Next; // Found an instruction altering the vpr. - } - return nullptr; - } - - // Return whether the given instruction is predicated upon a VCTP. - static bool isPredicatedOnVCTP(MachineInstr *MI, bool Exclusive = false) { - SetVector<MachineInstr *> &Predicates = PredicatedInsts[MI]->Predicates; - if (Exclusive && Predicates.size() != 1) - return false; - for (auto *PredMI : Predicates) - if (isVCTP(PredMI)) - return true; - return false; - } - - // Is the VPST, controlling the block entry, predicated upon a VCTP. - static bool isEntryPredicatedOnVCTP(VPTState &Block, - bool Exclusive = false) { - SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts(); - return isPredicatedOnVCTP(Insts.front(), Exclusive); - } - - // If this block begins with a VPT, we can check whether it's using - // at least one predicated input(s), as well as possible loop invariant - // which would result in it being implicitly predicated. 
- static bool hasImplicitlyValidVPT(VPTState &Block, - ReachingDefAnalysis &RDA) { - SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts(); - MachineInstr *VPT = Insts.front(); - assert(isVPTOpcode(VPT->getOpcode()) && - "Expected VPT block to begin with VPT/VPST"); - - if (VPT->getOpcode() == ARM::MVE_VPST) - return false; - - auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) { - MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx)); - return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op); - }; - - auto IsOperandInvariant = [&](MachineInstr *MI, unsigned Idx) { - MachineOperand &MO = MI->getOperand(Idx); - if (!MO.isReg() || !MO.getReg()) - return true; - - SmallPtrSet<MachineInstr *, 2> Defs; - RDA.getGlobalReachingDefs(MI, MO.getReg(), Defs); - if (Defs.empty()) - return true; - - for (auto *Def : Defs) - if (Def->getParent() == VPT->getParent()) - return false; - return true; - }; - - // Check that at least one of the operands is directly predicated on a - // vctp and allow an invariant value too. - return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) && - (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) && - (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2)); - } - - static bool isValid(ReachingDefAnalysis &RDA) { - // All predication within the loop should be based on vctp. If the block - // isn't predicated on entry, check whether the vctp is within the block - // and that all other instructions are then predicated on it. - for (auto &Block : Blocks) { - if (isEntryPredicatedOnVCTP(Block, false) || - hasImplicitlyValidVPT(Block, RDA)) - continue; - - SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts(); - // We don't know how to convert a block with just a VPT;VCTP into - // anything valid once we remove the VCTP. For now just bail out. - assert(isVPTOpcode(Insts.front()->getOpcode()) && - "Expected VPT block to start with a VPST or VPT!"); - if (Insts.size() == 2 && Insts.front()->getOpcode() != ARM::MVE_VPST && - isVCTP(Insts.back())) - return false; - - for (auto *MI : Insts) { - // Check that any internal VCTPs are 'Then' predicated. - if (isVCTP(MI) && getVPTInstrPredicate(*MI) != ARMVCC::Then) - return false; - // Skip other instructions that build up the predicate. - if (MI->getOpcode() == ARM::MVE_VPST || isVectorPredicate(MI)) - continue; - // Check that any other instructions are predicated upon a vctp. - // TODO: We could infer when VPTs are implicitly predicated on the - // vctp (when the operands are predicated). - if (!isPredicatedOnVCTP(MI)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *MI); - return false; - } - } - } - return true; + // If it exists, return the first internal instruction which modifies the + // VPR. + static MachineInstr *getDivergent(VPTState &Block) { + SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts(); + for (unsigned i = 1; i < Insts.size(); ++i) { + MachineInstr *Next = Insts[i]; + if (isVectorPredicate(Next)) + return Next; // Found an instruction altering the vpr. + } + return nullptr; } - VPTState(MachineInstr *MI) { Insts.push_back(MI); } - - void insert(MachineInstr *MI) { - Insts.push_back(MI); - // VPT/VPST + 4 predicated instructions. - assert(Insts.size() <= 5 && "Too many instructions in VPT block!"); + // Return whether the given instruction is predicated upon a VCTP. 
+ static bool isPredicatedOnVCTP(MachineInstr *MI, bool Exclusive = false) { + SetVector<MachineInstr *> &Predicates = PredicatedInsts[MI]->Predicates; + if (Exclusive && Predicates.size() != 1) + return false; + for (auto *PredMI : Predicates) + if (isVCTP(PredMI)) + return true; + return false; + } + + // Is the VPST, controlling the block entry, predicated upon a VCTP. + static bool isEntryPredicatedOnVCTP(VPTState &Block, + bool Exclusive = false) { + SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts(); + return isPredicatedOnVCTP(Insts.front(), Exclusive); } - bool containsVCTP() const { - for (auto *MI : Insts) - if (isVCTP(MI)) - return true; - return false; + // If this block begins with a VPT, we can check whether it's using + // at least one predicated input(s), as well as possible loop invariant + // which would result in it being implicitly predicated. + static bool hasImplicitlyValidVPT(VPTState &Block, + ReachingDefAnalysis &RDA) { + SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts(); + MachineInstr *VPT = Insts.front(); + assert(isVPTOpcode(VPT->getOpcode()) && + "Expected VPT block to begin with VPT/VPST"); + + if (VPT->getOpcode() == ARM::MVE_VPST) + return false; + + auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) { + MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx)); + return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op); + }; + + auto IsOperandInvariant = [&](MachineInstr *MI, unsigned Idx) { + MachineOperand &MO = MI->getOperand(Idx); + if (!MO.isReg() || !MO.getReg()) + return true; + + SmallPtrSet<MachineInstr *, 2> Defs; + RDA.getGlobalReachingDefs(MI, MO.getReg(), Defs); + if (Defs.empty()) + return true; + + for (auto *Def : Defs) + if (Def->getParent() == VPT->getParent()) + return false; + return true; + }; + + // Check that at least one of the operands is directly predicated on a + // vctp and allow an invariant value too. + return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) && + (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) && + (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2)); } - unsigned size() const { return Insts.size(); } - SmallVectorImpl<MachineInstr *> &getInsts() { return Insts; } + static bool isValid(ReachingDefAnalysis &RDA) { + // All predication within the loop should be based on vctp. If the block + // isn't predicated on entry, check whether the vctp is within the block + // and that all other instructions are then predicated on it. + for (auto &Block : Blocks) { + if (isEntryPredicatedOnVCTP(Block, false) || + hasImplicitlyValidVPT(Block, RDA)) + continue; + + SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts(); + // We don't know how to convert a block with just a VPT;VCTP into + // anything valid once we remove the VCTP. For now just bail out. + assert(isVPTOpcode(Insts.front()->getOpcode()) && + "Expected VPT block to start with a VPST or VPT!"); + if (Insts.size() == 2 && Insts.front()->getOpcode() != ARM::MVE_VPST && + isVCTP(Insts.back())) + return false; + + for (auto *MI : Insts) { + // Check that any internal VCTPs are 'Then' predicated. + if (isVCTP(MI) && getVPTInstrPredicate(*MI) != ARMVCC::Then) + return false; + // Skip other instructions that build up the predicate. + if (MI->getOpcode() == ARM::MVE_VPST || isVectorPredicate(MI)) + continue; + // Check that any other instructions are predicated upon a vctp. + // TODO: We could infer when VPTs are implicitly predicated on the + // vctp (when the operands are predicated). 
+ if (!isPredicatedOnVCTP(MI)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *MI); + return false; + } + } + } + return true; + } + + VPTState(MachineInstr *MI) { Insts.push_back(MI); } + + void insert(MachineInstr *MI) { + Insts.push_back(MI); + // VPT/VPST + 4 predicated instructions. + assert(Insts.size() <= 5 && "Too many instructions in VPT block!"); + } + + bool containsVCTP() const { + for (auto *MI : Insts) + if (isVCTP(MI)) + return true; + return false; + } + + unsigned size() const { return Insts.size(); } + SmallVectorImpl<MachineInstr *> &getInsts() { return Insts; } }; struct LowOverheadLoop { @@ -366,13 +366,13 @@ namespace { const TargetRegisterInfo &TRI; const ARMBaseInstrInfo &TII; MachineFunction *MF = nullptr; - MachineBasicBlock::iterator StartInsertPt; - MachineBasicBlock *StartInsertBB = nullptr; + MachineBasicBlock::iterator StartInsertPt; + MachineBasicBlock *StartInsertBB = nullptr; MachineInstr *Start = nullptr; MachineInstr *Dec = nullptr; MachineInstr *End = nullptr; - MachineOperand TPNumElements; - SmallVector<MachineInstr*, 4> VCTPs; + MachineOperand TPNumElements; + SmallVector<MachineInstr*, 4> VCTPs; SmallPtrSet<MachineInstr*, 4> ToRemove; SmallPtrSet<MachineInstr*, 4> BlockMasksToRecompute; bool Revert = false; @@ -381,14 +381,14 @@ namespace { LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI, ReachingDefAnalysis &RDA, const TargetRegisterInfo &TRI, const ARMBaseInstrInfo &TII) - : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII), - TPNumElements(MachineOperand::CreateImm(0)) { + : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII), + TPNumElements(MachineOperand::CreateImm(0)) { MF = ML.getHeader()->getParent(); if (auto *MBB = ML.getLoopPreheader()) Preheader = MBB; else if (auto *MBB = MLI.findLoopPreheader(&ML, true)) Preheader = MBB; - VPTState::reset(); + VPTState::reset(); } // If this is an MVE instruction, check that we know how to use tail @@ -403,18 +403,18 @@ namespace { bool IsTailPredicationLegal() const { // For now, let's keep things really simple and only support a single // block for tail predication. - return !Revert && FoundAllComponents() && !VCTPs.empty() && + return !Revert && FoundAllComponents() && !VCTPs.empty() && !CannotTailPredicate && ML.getNumBlocks() == 1; } - // Given that MI is a VCTP, check that is equivalent to any other VCTPs - // found. - bool AddVCTP(MachineInstr *MI); - + // Given that MI is a VCTP, check that is equivalent to any other VCTPs + // found. + bool AddVCTP(MachineInstr *MI); + // Check that the predication in the loop will be equivalent once we // perform the conversion. Also ensure that we can provide the number // of elements to the loop start instruction. - bool ValidateTailPredicate(); + bool ValidateTailPredicate(); // Check that any values available outside of the loop will be the same // after tail predication conversion. @@ -427,41 +427,41 @@ namespace { // Check the branch targets are within range and we satisfy our // restrictions. - void Validate(ARMBasicBlockUtils *BBUtils); + void Validate(ARMBasicBlockUtils *BBUtils); bool FoundAllComponents() const { return Start && Dec && End; } - SmallVectorImpl<VPTState> &getVPTBlocks() { - return VPTState::Blocks; - } + SmallVectorImpl<VPTState> &getVPTBlocks() { + return VPTState::Blocks; + } - // Return the operand for the loop start instruction. This will be the loop - // iteration count, or the number of elements if we're tail predicating. 
- MachineOperand &getLoopStartOperand() { - if (IsTailPredicationLegal()) - return TPNumElements; - return isDo(Start) ? Start->getOperand(1) : Start->getOperand(0); + // Return the operand for the loop start instruction. This will be the loop + // iteration count, or the number of elements if we're tail predicating. + MachineOperand &getLoopStartOperand() { + if (IsTailPredicationLegal()) + return TPNumElements; + return isDo(Start) ? Start->getOperand(1) : Start->getOperand(0); } unsigned getStartOpcode() const { - bool IsDo = isDo(Start); + bool IsDo = isDo(Start); if (!IsTailPredicationLegal()) return IsDo ? ARM::t2DLS : ARM::t2WLS; - return VCTPOpcodeToLSTP(VCTPs.back()->getOpcode(), IsDo); + return VCTPOpcodeToLSTP(VCTPs.back()->getOpcode(), IsDo); } void dump() const { if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start; if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec; if (End) dbgs() << "ARM Loops: Found Loop End: " << *End; - if (!VCTPs.empty()) { - dbgs() << "ARM Loops: Found VCTP(s):\n"; - for (auto *MI : VCTPs) - dbgs() << " - " << *MI; - } + if (!VCTPs.empty()) { + dbgs() << "ARM Loops: Found VCTP(s):\n"; + for (auto *MI : VCTPs) + dbgs() << " - " << *MI; + } if (!FoundAllComponents()) dbgs() << "ARM Loops: Not a low-overhead loop.\n"; else if (!(Start && Dec && End)) @@ -508,14 +508,14 @@ namespace { bool RevertNonLoops(); void RevertWhile(MachineInstr *MI) const; - void RevertDo(MachineInstr *MI) const; + void RevertDo(MachineInstr *MI) const; bool RevertLoopDec(MachineInstr *MI) const; void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const; - void RevertLoopEndDec(MachineInstr *MI) const; - + void RevertLoopEndDec(MachineInstr *MI) const; + void ConvertVPTBlocks(LowOverheadLoop &LoLoop); MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop); @@ -528,230 +528,230 @@ namespace { char ARMLowOverheadLoops::ID = 0; -SmallVector<VPTState, 4> VPTState::Blocks; -SetVector<MachineInstr *> VPTState::CurrentPredicates; -std::map<MachineInstr *, - std::unique_ptr<PredicatedMI>> VPTState::PredicatedInsts; - +SmallVector<VPTState, 4> VPTState::Blocks; +SetVector<MachineInstr *> VPTState::CurrentPredicates; +std::map<MachineInstr *, + std::unique_ptr<PredicatedMI>> VPTState::PredicatedInsts; + INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, false, false) -static bool TryRemove(MachineInstr *MI, ReachingDefAnalysis &RDA, - InstSet &ToRemove, InstSet &Ignore) { - - // Check that we can remove all of Killed without having to modify any IT - // blocks. - auto WontCorruptITs = [](InstSet &Killed, ReachingDefAnalysis &RDA) { - // Collect the dead code and the MBBs in which they reside. - SmallPtrSet<MachineBasicBlock*, 2> BasicBlocks; - for (auto *Dead : Killed) - BasicBlocks.insert(Dead->getParent()); - - // Collect IT blocks in all affected basic blocks. - std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks; - for (auto *MBB : BasicBlocks) { - for (auto &IT : *MBB) { - if (IT.getOpcode() != ARM::t2IT) - continue; - RDA.getReachingLocalUses(&IT, MCRegister::from(ARM::ITSTATE), - ITBlocks[&IT]); - } - } - - // If we're removing all of the instructions within an IT block, then - // also remove the IT instruction. 
- SmallPtrSet<MachineInstr *, 2> ModifiedITs; - SmallPtrSet<MachineInstr *, 2> RemoveITs; - for (auto *Dead : Killed) { - if (MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) { - MachineInstr *IT = RDA.getMIOperand(Dead, *MO); - RemoveITs.insert(IT); - auto &CurrentBlock = ITBlocks[IT]; - CurrentBlock.erase(Dead); - if (CurrentBlock.empty()) - ModifiedITs.erase(IT); - else - ModifiedITs.insert(IT); - } +static bool TryRemove(MachineInstr *MI, ReachingDefAnalysis &RDA, + InstSet &ToRemove, InstSet &Ignore) { + + // Check that we can remove all of Killed without having to modify any IT + // blocks. + auto WontCorruptITs = [](InstSet &Killed, ReachingDefAnalysis &RDA) { + // Collect the dead code and the MBBs in which they reside. + SmallPtrSet<MachineBasicBlock*, 2> BasicBlocks; + for (auto *Dead : Killed) + BasicBlocks.insert(Dead->getParent()); + + // Collect IT blocks in all affected basic blocks. + std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks; + for (auto *MBB : BasicBlocks) { + for (auto &IT : *MBB) { + if (IT.getOpcode() != ARM::t2IT) + continue; + RDA.getReachingLocalUses(&IT, MCRegister::from(ARM::ITSTATE), + ITBlocks[&IT]); + } + } + + // If we're removing all of the instructions within an IT block, then + // also remove the IT instruction. + SmallPtrSet<MachineInstr *, 2> ModifiedITs; + SmallPtrSet<MachineInstr *, 2> RemoveITs; + for (auto *Dead : Killed) { + if (MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) { + MachineInstr *IT = RDA.getMIOperand(Dead, *MO); + RemoveITs.insert(IT); + auto &CurrentBlock = ITBlocks[IT]; + CurrentBlock.erase(Dead); + if (CurrentBlock.empty()) + ModifiedITs.erase(IT); + else + ModifiedITs.insert(IT); + } + } + if (!ModifiedITs.empty()) + return false; + Killed.insert(RemoveITs.begin(), RemoveITs.end()); + return true; + }; + + SmallPtrSet<MachineInstr *, 2> Uses; + if (!RDA.isSafeToRemove(MI, Uses, Ignore)) + return false; + + if (WontCorruptITs(Uses, RDA)) { + ToRemove.insert(Uses.begin(), Uses.end()); + LLVM_DEBUG(dbgs() << "ARM Loops: Able to remove: " << *MI + << " - can also remove:\n"; + for (auto *Use : Uses) + dbgs() << " - " << *Use); + + SmallPtrSet<MachineInstr*, 4> Killed; + RDA.collectKilledOperands(MI, Killed); + if (WontCorruptITs(Killed, RDA)) { + ToRemove.insert(Killed.begin(), Killed.end()); + LLVM_DEBUG(for (auto *Dead : Killed) + dbgs() << " - " << *Dead); } - if (!ModifiedITs.empty()) - return false; - Killed.insert(RemoveITs.begin(), RemoveITs.end()); - return true; - }; - - SmallPtrSet<MachineInstr *, 2> Uses; - if (!RDA.isSafeToRemove(MI, Uses, Ignore)) - return false; - - if (WontCorruptITs(Uses, RDA)) { - ToRemove.insert(Uses.begin(), Uses.end()); - LLVM_DEBUG(dbgs() << "ARM Loops: Able to remove: " << *MI - << " - can also remove:\n"; - for (auto *Use : Uses) - dbgs() << " - " << *Use); - - SmallPtrSet<MachineInstr*, 4> Killed; - RDA.collectKilledOperands(MI, Killed); - if (WontCorruptITs(Killed, RDA)) { - ToRemove.insert(Killed.begin(), Killed.end()); - LLVM_DEBUG(for (auto *Dead : Killed) - dbgs() << " - " << *Dead); - } - return true; - } - return false; -} - -bool LowOverheadLoop::ValidateTailPredicate() { - if (!IsTailPredicationLegal()) { - LLVM_DEBUG(if (VCTPs.empty()) - dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n"; - dbgs() << "ARM Loops: Tail-predication is not valid.\n"); - return false; + return true; } - - assert(!VCTPs.empty() && "VCTP instruction expected but is not set"); - assert(ML.getBlocks().size() == 1 && - "Shouldn't be processing a loop 
with more than one block"); - - if (DisableTailPredication) { - LLVM_DEBUG(dbgs() << "ARM Loops: tail-predication is disabled\n"); + return false; +} + +bool LowOverheadLoop::ValidateTailPredicate() { + if (!IsTailPredicationLegal()) { + LLVM_DEBUG(if (VCTPs.empty()) + dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n"; + dbgs() << "ARM Loops: Tail-predication is not valid.\n"); return false; - } + } - if (!VPTState::isValid(RDA)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Invalid VPT state.\n"); - return false; - } + assert(!VCTPs.empty() && "VCTP instruction expected but is not set"); + assert(ML.getBlocks().size() == 1 && + "Shouldn't be processing a loop with more than one block"); - if (!ValidateLiveOuts()) { - LLVM_DEBUG(dbgs() << "ARM Loops: Invalid live outs.\n"); + if (DisableTailPredication) { + LLVM_DEBUG(dbgs() << "ARM Loops: tail-predication is disabled\n"); return false; } - // Check that creating a [W|D]LSTP, which will define LR with an element - // count instead of iteration count, won't affect any other instructions - // than the LoopStart and LoopDec. - // TODO: We should try to insert the [W|D]LSTP after any of the other uses. - Register StartReg = isDo(Start) ? Start->getOperand(1).getReg() - : Start->getOperand(0).getReg(); - if (StartInsertPt == Start && StartReg == ARM::LR) { - if (auto *IterCount = RDA.getMIOperand(Start, isDo(Start) ? 1 : 0)) { - SmallPtrSet<MachineInstr *, 2> Uses; - RDA.getGlobalUses(IterCount, MCRegister::from(ARM::LR), Uses); - for (auto *Use : Uses) { - if (Use != Start && Use != Dec) { - LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use); - return false; - } + if (!VPTState::isValid(RDA)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Invalid VPT state.\n"); + return false; + } + + if (!ValidateLiveOuts()) { + LLVM_DEBUG(dbgs() << "ARM Loops: Invalid live outs.\n"); + return false; + } + + // Check that creating a [W|D]LSTP, which will define LR with an element + // count instead of iteration count, won't affect any other instructions + // than the LoopStart and LoopDec. + // TODO: We should try to insert the [W|D]LSTP after any of the other uses. + Register StartReg = isDo(Start) ? Start->getOperand(1).getReg() + : Start->getOperand(0).getReg(); + if (StartInsertPt == Start && StartReg == ARM::LR) { + if (auto *IterCount = RDA.getMIOperand(Start, isDo(Start) ? 1 : 0)) { + SmallPtrSet<MachineInstr *, 2> Uses; + RDA.getGlobalUses(IterCount, MCRegister::from(ARM::LR), Uses); + for (auto *Use : Uses) { + if (Use != Start && Use != Dec) { + LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use); + return false; + } } } } - // For tail predication, we need to provide the number of elements, instead - // of the iteration count, to the loop start instruction. The number of - // elements is provided to the vctp instruction, so we need to check that - // we can use this register at InsertPt. - MachineInstr *VCTP = VCTPs.back(); - if (Start->getOpcode() == ARM::t2DoLoopStartTP) { - TPNumElements = Start->getOperand(2); - StartInsertPt = Start; - StartInsertBB = Start->getParent(); - } else { - TPNumElements = VCTP->getOperand(1); - MCRegister NumElements = TPNumElements.getReg().asMCReg(); - - // If the register is defined within loop, then we can't perform TP. - // TODO: Check whether this is just a mov of a register that would be - // available. 
- if (RDA.hasLocalDefBefore(VCTP, NumElements)) { - LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n"); + // For tail predication, we need to provide the number of elements, instead + // of the iteration count, to the loop start instruction. The number of + // elements is provided to the vctp instruction, so we need to check that + // we can use this register at InsertPt. + MachineInstr *VCTP = VCTPs.back(); + if (Start->getOpcode() == ARM::t2DoLoopStartTP) { + TPNumElements = Start->getOperand(2); + StartInsertPt = Start; + StartInsertBB = Start->getParent(); + } else { + TPNumElements = VCTP->getOperand(1); + MCRegister NumElements = TPNumElements.getReg().asMCReg(); + + // If the register is defined within loop, then we can't perform TP. + // TODO: Check whether this is just a mov of a register that would be + // available. + if (RDA.hasLocalDefBefore(VCTP, NumElements)) { + LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n"); + return false; + } + + // The element count register maybe defined after InsertPt, in which case we + // need to try to move either InsertPt or the def so that the [w|d]lstp can + // use the value. + + if (StartInsertPt != StartInsertBB->end() && + !RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) { + if (auto *ElemDef = + RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) { + if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) { + ElemDef->removeFromParent(); + StartInsertBB->insert(StartInsertPt, ElemDef); + LLVM_DEBUG(dbgs() + << "ARM Loops: Moved element count def: " << *ElemDef); + } else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) { + StartInsertPt->removeFromParent(); + StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), + &*StartInsertPt); + LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef); + } else { + // If we fail to move an instruction and the element count is provided + // by a mov, use the mov operand if it will have the same value at the + // insertion point + MachineOperand Operand = ElemDef->getOperand(1); + if (isMovRegOpcode(ElemDef->getOpcode()) && + RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg().asMCReg()) == + RDA.getUniqueReachingMIDef(&*StartInsertPt, + Operand.getReg().asMCReg())) { + TPNumElements = Operand; + NumElements = TPNumElements.getReg(); + } else { + LLVM_DEBUG(dbgs() + << "ARM Loops: Unable to move element count to loop " + << "start instruction.\n"); + return false; + } + } + } + } + + // Especially in the case of while loops, InsertBB may not be the + // preheader, so we need to check that the register isn't redefined + // before entering the loop. + auto CannotProvideElements = [this](MachineBasicBlock *MBB, + MCRegister NumElements) { + if (MBB->empty()) + return false; + // NumElements is redefined in this block. + if (RDA.hasLocalDefBefore(&MBB->back(), NumElements)) + return true; + + // Don't continue searching up through multiple predecessors. + if (MBB->pred_size() > 1) + return true; + return false; + }; + + // Search backwards for a def, until we get to InsertBB. + MachineBasicBlock *MBB = Preheader; + while (MBB && MBB != StartInsertBB) { + if (CannotProvideElements(MBB, NumElements)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n"); + return false; + } + MBB = *MBB->pred_begin(); } - - // The element count register maybe defined after InsertPt, in which case we - // need to try to move either InsertPt or the def so that the [w|d]lstp can - // use the value. 
- - if (StartInsertPt != StartInsertBB->end() && - !RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) { - if (auto *ElemDef = - RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) { - if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) { - ElemDef->removeFromParent(); - StartInsertBB->insert(StartInsertPt, ElemDef); - LLVM_DEBUG(dbgs() - << "ARM Loops: Moved element count def: " << *ElemDef); - } else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) { - StartInsertPt->removeFromParent(); - StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), - &*StartInsertPt); - LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef); - } else { - // If we fail to move an instruction and the element count is provided - // by a mov, use the mov operand if it will have the same value at the - // insertion point - MachineOperand Operand = ElemDef->getOperand(1); - if (isMovRegOpcode(ElemDef->getOpcode()) && - RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg().asMCReg()) == - RDA.getUniqueReachingMIDef(&*StartInsertPt, - Operand.getReg().asMCReg())) { - TPNumElements = Operand; - NumElements = TPNumElements.getReg(); - } else { - LLVM_DEBUG(dbgs() - << "ARM Loops: Unable to move element count to loop " - << "start instruction.\n"); - return false; - } - } - } - } - - // Especially in the case of while loops, InsertBB may not be the - // preheader, so we need to check that the register isn't redefined - // before entering the loop. - auto CannotProvideElements = [this](MachineBasicBlock *MBB, - MCRegister NumElements) { - if (MBB->empty()) - return false; - // NumElements is redefined in this block. - if (RDA.hasLocalDefBefore(&MBB->back(), NumElements)) - return true; - - // Don't continue searching up through multiple predecessors. - if (MBB->pred_size() > 1) - return true; - - return false; - }; - - // Search backwards for a def, until we get to InsertBB. - MachineBasicBlock *MBB = Preheader; - while (MBB && MBB != StartInsertBB) { - if (CannotProvideElements(MBB, NumElements)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n"); - return false; - } - MBB = *MBB->pred_begin(); - } - } - - // Could inserting the [W|D]LSTP cause some unintended affects? In a perfect - // world the [w|d]lstp instruction would be last instruction in the preheader - // and so it would only affect instructions within the loop body. But due to - // scheduling, and/or the logic in this pass (above), the insertion point can - // be moved earlier. So if the Loop Start isn't the last instruction in the - // preheader, and if the initial element count is smaller than the vector - // width, the Loop Start instruction will immediately generate one or more - // false lane mask which can, incorrectly, affect the proceeding MVE - // instructions in the preheader. - if (std::any_of(StartInsertPt, StartInsertBB->end(), shouldInspect)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Instruction blocks [W|D]LSTP\n"); - return false; } + // Could inserting the [W|D]LSTP cause some unintended affects? In a perfect + // world the [w|d]lstp instruction would be last instruction in the preheader + // and so it would only affect instructions within the loop body. But due to + // scheduling, and/or the logic in this pass (above), the insertion point can + // be moved earlier. 
So if the Loop Start isn't the last instruction in the + // preheader, and if the initial element count is smaller than the vector + // width, the Loop Start instruction will immediately generate one or more + // false lane mask which can, incorrectly, affect the proceeding MVE + // instructions in the preheader. + if (std::any_of(StartInsertPt, StartInsertBB->end(), shouldInspect)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Instruction blocks [W|D]LSTP\n"); + return false; + } + // Check that the value change of the element count is what we expect and // that the predication will be equivalent. For this we need: // NumElements = NumElements - VectorWidth. The sub will be a sub immediate @@ -760,20 +760,20 @@ bool LowOverheadLoop::ValidateTailPredicate() { return -getAddSubImmediate(*MI) == ExpectedVecWidth; }; - MachineBasicBlock *MBB = VCTP->getParent(); - // Remove modifications to the element count since they have no purpose in a - // tail predicated loop. Explicitly refer to the vctp operand no matter which - // register NumElements has been assigned to, since that is what the - // modifications will be using - if (auto *Def = RDA.getUniqueReachingMIDef( - &MBB->back(), VCTP->getOperand(1).getReg().asMCReg())) { + MachineBasicBlock *MBB = VCTP->getParent(); + // Remove modifications to the element count since they have no purpose in a + // tail predicated loop. Explicitly refer to the vctp operand no matter which + // register NumElements has been assigned to, since that is what the + // modifications will be using + if (auto *Def = RDA.getUniqueReachingMIDef( + &MBB->back(), VCTP->getOperand(1).getReg().asMCReg())) { SmallPtrSet<MachineInstr*, 2> ElementChain; - SmallPtrSet<MachineInstr*, 2> Ignore; + SmallPtrSet<MachineInstr*, 2> Ignore; unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode()); - Ignore.insert(VCTPs.begin(), VCTPs.end()); + Ignore.insert(VCTPs.begin(), VCTPs.end()); - if (TryRemove(Def, RDA, ElementChain, Ignore)) { + if (TryRemove(Def, RDA, ElementChain, Ignore)) { bool FoundSub = false; for (auto *MI : ElementChain) { @@ -781,17 +781,17 @@ bool LowOverheadLoop::ValidateTailPredicate() { continue; if (isSubImmOpcode(MI->getOpcode())) { - if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element" - " count: " << *MI); + if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element" + " count: " << *MI); return false; - } + } FoundSub = true; - } else { - LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element" - " count: " << *MI); + } else { + LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element" + " count: " << *MI); return false; - } + } } ToRemove.insert(ElementChain.begin(), ElementChain.end()); } @@ -868,18 +868,18 @@ static bool producesFalseLanesZero(MachineInstr &MI, if (canGenerateNonZeros(MI)) return false; - bool isPredicated = isVectorPredicated(&MI); - // Predicated loads will write zeros to the falsely predicated bytes of the - // destination register. - if (MI.mayLoad()) - return isPredicated; - - auto IsZeroInit = [](MachineInstr *Def) { - return !isVectorPredicated(Def) && - Def->getOpcode() == ARM::MVE_VMOVimmi32 && - Def->getOperand(1).getImm() == 0; - }; - + bool isPredicated = isVectorPredicated(&MI); + // Predicated loads will write zeros to the falsely predicated bytes of the + // destination register. 
+ if (MI.mayLoad()) + return isPredicated; + + auto IsZeroInit = [](MachineInstr *Def) { + return !isVectorPredicated(Def) && + Def->getOpcode() == ARM::MVE_VMOVimmi32 && + Def->getOperand(1).getImm() == 0; + }; + bool AllowScalars = isHorizontalReduction(MI); for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.getReg()) @@ -887,21 +887,21 @@ static bool producesFalseLanesZero(MachineInstr &MI, if (!isRegInClass(MO, QPRs) && AllowScalars) continue; - // Check that this instruction will produce zeros in its false lanes: - // - If it only consumes false lanes zero or constant 0 (vmov #0) - // - If it's predicated, it only matters that it's def register already has - // false lane zeros, so we can ignore the uses. - SmallPtrSet<MachineInstr *, 2> Defs; - RDA.getGlobalReachingDefs(&MI, MO.getReg(), Defs); - for (auto *Def : Defs) { - if (Def == &MI || FalseLanesZero.count(Def) || IsZeroInit(Def)) - continue; - if (MO.isUse() && isPredicated) - continue; + // Check that this instruction will produce zeros in its false lanes: + // - If it only consumes false lanes zero or constant 0 (vmov #0) + // - If it's predicated, it only matters that it's def register already has + // false lane zeros, so we can ignore the uses. + SmallPtrSet<MachineInstr *, 2> Defs; + RDA.getGlobalReachingDefs(&MI, MO.getReg(), Defs); + for (auto *Def : Defs) { + if (Def == &MI || FalseLanesZero.count(Def) || IsZeroInit(Def)) + continue; + if (MO.isUse() && isPredicated) + continue; return false; - } + } } - LLVM_DEBUG(dbgs() << "ARM Loops: Always False Zeros: " << MI); + LLVM_DEBUG(dbgs() << "ARM Loops: Always False Zeros: " << MI); return true; } @@ -921,7 +921,7 @@ bool LowOverheadLoop::ValidateLiveOuts() { // the false lanes are zeroed and here we're trying to track that those false // lanes remain zero, or where they change, the differences are masked away // by their user(s). - // All MVE stores have to be predicated, so we know that any predicate load + // All MVE stores have to be predicated, so we know that any predicate load // operands, or stored results are equivalent already. Other explicitly // predicated instructions will perform the same operation in the original // loop and the tail-predicated form too. 
Because of this, we can insert @@ -934,32 +934,32 @@ bool LowOverheadLoop::ValidateLiveOuts() { MachineBasicBlock *Header = ML.getHeader(); for (auto &MI : *Header) { - if (!shouldInspect(MI)) + if (!shouldInspect(MI)) continue; if (isVCTP(&MI) || isVPTOpcode(MI.getOpcode())) continue; - bool isPredicated = isVectorPredicated(&MI); - bool retainsOrReduces = - retainsPreviousHalfElement(MI) || isHorizontalReduction(MI); - - if (isPredicated) + bool isPredicated = isVectorPredicated(&MI); + bool retainsOrReduces = + retainsPreviousHalfElement(MI) || isHorizontalReduction(MI); + + if (isPredicated) Predicated.insert(&MI); - if (producesFalseLanesZero(MI, QPRs, RDA, FalseLanesZero)) - FalseLanesZero.insert(&MI); - else if (MI.getNumDefs() == 0) + if (producesFalseLanesZero(MI, QPRs, RDA, FalseLanesZero)) + FalseLanesZero.insert(&MI); + else if (MI.getNumDefs() == 0) continue; - else if (!isPredicated && retainsOrReduces) - return false; - else if (!isPredicated) + else if (!isPredicated && retainsOrReduces) + return false; + else if (!isPredicated) FalseLanesUnknown.insert(&MI); } auto HasPredicatedUsers = [this](MachineInstr *MI, const MachineOperand &MO, SmallPtrSetImpl<MachineInstr *> &Predicated) { SmallPtrSet<MachineInstr *, 2> Uses; - RDA.getGlobalUses(MI, MO.getReg().asMCReg(), Uses); + RDA.getGlobalUses(MI, MO.getReg().asMCReg(), Uses); for (auto *Use : Uses) { if (Use != MI && !Predicated.count(Use)) return false; @@ -982,12 +982,12 @@ bool LowOverheadLoop::ValidateLiveOuts() { LLVM_DEBUG(dbgs() << "ARM Loops: Found an unknown def of : " << TRI.getRegAsmName(MO.getReg()) << " at " << *MI); NonPredicated.insert(MI); - break; + break; } } // Any unknown false lanes have been masked away by the user(s). - if (!NonPredicated.contains(MI)) - Predicated.insert(MI); + if (!NonPredicated.contains(MI)) + Predicated.insert(MI); } SmallPtrSet<MachineInstr *, 2> LiveOutMIs; @@ -997,13 +997,13 @@ bool LowOverheadLoop::ValidateLiveOuts() { assert(ExitBlocks.size() == 1 && "Expected a single exit block"); MachineBasicBlock *ExitBB = ExitBlocks.front(); for (const MachineBasicBlock::RegisterMaskPair &RegMask : ExitBB->liveins()) { - // TODO: Instead of blocking predication, we could move the vctp to the exit - // block and calculate it's operand there in or the preheader. - if (RegMask.PhysReg == ARM::VPR) - return false; + // TODO: Instead of blocking predication, we could move the vctp to the exit + // block and calculate it's operand there in or the preheader. + if (RegMask.PhysReg == ARM::VPR) + return false; // Check Q-regs that are live in the exit blocks. We don't collect scalars // because they won't be affected by lane predication. - if (QPRs->contains(RegMask.PhysReg)) + if (QPRs->contains(RegMask.PhysReg)) if (auto *MI = RDA.getLocalLiveOutMIDef(Header, RegMask.PhysReg)) LiveOutMIs.insert(MI); } @@ -1014,123 +1014,123 @@ bool LowOverheadLoop::ValidateLiveOuts() { // instruction needs to be predicated, so check this here. The instructions // in NonPredicated have been found to be a reduction that we can ensure its // legality. 
- for (auto *MI : LiveOutMIs) { - if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Unable to handle live out: " << *MI); + for (auto *MI : LiveOutMIs) { + if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Unable to handle live out: " << *MI); return false; - } - } + } + } return true; } -void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) { +void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) { if (Revert) return; - // Check branch target ranges: WLS[TP] can only branch forwards and LE[TP] - // can only jump back. - auto ValidateRanges = [](MachineInstr *Start, MachineInstr *End, - ARMBasicBlockUtils *BBUtils, MachineLoop &ML) { - MachineBasicBlock *TgtBB = End->getOpcode() == ARM::t2LoopEnd - ? End->getOperand(1).getMBB() - : End->getOperand(2).getMBB(); - // TODO Maybe there's cases where the target doesn't have to be the header, - // but for now be safe and revert. - if (TgtBB != ML.getHeader()) { - LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targeting header.\n"); - return false; - } - - // The WLS and LE instructions have 12-bits for the label offset. WLS - // requires a positive offset, while LE uses negative. - if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) || - !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) { - LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n"); - return false; - } - - if (Start->getOpcode() == ARM::t2WhileLoopStart && - (BBUtils->getOffsetOf(Start) > - BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) || - !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) { - LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n"); - return false; - } - return true; - }; - - // Find a suitable position to insert the loop start instruction. It needs to - // be able to safely define LR. - auto FindStartInsertionPoint = [](MachineInstr *Start, MachineInstr *Dec, - MachineBasicBlock::iterator &InsertPt, - MachineBasicBlock *&InsertBB, - ReachingDefAnalysis &RDA, - InstSet &ToRemove) { - // For a t2DoLoopStart it is always valid to use the start insertion point. - // For WLS we can define LR if LR already contains the same value. - if (isDo(Start) || Start->getOperand(0).getReg() == ARM::LR) { - InsertPt = MachineBasicBlock::iterator(Start); - InsertBB = Start->getParent(); - return true; - } - - // We've found no suitable LR def and Start doesn't use LR directly. Can we - // just define LR anyway? - if (!RDA.isSafeToDefRegAt(Start, MCRegister::from(ARM::LR))) - return false; - - InsertPt = MachineBasicBlock::iterator(Start); - InsertBB = Start->getParent(); - return true; - }; - - if (!FindStartInsertionPoint(Start, Dec, StartInsertPt, StartInsertBB, RDA, - ToRemove)) { + // Check branch target ranges: WLS[TP] can only branch forwards and LE[TP] + // can only jump back. + auto ValidateRanges = [](MachineInstr *Start, MachineInstr *End, + ARMBasicBlockUtils *BBUtils, MachineLoop &ML) { + MachineBasicBlock *TgtBB = End->getOpcode() == ARM::t2LoopEnd + ? End->getOperand(1).getMBB() + : End->getOperand(2).getMBB(); + // TODO Maybe there's cases where the target doesn't have to be the header, + // but for now be safe and revert. + if (TgtBB != ML.getHeader()) { + LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targeting header.\n"); + return false; + } + + // The WLS and LE instructions have 12-bits for the label offset. WLS + // requires a positive offset, while LE uses negative. 
+ if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) || + !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) { + LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n"); + return false; + } + + if (Start->getOpcode() == ARM::t2WhileLoopStart && + (BBUtils->getOffsetOf(Start) > + BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) || + !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) { + LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n"); + return false; + } + return true; + }; + + // Find a suitable position to insert the loop start instruction. It needs to + // be able to safely define LR. + auto FindStartInsertionPoint = [](MachineInstr *Start, MachineInstr *Dec, + MachineBasicBlock::iterator &InsertPt, + MachineBasicBlock *&InsertBB, + ReachingDefAnalysis &RDA, + InstSet &ToRemove) { + // For a t2DoLoopStart it is always valid to use the start insertion point. + // For WLS we can define LR if LR already contains the same value. + if (isDo(Start) || Start->getOperand(0).getReg() == ARM::LR) { + InsertPt = MachineBasicBlock::iterator(Start); + InsertBB = Start->getParent(); + return true; + } + + // We've found no suitable LR def and Start doesn't use LR directly. Can we + // just define LR anyway? + if (!RDA.isSafeToDefRegAt(Start, MCRegister::from(ARM::LR))) + return false; + + InsertPt = MachineBasicBlock::iterator(Start); + InsertBB = Start->getParent(); + return true; + }; + + if (!FindStartInsertionPoint(Start, Dec, StartInsertPt, StartInsertBB, RDA, + ToRemove)) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); Revert = true; return; - } - LLVM_DEBUG(if (StartInsertPt == StartInsertBB->end()) - dbgs() << "ARM Loops: Will insert LoopStart at end of block\n"; - else - dbgs() << "ARM Loops: Will insert LoopStart at " - << *StartInsertPt - ); - - Revert = !ValidateRanges(Start, End, BBUtils, ML); - CannotTailPredicate = !ValidateTailPredicate(); -} - -bool LowOverheadLoop::AddVCTP(MachineInstr *MI) { - LLVM_DEBUG(dbgs() << "ARM Loops: Adding VCTP: " << *MI); - if (VCTPs.empty()) { - VCTPs.push_back(MI); - return true; + } + LLVM_DEBUG(if (StartInsertPt == StartInsertBB->end()) + dbgs() << "ARM Loops: Will insert LoopStart at end of block\n"; + else + dbgs() << "ARM Loops: Will insert LoopStart at " + << *StartInsertPt + ); + + Revert = !ValidateRanges(Start, End, BBUtils, ML); + CannotTailPredicate = !ValidateTailPredicate(); +} + +bool LowOverheadLoop::AddVCTP(MachineInstr *MI) { + LLVM_DEBUG(dbgs() << "ARM Loops: Adding VCTP: " << *MI); + if (VCTPs.empty()) { + VCTPs.push_back(MI); + return true; } - // If we find another VCTP, check whether it uses the same value as the main VCTP. - // If it does, store it in the VCTPs set, else refuse it. - MachineInstr *Prev = VCTPs.back(); - if (!Prev->getOperand(1).isIdenticalTo(MI->getOperand(1)) || - !RDA.hasSameReachingDef(Prev, MI, MI->getOperand(1).getReg().asMCReg())) { - LLVM_DEBUG(dbgs() << "ARM Loops: Found VCTP with a different reaching " - "definition from the main VCTP"); - return false; - } - VCTPs.push_back(MI); - return true; + // If we find another VCTP, check whether it uses the same value as the main VCTP. + // If it does, store it in the VCTPs set, else refuse it. 
+ MachineInstr *Prev = VCTPs.back(); + if (!Prev->getOperand(1).isIdenticalTo(MI->getOperand(1)) || + !RDA.hasSameReachingDef(Prev, MI, MI->getOperand(1).getReg().asMCReg())) { + LLVM_DEBUG(dbgs() << "ARM Loops: Found VCTP with a different reaching " + "definition from the main VCTP"); + return false; + } + VCTPs.push_back(MI); + return true; } bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) { if (CannotTailPredicate) return false; - if (!shouldInspect(*MI)) - return true; + if (!shouldInspect(*MI)) + return true; - if (MI->getOpcode() == ARM::MVE_VPSEL || - MI->getOpcode() == ARM::MVE_VPNOT) { + if (MI->getOpcode() == ARM::MVE_VPSEL || + MI->getOpcode() == ARM::MVE_VPNOT) { // TODO: Allow VPSEL and VPNOT, we currently cannot because: // 1) It will use the VPR as a predicate operand, but doesn't have to be // instead a VPT block, which means we can assert while building up @@ -1142,24 +1142,24 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) { return false; } - // Record all VCTPs and check that they're equivalent to one another. - if (isVCTP(MI) && !AddVCTP(MI)) - return false; - - // Inspect uses first so that any instructions that alter the VPR don't - // alter the predicate upon themselves. - const MCInstrDesc &MCID = MI->getDesc(); + // Record all VCTPs and check that they're equivalent to one another. + if (isVCTP(MI) && !AddVCTP(MI)) + return false; + + // Inspect uses first so that any instructions that alter the VPR don't + // alter the predicate upon themselves. + const MCInstrDesc &MCID = MI->getDesc(); bool IsUse = false; - unsigned LastOpIdx = MI->getNumOperands() - 1; - for (auto &Op : enumerate(reverse(MCID.operands()))) { - const MachineOperand &MO = MI->getOperand(LastOpIdx - Op.index()); - if (!MO.isReg() || !MO.isUse() || MO.getReg() != ARM::VPR) + unsigned LastOpIdx = MI->getNumOperands() - 1; + for (auto &Op : enumerate(reverse(MCID.operands()))) { + const MachineOperand &MO = MI->getOperand(LastOpIdx - Op.index()); + if (!MO.isReg() || !MO.isUse() || MO.getReg() != ARM::VPR) continue; - if (ARM::isVpred(Op.value().OperandType)) { - VPTState::addInst(MI); + if (ARM::isVpred(Op.value().OperandType)) { + VPTState::addInst(MI); IsUse = true; - } else if (MI->getOpcode() != ARM::MVE_VPST) { + } else if (MI->getOpcode() != ARM::MVE_VPST) { LLVM_DEBUG(dbgs() << "ARM Loops: Found instruction using vpr: " << *MI); return false; } @@ -1168,36 +1168,36 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) { // If we find an instruction that has been marked as not valid for tail // predication, only allow the instruction if it's contained within a valid // VPT block. - bool RequiresExplicitPredication = - (MCID.TSFlags & ARMII::ValidForTailPredication) == 0; - if (isDomainMVE(MI) && RequiresExplicitPredication) { - LLVM_DEBUG(if (!IsUse) - dbgs() << "ARM Loops: Can't tail predicate: " << *MI); - return IsUse; + bool RequiresExplicitPredication = + (MCID.TSFlags & ARMII::ValidForTailPredication) == 0; + if (isDomainMVE(MI) && RequiresExplicitPredication) { + LLVM_DEBUG(if (!IsUse) + dbgs() << "ARM Loops: Can't tail predicate: " << *MI); + return IsUse; } // If the instruction is already explicitly predicated, then the conversion - // will be fine, but ensure that all store operations are predicated. - if (MI->mayStore()) - return IsUse; - - // If this instruction defines the VPR, update the predicate for the - // proceeding instructions. 
- if (isVectorPredicate(MI)) { - // Clear the existing predicate when we're not in VPT Active state, - // otherwise we add to it. - if (!isVectorPredicated(MI)) - VPTState::resetPredicate(MI); - else - VPTState::addPredicate(MI); - } - - // Finally once the predicate has been modified, we can start a new VPT - // block if necessary. - if (isVPTOpcode(MI->getOpcode())) - VPTState::CreateVPTBlock(MI); - - return true; + // will be fine, but ensure that all store operations are predicated. + if (MI->mayStore()) + return IsUse; + + // If this instruction defines the VPR, update the predicate for the + // proceeding instructions. + if (isVectorPredicate(MI)) { + // Clear the existing predicate when we're not in VPT Active state, + // otherwise we add to it. + if (!isVectorPredicated(MI)) + VPTState::resetPredicate(MI); + else + VPTState::addPredicate(MI); + } + + // Finally once the predicate has been modified, we can start a new VPT + // block if necessary. + if (isVPTOpcode(MI->getOpcode())) + VPTState::CreateVPTBlock(MI); + + return true; } bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { @@ -1220,7 +1220,7 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { bool Changed = false; for (auto ML : *MLI) { - if (ML->isOutermost()) + if (ML->isOutermost()) Changed |= ProcessLoop(ML); } Changed |= RevertNonLoops(); @@ -1279,8 +1279,8 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { LoLoop.Dec = &MI; else if (MI.getOpcode() == ARM::t2LoopEnd) LoLoop.End = &MI; - else if (MI.getOpcode() == ARM::t2LoopEndDec) - LoLoop.End = LoLoop.Dec = &MI; + else if (MI.getOpcode() == ARM::t2LoopEndDec) + LoLoop.End = LoLoop.Dec = &MI; else if (isLoopStart(MI)) LoLoop.Start = &MI; else if (MI.getDesc().isCall()) { @@ -1303,18 +1303,18 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { return false; } - // Check that the only instruction using LoopDec is LoopEnd. This can only - // happen when the Dec and End are separate, not a single t2LoopEndDec. + // Check that the only instruction using LoopDec is LoopEnd. This can only + // happen when the Dec and End are separate, not a single t2LoopEndDec. // TODO: Check for copy chains that really have no effect. - if (LoLoop.Dec != LoLoop.End) { - SmallPtrSet<MachineInstr *, 2> Uses; - RDA->getReachingLocalUses(LoLoop.Dec, MCRegister::from(ARM::LR), Uses); - if (Uses.size() > 1 || !Uses.count(LoLoop.End)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n"); - LoLoop.Revert = true; - } + if (LoLoop.Dec != LoLoop.End) { + SmallPtrSet<MachineInstr *, 2> Uses; + RDA->getReachingLocalUses(LoLoop.Dec, MCRegister::from(ARM::LR), Uses); + if (Uses.size() > 1 || !Uses.count(LoLoop.End)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n"); + LoLoop.Revert = true; + } } - LoLoop.Validate(BBUtils.get()); + LoLoop.Validate(BBUtils.get()); Expand(LoLoop); return true; } @@ -1329,14 +1329,14 @@ void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const { unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? 
ARM::tBcc : ARM::t2Bcc; - RevertWhileLoopStart(MI, TII, BrOpc); -} - -void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const { - LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI); - RevertDoLoopStart(MI, TII); + RevertWhileLoopStart(MI, TII, BrOpc); } +void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const { + LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI); + RevertDoLoopStart(MI, TII); +} + bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI); MachineBasicBlock *MBB = MI->getParent(); @@ -1349,10 +1349,10 @@ bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { } // If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS. - bool SetFlags = - RDA->isSafeToDefRegAt(MI, MCRegister::from(ARM::CPSR), Ignore); + bool SetFlags = + RDA->isSafeToDefRegAt(MI, MCRegister::from(ARM::CPSR), Ignore); - llvm::RevertLoopDec(MI, TII, SetFlags); + llvm::RevertLoopDec(MI, TII, SetFlags); return SetFlags; } @@ -1364,35 +1364,35 @@ void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const { unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc; - llvm::RevertLoopEnd(MI, TII, BrOpc, SkipCmp); -} - -// Generate a subs, or sub and cmp, and a branch instead of an LE. -void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *MI) const { - LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to subs, br: " << *MI); - assert(MI->getOpcode() == ARM::t2LoopEndDec && "Expected a t2LoopEndDec!"); - MachineBasicBlock *MBB = MI->getParent(); - + llvm::RevertLoopEnd(MI, TII, BrOpc, SkipCmp); +} + +// Generate a subs, or sub and cmp, and a branch instead of an LE. +void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *MI) const { + LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to subs, br: " << *MI); + assert(MI->getOpcode() == ARM::t2LoopEndDec && "Expected a t2LoopEndDec!"); + MachineBasicBlock *MBB = MI->getParent(); + MachineInstrBuilder MIB = - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); - MIB.addDef(ARM::LR); - MIB.add(MI->getOperand(1)); - MIB.addImm(1); - MIB.addImm(ARMCC::AL); - MIB.addReg(ARM::NoRegister); - MIB.addReg(ARM::CPSR); - MIB->getOperand(5).setIsDef(true); - - MachineBasicBlock *DestBB = MI->getOperand(2).getMBB(); - unsigned BrOpc = - BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc; - - // Create bne - MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); - MIB.add(MI->getOperand(2)); // branch target - MIB.addImm(ARMCC::NE); // condition code + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); + MIB.addDef(ARM::LR); + MIB.add(MI->getOperand(1)); + MIB.addImm(1); + MIB.addImm(ARMCC::AL); + MIB.addReg(ARM::NoRegister); MIB.addReg(ARM::CPSR); - + MIB->getOperand(5).setIsDef(true); + + MachineBasicBlock *DestBB = MI->getOperand(2).getMBB(); + unsigned BrOpc = + BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc; + + // Create bne + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); + MIB.add(MI->getOperand(2)); // branch target + MIB.addImm(ARMCC::NE); // condition code + MIB.addReg(ARM::CPSR); + MI->eraseFromParent(); } @@ -1403,7 +1403,7 @@ void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *MI) const { // // $lr = big-itercount-expression // .. -// $lr = t2DoLoopStart renamable $lr +// $lr = t2DoLoopStart renamable $lr // vector.body: // .. 
// $vpr = MVE_VCTP32 renamable $r3 @@ -1426,8 +1426,8 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) { LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n"); - MachineInstr *Def = - RDA->getMIOperand(LoLoop.Start, isDo(LoLoop.Start) ? 1 : 0); + MachineInstr *Def = + RDA->getMIOperand(LoLoop.Start, isDo(LoLoop.Start) ? 1 : 0); if (!Def) { LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n"); return; @@ -1435,8 +1435,8 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) { // Collect and remove the users of iteration count. SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec, - LoLoop.End }; - if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed)) + LoLoop.End }; + if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed)) LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n"); } @@ -1446,18 +1446,18 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { // calculate the number of loop iterations. IterationCountDCE(LoLoop); - MachineBasicBlock::iterator InsertPt = LoLoop.StartInsertPt; + MachineBasicBlock::iterator InsertPt = LoLoop.StartInsertPt; MachineInstr *Start = LoLoop.Start; - MachineBasicBlock *MBB = LoLoop.StartInsertBB; + MachineBasicBlock *MBB = LoLoop.StartInsertBB; unsigned Opc = LoLoop.getStartOpcode(); - MachineOperand &Count = LoLoop.getLoopStartOperand(); + MachineOperand &Count = LoLoop.getLoopStartOperand(); MachineInstrBuilder MIB = - BuildMI(*MBB, InsertPt, Start->getDebugLoc(), TII->get(Opc)); + BuildMI(*MBB, InsertPt, Start->getDebugLoc(), TII->get(Opc)); MIB.addDef(ARM::LR); MIB.add(Count); - if (!isDo(Start)) + if (!isDo(Start)) MIB.add(Start->getOperand(1)); LoLoop.ToRemove.insert(Start); @@ -1467,50 +1467,50 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) { auto RemovePredicate = [](MachineInstr *MI) { - if (MI->isDebugInstr()) - return; + if (MI->isDebugInstr()) + return; LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI); - int PIdx = llvm::findFirstVPTPredOperandIdx(*MI); - assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction"); - assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then && - "Expected Then predicate!"); - MI->getOperand(PIdx).setImm(ARMVCC::None); - MI->getOperand(PIdx + 1).setReg(0); + int PIdx = llvm::findFirstVPTPredOperandIdx(*MI); + assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction"); + assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then && + "Expected Then predicate!"); + MI->getOperand(PIdx).setImm(ARMVCC::None); + MI->getOperand(PIdx + 1).setReg(0); }; for (auto &Block : LoLoop.getVPTBlocks()) { - SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts(); - - auto ReplaceVCMPWithVPT = [&](MachineInstr *&TheVCMP, MachineInstr *At) { - assert(TheVCMP && "Replacing a removed or non-existent VCMP"); - // Replace the VCMP with a VPT - MachineInstrBuilder MIB = - BuildMI(*At->getParent(), At, At->getDebugLoc(), - TII->get(VCMPOpcodeToVPT(TheVCMP->getOpcode()))); - MIB.addImm(ARMVCC::Then); - // Register one - MIB.add(TheVCMP->getOperand(1)); - // Register two - MIB.add(TheVCMP->getOperand(2)); - // The comparison code, e.g. 
ge, eq, lt - MIB.add(TheVCMP->getOperand(3)); - LLVM_DEBUG(dbgs() << "ARM Loops: Combining with VCMP to VPT: " << *MIB); - LoLoop.BlockMasksToRecompute.insert(MIB.getInstr()); - LoLoop.ToRemove.insert(TheVCMP); - TheVCMP = nullptr; - }; - - if (VPTState::isEntryPredicatedOnVCTP(Block, /*exclusive*/ true)) { - MachineInstr *VPST = Insts.front(); - if (VPTState::hasUniformPredicate(Block)) { - // A vpt block starting with VPST, is only predicated upon vctp and has no - // internal vpr defs: - // - Remove vpst. - // - Unpredicate the remaining instructions. - LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST); - for (unsigned i = 1; i < Insts.size(); ++i) - RemovePredicate(Insts[i]); - } else { + SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts(); + + auto ReplaceVCMPWithVPT = [&](MachineInstr *&TheVCMP, MachineInstr *At) { + assert(TheVCMP && "Replacing a removed or non-existent VCMP"); + // Replace the VCMP with a VPT + MachineInstrBuilder MIB = + BuildMI(*At->getParent(), At, At->getDebugLoc(), + TII->get(VCMPOpcodeToVPT(TheVCMP->getOpcode()))); + MIB.addImm(ARMVCC::Then); + // Register one + MIB.add(TheVCMP->getOperand(1)); + // Register two + MIB.add(TheVCMP->getOperand(2)); + // The comparison code, e.g. ge, eq, lt + MIB.add(TheVCMP->getOperand(3)); + LLVM_DEBUG(dbgs() << "ARM Loops: Combining with VCMP to VPT: " << *MIB); + LoLoop.BlockMasksToRecompute.insert(MIB.getInstr()); + LoLoop.ToRemove.insert(TheVCMP); + TheVCMP = nullptr; + }; + + if (VPTState::isEntryPredicatedOnVCTP(Block, /*exclusive*/ true)) { + MachineInstr *VPST = Insts.front(); + if (VPTState::hasUniformPredicate(Block)) { + // A vpt block starting with VPST, is only predicated upon vctp and has no + // internal vpr defs: + // - Remove vpst. + // - Unpredicate the remaining instructions. + LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST); + for (unsigned i = 1; i < Insts.size(); ++i) + RemovePredicate(Insts[i]); + } else { // The VPT block has a non-uniform predicate but it uses a vpst and its // entry is guarded only by a vctp, which means we: // - Need to remove the original vpst. @@ -1518,88 +1518,88 @@ void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) { // we come across the divergent vpr def. // - Insert a new vpst to predicate the instruction(s) that following // the divergent vpr def. - MachineInstr *Divergent = VPTState::getDivergent(Block); - MachineBasicBlock *MBB = Divergent->getParent(); - auto DivergentNext = ++MachineBasicBlock::iterator(Divergent); - while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr()) - ++DivergentNext; - - bool DivergentNextIsPredicated = - DivergentNext != MBB->end() && - getVPTInstrPredicate(*DivergentNext) != ARMVCC::None; - - for (auto I = ++MachineBasicBlock::iterator(VPST), E = DivergentNext; - I != E; ++I) + MachineInstr *Divergent = VPTState::getDivergent(Block); + MachineBasicBlock *MBB = Divergent->getParent(); + auto DivergentNext = ++MachineBasicBlock::iterator(Divergent); + while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr()) + ++DivergentNext; + + bool DivergentNextIsPredicated = + DivergentNext != MBB->end() && + getVPTInstrPredicate(*DivergentNext) != ARMVCC::None; + + for (auto I = ++MachineBasicBlock::iterator(VPST), E = DivergentNext; + I != E; ++I) RemovePredicate(&*I); - // Check if the instruction defining vpr is a vcmp so it can be combined - // with the VPST This should be the divergent instruction - MachineInstr *VCMP = - VCMPOpcodeToVPT(Divergent->getOpcode()) != 0 ? 
Divergent : nullptr; - - if (DivergentNextIsPredicated) { - // Insert a VPST at the divergent only if the next instruction - // would actually use it. A VCMP following a VPST can be - // merged into a VPT so do that instead if the VCMP exists. - if (!VCMP) { - // Create a VPST (with a null mask for now, we'll recompute it - // later) - MachineInstrBuilder MIB = - BuildMI(*Divergent->getParent(), Divergent, - Divergent->getDebugLoc(), TII->get(ARM::MVE_VPST)); - MIB.addImm(0); - LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB); - LoLoop.BlockMasksToRecompute.insert(MIB.getInstr()); - } else { - // No RDA checks are necessary here since the VPST would have been - // directly after the VCMP - ReplaceVCMPWithVPT(VCMP, VCMP); - } + // Check if the instruction defining vpr is a vcmp so it can be combined + // with the VPST This should be the divergent instruction + MachineInstr *VCMP = + VCMPOpcodeToVPT(Divergent->getOpcode()) != 0 ? Divergent : nullptr; + + if (DivergentNextIsPredicated) { + // Insert a VPST at the divergent only if the next instruction + // would actually use it. A VCMP following a VPST can be + // merged into a VPT so do that instead if the VCMP exists. + if (!VCMP) { + // Create a VPST (with a null mask for now, we'll recompute it + // later) + MachineInstrBuilder MIB = + BuildMI(*Divergent->getParent(), Divergent, + Divergent->getDebugLoc(), TII->get(ARM::MVE_VPST)); + MIB.addImm(0); + LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB); + LoLoop.BlockMasksToRecompute.insert(MIB.getInstr()); + } else { + // No RDA checks are necessary here since the VPST would have been + // directly after the VCMP + ReplaceVCMPWithVPT(VCMP, VCMP); + } } } - LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST); - LoLoop.ToRemove.insert(VPST); - } else if (Block.containsVCTP()) { - // The vctp will be removed, so either the entire block will be dead or - // the block mask of the vp(s)t will need to be recomputed. - MachineInstr *VPST = Insts.front(); - if (Block.size() == 2) { - assert(VPST->getOpcode() == ARM::MVE_VPST && - "Found a VPST in an otherwise empty vpt block"); - LoLoop.ToRemove.insert(VPST); - } else - LoLoop.BlockMasksToRecompute.insert(VPST); - } else if (Insts.front()->getOpcode() == ARM::MVE_VPST) { - // If this block starts with a VPST then attempt to merge it with the - // preceeding un-merged VCMP into a VPT. This VCMP comes from a VPT - // block that no longer exists - MachineInstr *VPST = Insts.front(); - auto Next = ++MachineBasicBlock::iterator(VPST); - assert(getVPTInstrPredicate(*Next) != ARMVCC::None && - "The instruction after a VPST must be predicated"); - (void)Next; - MachineInstr *VprDef = RDA->getUniqueReachingMIDef(VPST, ARM::VPR); - if (VprDef && VCMPOpcodeToVPT(VprDef->getOpcode()) && - !LoLoop.ToRemove.contains(VprDef)) { - MachineInstr *VCMP = VprDef; - // The VCMP and VPST can only be merged if the VCMP's operands will have - // the same values at the VPST. - // If any of the instructions between the VCMP and VPST are predicated - // then a different code path is expected to have merged the VCMP and - // VPST already. 
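The guard in the check that follows (no VPR use between the VCMP and the VPST, plus identical reaching definitions for both VCMP operands) is what makes the merge safe. As a rough, self-contained sketch of the first half of that guard — not the pass's actual hasVPRUse helper, whose definition lies outside this hunk — an operand scan over a MachineInstr could look like this, assuming the pass's usual ARM backend headers and `using namespace llvm` are in scope:

static bool touchesVPR(const MachineInstr &MI) {
  // Conservatively treat any read or write of VPR as blocking the merge,
  // since it could change the predicate value the combined VPT would use.
  for (const MachineOperand &MO : MI.operands())
    if (MO.isReg() && MO.getReg() == ARM::VPR)
      return true;
  return false;
}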
- if (!std::any_of(++MachineBasicBlock::iterator(VCMP), - MachineBasicBlock::iterator(VPST), hasVPRUse) && - RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(1).getReg()) && - RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(2).getReg())) { - ReplaceVCMPWithVPT(VCMP, VPST); - LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST); - LoLoop.ToRemove.insert(VPST); + LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST); + LoLoop.ToRemove.insert(VPST); + } else if (Block.containsVCTP()) { + // The vctp will be removed, so either the entire block will be dead or + // the block mask of the vp(s)t will need to be recomputed. + MachineInstr *VPST = Insts.front(); + if (Block.size() == 2) { + assert(VPST->getOpcode() == ARM::MVE_VPST && + "Found a VPST in an otherwise empty vpt block"); + LoLoop.ToRemove.insert(VPST); + } else + LoLoop.BlockMasksToRecompute.insert(VPST); + } else if (Insts.front()->getOpcode() == ARM::MVE_VPST) { + // If this block starts with a VPST then attempt to merge it with the + // preceeding un-merged VCMP into a VPT. This VCMP comes from a VPT + // block that no longer exists + MachineInstr *VPST = Insts.front(); + auto Next = ++MachineBasicBlock::iterator(VPST); + assert(getVPTInstrPredicate(*Next) != ARMVCC::None && + "The instruction after a VPST must be predicated"); + (void)Next; + MachineInstr *VprDef = RDA->getUniqueReachingMIDef(VPST, ARM::VPR); + if (VprDef && VCMPOpcodeToVPT(VprDef->getOpcode()) && + !LoLoop.ToRemove.contains(VprDef)) { + MachineInstr *VCMP = VprDef; + // The VCMP and VPST can only be merged if the VCMP's operands will have + // the same values at the VPST. + // If any of the instructions between the VCMP and VPST are predicated + // then a different code path is expected to have merged the VCMP and + // VPST already. + if (!std::any_of(++MachineBasicBlock::iterator(VCMP), + MachineBasicBlock::iterator(VPST), hasVPRUse) && + RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(1).getReg()) && + RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(2).getReg())) { + ReplaceVCMPWithVPT(VCMP, VPST); + LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST); + LoLoop.ToRemove.insert(VPST); } } } } - - LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end()); + + LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end()); } void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) { @@ -1613,9 +1613,9 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) { MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(Opc)); MIB.addDef(ARM::LR); - unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0; - MIB.add(End->getOperand(Off + 0)); - MIB.add(End->getOperand(Off + 1)); + unsigned Off = LoLoop.Dec == LoLoop.End ? 
1 : 0; + MIB.add(End->getOperand(Off + 0)); + MIB.add(End->getOperand(Off + 1)); LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB); LoLoop.ToRemove.insert(LoLoop.Dec); LoLoop.ToRemove.insert(End); @@ -1643,17 +1643,17 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) { if (LoLoop.Start->getOpcode() == ARM::t2WhileLoopStart) RevertWhile(LoLoop.Start); else - RevertDo(LoLoop.Start); - if (LoLoop.Dec == LoLoop.End) - RevertLoopEndDec(LoLoop.End); - else - RevertLoopEnd(LoLoop.End, RevertLoopDec(LoLoop.Dec)); + RevertDo(LoLoop.Start); + if (LoLoop.Dec == LoLoop.End) + RevertLoopEndDec(LoLoop.End); + else + RevertLoopEnd(LoLoop.End, RevertLoopDec(LoLoop.Dec)); } else { LoLoop.Start = ExpandLoopStart(LoLoop); RemoveDeadBranch(LoLoop.Start); LoLoop.End = ExpandLoopEnd(LoLoop); RemoveDeadBranch(LoLoop.End); - if (LoLoop.IsTailPredicationLegal()) + if (LoLoop.IsTailPredicationLegal()) ConvertVPTBlocks(LoLoop); for (auto *I : LoLoop.ToRemove) { LLVM_DEBUG(dbgs() << "ARM Loops: Erasing " << *I); @@ -1691,7 +1691,7 @@ bool ARMLowOverheadLoops::RevertNonLoops() { SmallVector<MachineInstr*, 4> Starts; SmallVector<MachineInstr*, 4> Decs; SmallVector<MachineInstr*, 4> Ends; - SmallVector<MachineInstr *, 4> EndDecs; + SmallVector<MachineInstr *, 4> EndDecs; for (auto &I : MBB) { if (isLoopStart(I)) @@ -1700,11 +1700,11 @@ bool ARMLowOverheadLoops::RevertNonLoops() { Decs.push_back(&I); else if (I.getOpcode() == ARM::t2LoopEnd) Ends.push_back(&I); - else if (I.getOpcode() == ARM::t2LoopEndDec) - EndDecs.push_back(&I); + else if (I.getOpcode() == ARM::t2LoopEndDec) + EndDecs.push_back(&I); } - if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty()) + if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty()) continue; Changed = true; @@ -1713,15 +1713,15 @@ bool ARMLowOverheadLoops::RevertNonLoops() { if (Start->getOpcode() == ARM::t2WhileLoopStart) RevertWhile(Start); else - RevertDo(Start); + RevertDo(Start); } for (auto *Dec : Decs) RevertLoopDec(Dec); for (auto *End : Ends) RevertLoopEnd(End); - for (auto *End : EndDecs) - RevertLoopEndDec(End); + for (auto *End : EndDecs) + RevertLoopEndDec(End); } return Changed; } diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMParallelDSP.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMParallelDSP.cpp index 9a7c1f541a..cd3c3b4ca6 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMParallelDSP.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMParallelDSP.cpp @@ -22,7 +22,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsARM.h" @@ -202,7 +202,7 @@ namespace { public: WidenedLoad(SmallVectorImpl<LoadInst*> &Lds, LoadInst *Wide) : NewLd(Wide) { - append_range(Loads, Lds); + append_range(Loads, Lds); } LoadInst *getLoad() { return NewLd; @@ -374,7 +374,7 @@ bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) { DepMap RAWDeps; // Record any writes that may alias a load. 
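The unknown location size taken on the next line keeps the alias query conservative, since the final widened access size is not known at this point. A minimal standalone illustration of that kind of query, using LLVM's AliasAnalysis and MemoryLocation APIs rather than the pass's exact loop structure (StoreInst/LoadInst stand in for the recorded writes and loads), could be:

static bool writeMayAliasLoad(AliasAnalysis &AA, StoreInst *Write,
                              LoadInst *Read) {
  // beforeOrAfterPointer() means "unknown extent around the pointer", so the
  // check stays safe even before the loads are widened.
  const auto Size = LocationSize::beforeOrAfterPointer();
  MemoryLocation WriteLoc(Write->getPointerOperand(), Size);
  MemoryLocation ReadLoc(Read->getPointerOperand(), Size);
  return !AA.isNoAlias(WriteLoc, ReadLoc);
}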
- const auto Size = LocationSize::beforeOrAfterPointer(); + const auto Size = LocationSize::beforeOrAfterPointer(); for (auto Write : Writes) { for (auto Read : Loads) { MemoryLocation ReadLoc = diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMPredicates.td b/contrib/libs/llvm12/lib/Target/ARM/ARMPredicates.td index 2dc097566d..3c03b95e26 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMPredicates.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMPredicates.td @@ -77,8 +77,8 @@ def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">, AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">; def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">, AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">; -def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">, - AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">; +def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">, + AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">; def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">, AssemblerPredicate<(all_of FeatureVFP2_SP), "VFP2">; @@ -189,9 +189,9 @@ let RecomputePerFunction = 1 in { def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&" " TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||" "Subtarget->hasMinSize())">; - def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<ARMSubtarget>().hardenSlsBlr() }]>; - def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<ARMSubtarget>().hardenSlsBlr() }]>; - + def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<ARMSubtarget>().hardenSlsBlr() }]>; + def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<ARMSubtarget>().hardenSlsBlr() }]>; + } def UseMulOps : Predicate<"Subtarget->useMulOps()">; diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterBankInfo.cpp index 1a7f10a13e..eb905282dc 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -156,10 +156,10 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) "Subclass not added?"); assert(RBGPR.covers(*TRI.getRegClass(ARM::tcGPRRegClassID)) && "Subclass not added?"); - assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRnoip_and_tcGPRRegClassID)) && + assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRnoip_and_tcGPRRegClassID)) && "Subclass not added?"); - assert(RBGPR.covers(*TRI.getRegClass( - ARM::tGPREven_and_GPRnoip_and_tcGPRRegClassID)) && + assert(RBGPR.covers(*TRI.getRegClass( + ARM::tGPREven_and_GPRnoip_and_tcGPRRegClassID)) && "Subclass not added?"); assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPROdd_and_tcGPRRegClassID)) && "Subclass not added?"); @@ -182,12 +182,12 @@ ARMRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, switch (RC.getID()) { case GPRRegClassID: case GPRwithAPSRRegClassID: - case GPRnoipRegClassID: + case GPRnoipRegClassID: case GPRnopcRegClassID: - case GPRnoip_and_GPRnopcRegClassID: + case GPRnoip_and_GPRnopcRegClassID: case rGPRRegClassID: case GPRspRegClassID: - case GPRnoip_and_tcGPRRegClassID: + case GPRnoip_and_tcGPRRegClassID: case tcGPRRegClassID: case tGPRRegClassID: case tGPREvenRegClassID: @@ -195,7 +195,7 @@ ARMRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, case tGPR_and_tGPREvenRegClassID: case tGPR_and_tGPROddRegClassID: case tGPREven_and_tcGPRRegClassID: - case tGPREven_and_GPRnoip_and_tcGPRRegClassID: + case tGPREven_and_GPRnoip_and_tcGPRRegClassID: case tGPROdd_and_tcGPRRegClassID: return 
getRegBank(ARM::GPRRegBankID); case HPRRegClassID: diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterInfo.td b/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterInfo.td index fe3243315d..8ac3e3c402 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterInfo.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMRegisterInfo.td @@ -235,23 +235,23 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), let DiagnosticString = "operand must be a register in range [r0, r15]"; } -// Register set that excludes registers that are reserved for procedure calls. -// This is used for pseudo-instructions that are actually implemented using a -// procedure call. -def GPRnoip : RegisterClass<"ARM", [i32], 32, (sub GPR, R12, LR)> { - // Allocate LR as the first CSR since it is always saved anyway. - // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't - // know how to spill them. If we make our prologue/epilogue code smarter at - // some point, we can go back to using the above allocation orders for the - // Thumb1 instructions that know how to use hi regs. - let AltOrders = [(add GPRnoip, GPRnoip), (trunc GPRnoip, 8), - (add (trunc GPRnoip, 8), (shl GPRnoip, 8))]; - let AltOrderSelect = [{ - return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF); - }]; - let DiagnosticString = "operand must be a register in range [r0, r14]"; -} - +// Register set that excludes registers that are reserved for procedure calls. +// This is used for pseudo-instructions that are actually implemented using a +// procedure call. +def GPRnoip : RegisterClass<"ARM", [i32], 32, (sub GPR, R12, LR)> { + // Allocate LR as the first CSR since it is always saved anyway. + // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't + // know how to spill them. If we make our prologue/epilogue code smarter at + // some point, we can go back to using the above allocation orders for the + // Thumb1 instructions that know how to use hi regs. + let AltOrders = [(add GPRnoip, GPRnoip), (trunc GPRnoip, 8), + (add (trunc GPRnoip, 8), (shl GPRnoip, 8))]; + let AltOrderSelect = [{ + return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF); + }]; + let DiagnosticString = "operand must be a register in range [r0, r14]"; +} + // GPRs without the PC. Some ARM instructions do not allow the PC in // certain operand slots, particularly as the destination. Primarily // useful for disassembly. diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMSLSHardening.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMSLSHardening.cpp index cfcc7d5a04..de2cd45c14 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMSLSHardening.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMSLSHardening.cpp @@ -1,416 +1,416 @@ -//===- ARMSLSHardening.cpp - Harden Straight Line Missspeculation ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass to insert code to mitigate against side channel -// vulnerabilities that may happen under straight line miss-speculation. 
-// -//===----------------------------------------------------------------------===// - -#include "ARM.h" -#include "ARMInstrInfo.h" -#include "ARMSubtarget.h" -#include "llvm/CodeGen/IndirectThunks.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/IR/DebugLoc.h" -#include <cassert> - -using namespace llvm; - -#define DEBUG_TYPE "arm-sls-hardening" - -#define ARM_SLS_HARDENING_NAME "ARM sls hardening pass" - -namespace { - -class ARMSLSHardening : public MachineFunctionPass { -public: - const TargetInstrInfo *TII; - const ARMSubtarget *ST; - - static char ID; - - ARMSLSHardening() : MachineFunctionPass(ID) { - initializeARMSLSHardeningPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &Fn) override; - - StringRef getPassName() const override { return ARM_SLS_HARDENING_NAME; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - -private: - bool hardenReturnsAndBRs(MachineBasicBlock &MBB) const; - bool hardenIndirectCalls(MachineBasicBlock &MBB) const; - MachineBasicBlock & - ConvertIndirectCallToIndirectJump(MachineBasicBlock &MBB, - MachineBasicBlock::iterator) const; -}; - -} // end anonymous namespace - -char ARMSLSHardening::ID = 0; - -INITIALIZE_PASS(ARMSLSHardening, "arm-sls-hardening", - ARM_SLS_HARDENING_NAME, false, false) - -static void insertSpeculationBarrier(const ARMSubtarget *ST, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - DebugLoc DL, - bool AlwaysUseISBDSB = false) { - assert(MBBI != MBB.begin() && - "Must not insert SpeculationBarrierEndBB as only instruction in MBB."); - assert(std::prev(MBBI)->isBarrier() && - "SpeculationBarrierEndBB must only follow unconditional control flow " - "instructions."); - assert(std::prev(MBBI)->isTerminator() && - "SpeculationBarrierEndBB must only follow terminators."); - const TargetInstrInfo *TII = ST->getInstrInfo(); - assert(ST->hasDataBarrier() || ST->hasSB()); - bool ProduceSB = ST->hasSB() && !AlwaysUseISBDSB; - unsigned BarrierOpc = - ProduceSB ? (ST->isThumb() ? ARM::t2SpeculationBarrierSBEndBB - : ARM::SpeculationBarrierSBEndBB) - : (ST->isThumb() ? 
ARM::t2SpeculationBarrierISBDSBEndBB - : ARM::SpeculationBarrierISBDSBEndBB); - if (MBBI == MBB.end() || !isSpeculationBarrierEndBBOpcode(MBBI->getOpcode())) - BuildMI(MBB, MBBI, DL, TII->get(BarrierOpc)); -} - -bool ARMSLSHardening::runOnMachineFunction(MachineFunction &MF) { - ST = &MF.getSubtarget<ARMSubtarget>(); - TII = MF.getSubtarget().getInstrInfo(); - - bool Modified = false; - for (auto &MBB : MF) { - Modified |= hardenReturnsAndBRs(MBB); - Modified |= hardenIndirectCalls(MBB); - } - - return Modified; -} - -bool ARMSLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const { - if (!ST->hardenSlsRetBr()) - return false; - assert(!ST->isThumb1Only()); - bool Modified = false; - MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(), E = MBB.end(); - MachineBasicBlock::iterator NextMBBI; - for (; MBBI != E; MBBI = NextMBBI) { - MachineInstr &MI = *MBBI; - NextMBBI = std::next(MBBI); - if (isIndirectControlFlowNotComingBack(MI)) { - assert(MI.isTerminator()); - assert(!TII->isPredicated(MI)); - insertSpeculationBarrier(ST, MBB, std::next(MBBI), MI.getDebugLoc()); - Modified = true; - } - } - return Modified; -} - -static const char SLSBLRNamePrefix[] = "__llvm_slsblr_thunk_"; - -static const struct ThunkNameRegMode { - const char* Name; - Register Reg; - bool isThumb; -} SLSBLRThunks[] = { - {"__llvm_slsblr_thunk_arm_r0", ARM::R0, false}, - {"__llvm_slsblr_thunk_arm_r1", ARM::R1, false}, - {"__llvm_slsblr_thunk_arm_r2", ARM::R2, false}, - {"__llvm_slsblr_thunk_arm_r3", ARM::R3, false}, - {"__llvm_slsblr_thunk_arm_r4", ARM::R4, false}, - {"__llvm_slsblr_thunk_arm_r5", ARM::R5, false}, - {"__llvm_slsblr_thunk_arm_r6", ARM::R6, false}, - {"__llvm_slsblr_thunk_arm_r7", ARM::R7, false}, - {"__llvm_slsblr_thunk_arm_r8", ARM::R8, false}, - {"__llvm_slsblr_thunk_arm_r9", ARM::R9, false}, - {"__llvm_slsblr_thunk_arm_r10", ARM::R10, false}, - {"__llvm_slsblr_thunk_arm_r11", ARM::R11, false}, - {"__llvm_slsblr_thunk_arm_sp", ARM::SP, false}, - {"__llvm_slsblr_thunk_arm_pc", ARM::PC, false}, - {"__llvm_slsblr_thunk_thumb_r0", ARM::R0, true}, - {"__llvm_slsblr_thunk_thumb_r1", ARM::R1, true}, - {"__llvm_slsblr_thunk_thumb_r2", ARM::R2, true}, - {"__llvm_slsblr_thunk_thumb_r3", ARM::R3, true}, - {"__llvm_slsblr_thunk_thumb_r4", ARM::R4, true}, - {"__llvm_slsblr_thunk_thumb_r5", ARM::R5, true}, - {"__llvm_slsblr_thunk_thumb_r6", ARM::R6, true}, - {"__llvm_slsblr_thunk_thumb_r7", ARM::R7, true}, - {"__llvm_slsblr_thunk_thumb_r8", ARM::R8, true}, - {"__llvm_slsblr_thunk_thumb_r9", ARM::R9, true}, - {"__llvm_slsblr_thunk_thumb_r10", ARM::R10, true}, - {"__llvm_slsblr_thunk_thumb_r11", ARM::R11, true}, - {"__llvm_slsblr_thunk_thumb_sp", ARM::SP, true}, - {"__llvm_slsblr_thunk_thumb_pc", ARM::PC, true}, -}; - -namespace { -struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> { - const char *getThunkPrefix() { return SLSBLRNamePrefix; } - bool mayUseThunk(const MachineFunction &MF) { - // FIXME: This could also check if there are any indirect calls in the - // function to more accurately reflect if a thunk will be needed. - return MF.getSubtarget<ARMSubtarget>().hardenSlsBlr(); - } - void insertThunks(MachineModuleInfo &MMI); - void populateThunk(MachineFunction &MF); -}; -} // namespace - -void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) { - // FIXME: It probably would be possible to filter which thunks to produce - // based on which registers are actually used in indirect calls in this - // function. But would that be a worthwhile optimization? 
- for (auto T : SLSBLRThunks) - createThunkFunction(MMI, T.Name); -} - -void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) { - // FIXME: How to better communicate Register number, rather than through - // name and lookup table? - assert(MF.getName().startswith(getThunkPrefix())); - auto ThunkIt = llvm::find_if( - SLSBLRThunks, [&MF](auto T) { return T.Name == MF.getName(); }); - assert(ThunkIt != std::end(SLSBLRThunks)); - Register ThunkReg = ThunkIt->Reg; - bool isThumb = ThunkIt->isThumb; - - const TargetInstrInfo *TII = MF.getSubtarget<ARMSubtarget>().getInstrInfo(); - MachineBasicBlock *Entry = &MF.front(); - Entry->clear(); - - // These thunks need to consist of the following instructions: - // __llvm_slsblr_thunk_(arm/thumb)_rN: - // bx rN - // barrierInsts - Entry->addLiveIn(ThunkReg); - if (isThumb) - BuildMI(Entry, DebugLoc(), TII->get(ARM::tBX)) - .addReg(ThunkReg) - .add(predOps(ARMCC::AL)); - else - BuildMI(Entry, DebugLoc(), TII->get(ARM::BX)) - .addReg(ThunkReg); - - // Make sure the thunks do not make use of the SB extension in case there is - // a function somewhere that will call to it that for some reason disabled - // the SB extension locally on that function, even though it's enabled for - // the module otherwise. Therefore set AlwaysUseISBSDB to true. - insertSpeculationBarrier(&MF.getSubtarget<ARMSubtarget>(), *Entry, - Entry->end(), DebugLoc(), true /*AlwaysUseISBDSB*/); -} - -MachineBasicBlock &ARMSLSHardening::ConvertIndirectCallToIndirectJump( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { - // Transform an indirect call to an indirect jump as follows: - // Before: - // |-----------------------------| - // | ... | - // | instI | - // | BLX rN | - // | instJ | - // | ... | - // |-----------------------------| - // - // After: - // |---------- -------------------------| - // | ... | - // | instI | - // | *call* __llvm_slsblr_thunk_mode_xN | - // | instJ | - // | ... | - // |--------------------------------------| - // - // __llvm_slsblr_thunk_mode_xN: - // |-----------------------------| - // | BX rN | - // | barrierInsts | - // |-----------------------------| - // - // The __llvm_slsblr_thunk_mode_xN thunks are created by the - // SLSBLRThunkInserter. - // This function merely needs to transform an indirect call to a direct call - // to __llvm_slsblr_thunk_xN. - MachineInstr &IndirectCall = *MBBI; - assert(isIndirectCall(IndirectCall) && !IndirectCall.isReturn()); - int RegOpIdxOnIndirectCall = -1; - bool isThumb; - switch (IndirectCall.getOpcode()) { - case ARM::BLX: // !isThumb2 - case ARM::BLX_noip: // !isThumb2 - isThumb = false; - RegOpIdxOnIndirectCall = 0; - break; - case ARM::tBLXr: // isThumb2 - case ARM::tBLXr_noip: // isThumb2 - isThumb = true; - RegOpIdxOnIndirectCall = 2; - break; - default: - llvm_unreachable("unhandled Indirect Call"); - } - - Register Reg = IndirectCall.getOperand(RegOpIdxOnIndirectCall).getReg(); - // Since linkers are allowed to clobber R12 on function calls, the above - // mitigation only works if the original indirect call instruction was not - // using R12. Code generation before must make sure that no indirect call - // using R12 was produced if the mitigation is enabled. - // Also, the transformation is incorrect if the indirect call uses LR, so - // also have to avoid that. 
- assert(Reg != ARM::R12 && Reg != ARM::LR); - bool RegIsKilled = IndirectCall.getOperand(RegOpIdxOnIndirectCall).isKill(); - - DebugLoc DL = IndirectCall.getDebugLoc(); - - MachineFunction &MF = *MBBI->getMF(); - auto ThunkIt = llvm::find_if(SLSBLRThunks, [Reg, isThumb](auto T) { - return T.Reg == Reg && T.isThumb == isThumb; - }); - assert(ThunkIt != std::end(SLSBLRThunks)); - Module *M = MF.getFunction().getParent(); - const GlobalValue *GV = cast<GlobalValue>(M->getNamedValue(ThunkIt->Name)); - - MachineInstr *BL = - isThumb ? BuildMI(MBB, MBBI, DL, TII->get(ARM::tBL)) - .addImm(IndirectCall.getOperand(0).getImm()) - .addReg(IndirectCall.getOperand(1).getReg()) - .addGlobalAddress(GV) - : BuildMI(MBB, MBBI, DL, TII->get(ARM::BL)).addGlobalAddress(GV); - - // Now copy the implicit operands from IndirectCall to BL and copy other - // necessary info. - // However, both IndirectCall and BL instructions implictly use SP and - // implicitly define LR. Blindly copying implicit operands would result in SP - // and LR operands to be present multiple times. While this may not be too - // much of an issue, let's avoid that for cleanliness, by removing those - // implicit operands from the BL created above before we copy over all - // implicit operands from the IndirectCall. - int ImpLROpIdx = -1; - int ImpSPOpIdx = -1; - for (unsigned OpIdx = BL->getNumExplicitOperands(); - OpIdx < BL->getNumOperands(); OpIdx++) { - MachineOperand Op = BL->getOperand(OpIdx); - if (!Op.isReg()) - continue; - if (Op.getReg() == ARM::LR && Op.isDef()) - ImpLROpIdx = OpIdx; - if (Op.getReg() == ARM::SP && !Op.isDef()) - ImpSPOpIdx = OpIdx; - } - assert(ImpLROpIdx != -1); - assert(ImpSPOpIdx != -1); - int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx); - int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx); - BL->RemoveOperand(FirstOpIdxToRemove); - BL->RemoveOperand(SecondOpIdxToRemove); - // Now copy over the implicit operands from the original IndirectCall - BL->copyImplicitOps(MF, IndirectCall); - MF.moveCallSiteInfo(&IndirectCall, BL); - // Also add the register called in the IndirectCall as being used in the - // called thunk. - BL->addOperand(MachineOperand::CreateReg(Reg, false /*isDef*/, true /*isImp*/, - RegIsKilled /*isKill*/)); - // Remove IndirectCallinstruction - MBB.erase(MBBI); - return MBB; -} - -bool ARMSLSHardening::hardenIndirectCalls(MachineBasicBlock &MBB) const { - if (!ST->hardenSlsBlr()) - return false; - bool Modified = false; - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - MachineBasicBlock::iterator NextMBBI; - for (; MBBI != E; MBBI = NextMBBI) { - MachineInstr &MI = *MBBI; - NextMBBI = std::next(MBBI); - // Tail calls are both indirect calls and "returns". - // They are also indirect jumps, so should be handled by sls-harden-retbr, - // rather than sls-harden-blr. 
- if (isIndirectCall(MI) && !MI.isReturn()) { - ConvertIndirectCallToIndirectJump(MBB, MBBI); - Modified = true; - } - } - return Modified; -} - - - -FunctionPass *llvm::createARMSLSHardeningPass() { - return new ARMSLSHardening(); -} - -namespace { -class ARMIndirectThunks : public MachineFunctionPass { -public: - static char ID; - - ARMIndirectThunks() : MachineFunctionPass(ID) {} - - StringRef getPassName() const override { return "ARM Indirect Thunks"; } - - bool doInitialization(Module &M) override; - bool runOnMachineFunction(MachineFunction &MF) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired<MachineModuleInfoWrapperPass>(); - AU.addPreserved<MachineModuleInfoWrapperPass>(); - } - -private: - std::tuple<SLSBLRThunkInserter> TIs; - - // FIXME: When LLVM moves to C++17, these can become folds - template <typename... ThunkInserterT> - static void initTIs(Module &M, - std::tuple<ThunkInserterT...> &ThunkInserters) { - (void)std::initializer_list<int>{ - (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...}; - } - template <typename... ThunkInserterT> - static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF, - std::tuple<ThunkInserterT...> &ThunkInserters) { - bool Modified = false; - (void)std::initializer_list<int>{ - Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...}; - return Modified; - } -}; - -} // end anonymous namespace - -char ARMIndirectThunks::ID = 0; - -FunctionPass *llvm::createARMIndirectThunks() { - return new ARMIndirectThunks(); -} - -bool ARMIndirectThunks::doInitialization(Module &M) { - initTIs(M, TIs); - return false; -} - -bool ARMIndirectThunks::runOnMachineFunction(MachineFunction &MF) { - LLVM_DEBUG(dbgs() << getPassName() << '\n'); - auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); - return runTIs(MMI, MF, TIs); -} +//===- ARMSLSHardening.cpp - Harden Straight Line Missspeculation ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass to insert code to mitigate against side channel +// vulnerabilities that may happen under straight line miss-speculation. 
+// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMInstrInfo.h" +#include "ARMSubtarget.h" +#include "llvm/CodeGen/IndirectThunks.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/DebugLoc.h" +#include <cassert> + +using namespace llvm; + +#define DEBUG_TYPE "arm-sls-hardening" + +#define ARM_SLS_HARDENING_NAME "ARM sls hardening pass" + +namespace { + +class ARMSLSHardening : public MachineFunctionPass { +public: + const TargetInstrInfo *TII; + const ARMSubtarget *ST; + + static char ID; + + ARMSLSHardening() : MachineFunctionPass(ID) { + initializeARMSLSHardeningPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &Fn) override; + + StringRef getPassName() const override { return ARM_SLS_HARDENING_NAME; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + bool hardenReturnsAndBRs(MachineBasicBlock &MBB) const; + bool hardenIndirectCalls(MachineBasicBlock &MBB) const; + MachineBasicBlock & + ConvertIndirectCallToIndirectJump(MachineBasicBlock &MBB, + MachineBasicBlock::iterator) const; +}; + +} // end anonymous namespace + +char ARMSLSHardening::ID = 0; + +INITIALIZE_PASS(ARMSLSHardening, "arm-sls-hardening", + ARM_SLS_HARDENING_NAME, false, false) + +static void insertSpeculationBarrier(const ARMSubtarget *ST, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, + bool AlwaysUseISBDSB = false) { + assert(MBBI != MBB.begin() && + "Must not insert SpeculationBarrierEndBB as only instruction in MBB."); + assert(std::prev(MBBI)->isBarrier() && + "SpeculationBarrierEndBB must only follow unconditional control flow " + "instructions."); + assert(std::prev(MBBI)->isTerminator() && + "SpeculationBarrierEndBB must only follow terminators."); + const TargetInstrInfo *TII = ST->getInstrInfo(); + assert(ST->hasDataBarrier() || ST->hasSB()); + bool ProduceSB = ST->hasSB() && !AlwaysUseISBDSB; + unsigned BarrierOpc = + ProduceSB ? (ST->isThumb() ? ARM::t2SpeculationBarrierSBEndBB + : ARM::SpeculationBarrierSBEndBB) + : (ST->isThumb() ? 
ARM::t2SpeculationBarrierISBDSBEndBB + : ARM::SpeculationBarrierISBDSBEndBB); + if (MBBI == MBB.end() || !isSpeculationBarrierEndBBOpcode(MBBI->getOpcode())) + BuildMI(MBB, MBBI, DL, TII->get(BarrierOpc)); +} + +bool ARMSLSHardening::runOnMachineFunction(MachineFunction &MF) { + ST = &MF.getSubtarget<ARMSubtarget>(); + TII = MF.getSubtarget().getInstrInfo(); + + bool Modified = false; + for (auto &MBB : MF) { + Modified |= hardenReturnsAndBRs(MBB); + Modified |= hardenIndirectCalls(MBB); + } + + return Modified; +} + +bool ARMSLSHardening::hardenReturnsAndBRs(MachineBasicBlock &MBB) const { + if (!ST->hardenSlsRetBr()) + return false; + assert(!ST->isThumb1Only()); + bool Modified = false; + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(), E = MBB.end(); + MachineBasicBlock::iterator NextMBBI; + for (; MBBI != E; MBBI = NextMBBI) { + MachineInstr &MI = *MBBI; + NextMBBI = std::next(MBBI); + if (isIndirectControlFlowNotComingBack(MI)) { + assert(MI.isTerminator()); + assert(!TII->isPredicated(MI)); + insertSpeculationBarrier(ST, MBB, std::next(MBBI), MI.getDebugLoc()); + Modified = true; + } + } + return Modified; +} + +static const char SLSBLRNamePrefix[] = "__llvm_slsblr_thunk_"; + +static const struct ThunkNameRegMode { + const char* Name; + Register Reg; + bool isThumb; +} SLSBLRThunks[] = { + {"__llvm_slsblr_thunk_arm_r0", ARM::R0, false}, + {"__llvm_slsblr_thunk_arm_r1", ARM::R1, false}, + {"__llvm_slsblr_thunk_arm_r2", ARM::R2, false}, + {"__llvm_slsblr_thunk_arm_r3", ARM::R3, false}, + {"__llvm_slsblr_thunk_arm_r4", ARM::R4, false}, + {"__llvm_slsblr_thunk_arm_r5", ARM::R5, false}, + {"__llvm_slsblr_thunk_arm_r6", ARM::R6, false}, + {"__llvm_slsblr_thunk_arm_r7", ARM::R7, false}, + {"__llvm_slsblr_thunk_arm_r8", ARM::R8, false}, + {"__llvm_slsblr_thunk_arm_r9", ARM::R9, false}, + {"__llvm_slsblr_thunk_arm_r10", ARM::R10, false}, + {"__llvm_slsblr_thunk_arm_r11", ARM::R11, false}, + {"__llvm_slsblr_thunk_arm_sp", ARM::SP, false}, + {"__llvm_slsblr_thunk_arm_pc", ARM::PC, false}, + {"__llvm_slsblr_thunk_thumb_r0", ARM::R0, true}, + {"__llvm_slsblr_thunk_thumb_r1", ARM::R1, true}, + {"__llvm_slsblr_thunk_thumb_r2", ARM::R2, true}, + {"__llvm_slsblr_thunk_thumb_r3", ARM::R3, true}, + {"__llvm_slsblr_thunk_thumb_r4", ARM::R4, true}, + {"__llvm_slsblr_thunk_thumb_r5", ARM::R5, true}, + {"__llvm_slsblr_thunk_thumb_r6", ARM::R6, true}, + {"__llvm_slsblr_thunk_thumb_r7", ARM::R7, true}, + {"__llvm_slsblr_thunk_thumb_r8", ARM::R8, true}, + {"__llvm_slsblr_thunk_thumb_r9", ARM::R9, true}, + {"__llvm_slsblr_thunk_thumb_r10", ARM::R10, true}, + {"__llvm_slsblr_thunk_thumb_r11", ARM::R11, true}, + {"__llvm_slsblr_thunk_thumb_sp", ARM::SP, true}, + {"__llvm_slsblr_thunk_thumb_pc", ARM::PC, true}, +}; + +namespace { +struct SLSBLRThunkInserter : ThunkInserter<SLSBLRThunkInserter> { + const char *getThunkPrefix() { return SLSBLRNamePrefix; } + bool mayUseThunk(const MachineFunction &MF) { + // FIXME: This could also check if there are any indirect calls in the + // function to more accurately reflect if a thunk will be needed. + return MF.getSubtarget<ARMSubtarget>().hardenSlsBlr(); + } + void insertThunks(MachineModuleInfo &MMI); + void populateThunk(MachineFunction &MF); +}; +} // namespace + +void SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI) { + // FIXME: It probably would be possible to filter which thunks to produce + // based on which registers are actually used in indirect calls in this + // function. But would that be a worthwhile optimization? 
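The loop that follows creates one thunk function per table entry; the names follow a fixed "__llvm_slsblr_thunk_<arm|thumb>_<reg>" scheme. A small hypothetical helper (not part of the pass) showing how such a name is composed from the table's conventions:

#include <string>

static std::string slsBlrThunkName(bool IsThumb, const std::string &RegName) {
  // Mirrors the entries of the SLSBLRThunks table above,
  // e.g. slsBlrThunkName(true, "r4") == "__llvm_slsblr_thunk_thumb_r4".
  return std::string("__llvm_slsblr_thunk_") +
         (IsThumb ? "thumb_" : "arm_") + RegName;
}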
+ for (auto T : SLSBLRThunks) + createThunkFunction(MMI, T.Name); +} + +void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) { + // FIXME: How to better communicate Register number, rather than through + // name and lookup table? + assert(MF.getName().startswith(getThunkPrefix())); + auto ThunkIt = llvm::find_if( + SLSBLRThunks, [&MF](auto T) { return T.Name == MF.getName(); }); + assert(ThunkIt != std::end(SLSBLRThunks)); + Register ThunkReg = ThunkIt->Reg; + bool isThumb = ThunkIt->isThumb; + + const TargetInstrInfo *TII = MF.getSubtarget<ARMSubtarget>().getInstrInfo(); + MachineBasicBlock *Entry = &MF.front(); + Entry->clear(); + + // These thunks need to consist of the following instructions: + // __llvm_slsblr_thunk_(arm/thumb)_rN: + // bx rN + // barrierInsts + Entry->addLiveIn(ThunkReg); + if (isThumb) + BuildMI(Entry, DebugLoc(), TII->get(ARM::tBX)) + .addReg(ThunkReg) + .add(predOps(ARMCC::AL)); + else + BuildMI(Entry, DebugLoc(), TII->get(ARM::BX)) + .addReg(ThunkReg); + + // Make sure the thunks do not make use of the SB extension in case there is + // a function somewhere that will call to it that for some reason disabled + // the SB extension locally on that function, even though it's enabled for + // the module otherwise. Therefore set AlwaysUseISBSDB to true. + insertSpeculationBarrier(&MF.getSubtarget<ARMSubtarget>(), *Entry, + Entry->end(), DebugLoc(), true /*AlwaysUseISBDSB*/); +} + +MachineBasicBlock &ARMSLSHardening::ConvertIndirectCallToIndirectJump( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { + // Transform an indirect call to an indirect jump as follows: + // Before: + // |-----------------------------| + // | ... | + // | instI | + // | BLX rN | + // | instJ | + // | ... | + // |-----------------------------| + // + // After: + // |---------- -------------------------| + // | ... | + // | instI | + // | *call* __llvm_slsblr_thunk_mode_xN | + // | instJ | + // | ... | + // |--------------------------------------| + // + // __llvm_slsblr_thunk_mode_xN: + // |-----------------------------| + // | BX rN | + // | barrierInsts | + // |-----------------------------| + // + // The __llvm_slsblr_thunk_mode_xN thunks are created by the + // SLSBLRThunkInserter. + // This function merely needs to transform an indirect call to a direct call + // to __llvm_slsblr_thunk_xN. + MachineInstr &IndirectCall = *MBBI; + assert(isIndirectCall(IndirectCall) && !IndirectCall.isReturn()); + int RegOpIdxOnIndirectCall = -1; + bool isThumb; + switch (IndirectCall.getOpcode()) { + case ARM::BLX: // !isThumb2 + case ARM::BLX_noip: // !isThumb2 + isThumb = false; + RegOpIdxOnIndirectCall = 0; + break; + case ARM::tBLXr: // isThumb2 + case ARM::tBLXr_noip: // isThumb2 + isThumb = true; + RegOpIdxOnIndirectCall = 2; + break; + default: + llvm_unreachable("unhandled Indirect Call"); + } + + Register Reg = IndirectCall.getOperand(RegOpIdxOnIndirectCall).getReg(); + // Since linkers are allowed to clobber R12 on function calls, the above + // mitigation only works if the original indirect call instruction was not + // using R12. Code generation before must make sure that no indirect call + // using R12 was produced if the mitigation is enabled. + // Also, the transformation is incorrect if the indirect call uses LR, so + // also have to avoid that. 
+ assert(Reg != ARM::R12 && Reg != ARM::LR); + bool RegIsKilled = IndirectCall.getOperand(RegOpIdxOnIndirectCall).isKill(); + + DebugLoc DL = IndirectCall.getDebugLoc(); + + MachineFunction &MF = *MBBI->getMF(); + auto ThunkIt = llvm::find_if(SLSBLRThunks, [Reg, isThumb](auto T) { + return T.Reg == Reg && T.isThumb == isThumb; + }); + assert(ThunkIt != std::end(SLSBLRThunks)); + Module *M = MF.getFunction().getParent(); + const GlobalValue *GV = cast<GlobalValue>(M->getNamedValue(ThunkIt->Name)); + + MachineInstr *BL = + isThumb ? BuildMI(MBB, MBBI, DL, TII->get(ARM::tBL)) + .addImm(IndirectCall.getOperand(0).getImm()) + .addReg(IndirectCall.getOperand(1).getReg()) + .addGlobalAddress(GV) + : BuildMI(MBB, MBBI, DL, TII->get(ARM::BL)).addGlobalAddress(GV); + + // Now copy the implicit operands from IndirectCall to BL and copy other + // necessary info. + // However, both IndirectCall and BL instructions implictly use SP and + // implicitly define LR. Blindly copying implicit operands would result in SP + // and LR operands to be present multiple times. While this may not be too + // much of an issue, let's avoid that for cleanliness, by removing those + // implicit operands from the BL created above before we copy over all + // implicit operands from the IndirectCall. + int ImpLROpIdx = -1; + int ImpSPOpIdx = -1; + for (unsigned OpIdx = BL->getNumExplicitOperands(); + OpIdx < BL->getNumOperands(); OpIdx++) { + MachineOperand Op = BL->getOperand(OpIdx); + if (!Op.isReg()) + continue; + if (Op.getReg() == ARM::LR && Op.isDef()) + ImpLROpIdx = OpIdx; + if (Op.getReg() == ARM::SP && !Op.isDef()) + ImpSPOpIdx = OpIdx; + } + assert(ImpLROpIdx != -1); + assert(ImpSPOpIdx != -1); + int FirstOpIdxToRemove = std::max(ImpLROpIdx, ImpSPOpIdx); + int SecondOpIdxToRemove = std::min(ImpLROpIdx, ImpSPOpIdx); + BL->RemoveOperand(FirstOpIdxToRemove); + BL->RemoveOperand(SecondOpIdxToRemove); + // Now copy over the implicit operands from the original IndirectCall + BL->copyImplicitOps(MF, IndirectCall); + MF.moveCallSiteInfo(&IndirectCall, BL); + // Also add the register called in the IndirectCall as being used in the + // called thunk. + BL->addOperand(MachineOperand::CreateReg(Reg, false /*isDef*/, true /*isImp*/, + RegIsKilled /*isKill*/)); + // Remove IndirectCallinstruction + MBB.erase(MBBI); + return MBB; +} + +bool ARMSLSHardening::hardenIndirectCalls(MachineBasicBlock &MBB) const { + if (!ST->hardenSlsBlr()) + return false; + bool Modified = false; + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MachineBasicBlock::iterator NextMBBI; + for (; MBBI != E; MBBI = NextMBBI) { + MachineInstr &MI = *MBBI; + NextMBBI = std::next(MBBI); + // Tail calls are both indirect calls and "returns". + // They are also indirect jumps, so should be handled by sls-harden-retbr, + // rather than sls-harden-blr. 
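The split described in the comment that follows can be summarised as two predicates: an indirect tail call is also a return and an indirect jump, so it is covered by the retbr-style barrier insertion, while only plain indirect calls are rewritten into calls to the __llvm_slsblr_thunk_* thunks. A rough sketch using generic MachineInstr queries, not this pass's exact isIndirectCall helper:

static bool coveredByRetBrHardening(const MachineInstr &MI) {
  // Hardened by inserting a speculation barrier after the terminator.
  return MI.isReturn() || MI.isIndirectBranch();
}

static bool coveredByBlrHardening(const MachineInstr &MI) {
  // Hardened by redirecting the call through a thunk; isCall() is a
  // simplified stand-in for the pass's indirect-call check.
  return MI.isCall() && !MI.isReturn();
}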
+ if (isIndirectCall(MI) && !MI.isReturn()) { + ConvertIndirectCallToIndirectJump(MBB, MBBI); + Modified = true; + } + } + return Modified; +} + + + +FunctionPass *llvm::createARMSLSHardeningPass() { + return new ARMSLSHardening(); +} + +namespace { +class ARMIndirectThunks : public MachineFunctionPass { +public: + static char ID; + + ARMIndirectThunks() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { return "ARM Indirect Thunks"; } + + bool doInitialization(Module &M) override; + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addPreserved<MachineModuleInfoWrapperPass>(); + } + +private: + std::tuple<SLSBLRThunkInserter> TIs; + + // FIXME: When LLVM moves to C++17, these can become folds + template <typename... ThunkInserterT> + static void initTIs(Module &M, + std::tuple<ThunkInserterT...> &ThunkInserters) { + (void)std::initializer_list<int>{ + (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...}; + } + template <typename... ThunkInserterT> + static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF, + std::tuple<ThunkInserterT...> &ThunkInserters) { + bool Modified = false; + (void)std::initializer_list<int>{ + Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...}; + return Modified; + } +}; + +} // end anonymous namespace + +char ARMIndirectThunks::ID = 0; + +FunctionPass *llvm::createARMIndirectThunks() { + return new ARMIndirectThunks(); +} + +bool ARMIndirectThunks::doInitialization(Module &M) { + initTIs(M, TIs); + return false; +} + +bool ARMIndirectThunks::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << getPassName() << '\n'); + auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); + return runTIs(MMI, MF, TIs); +} diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMSchedule.td b/contrib/libs/llvm12/lib/Target/ARM/ARMSchedule.td index 53a2a6fec5..503a0fbd96 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMSchedule.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMSchedule.td @@ -151,61 +151,61 @@ def : PredicateProlog<[{ (void)STI; }]>; -def IsPredicated : CheckFunctionPredicateWithTII< - "ARM_MC::isPredicated", - "isPredicated" ->; -def IsPredicatedPred : MCSchedPredicate<IsPredicated>; - -def IsCPSRDefined : CheckFunctionPredicateWithTII< - "ARM_MC::isCPSRDefined", - "ARMBaseInstrInfo::isCPSRDefined" ->; - -def IsCPSRDefinedPred : MCSchedPredicate<IsCPSRDefined>; - -let FunctionMapper = "ARM_AM::getAM2ShiftOpc" in { - class CheckAM2NoShift<int n> : CheckImmOperand_s<n, "ARM_AM::no_shift">; - class CheckAM2ShiftLSL<int n> : CheckImmOperand_s<n, "ARM_AM::lsl">; -} - -let FunctionMapper = "ARM_AM::getAM2Op" in { - class CheckAM2OpAdd<int n> : CheckImmOperand_s<n, "ARM_AM::add"> {} - class CheckAM2OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub"> {} -} - -let FunctionMapper = "ARM_AM::getAM2Offset" in { - class CheckAM2Offset<int n, int of> : CheckImmOperand<n, of> {} -} - -def IsLDMBaseRegInList : CheckFunctionPredicate< - "ARM_MC::isLDMBaseRegInList", "ARM_MC::isLDMBaseRegInList" ->; - -let FunctionMapper = "ARM_AM::getAM3Op" in { - class CheckAM3OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub"> {} -} - -// LDM, base reg in list -def IsLDMBaseRegInListPred : MCSchedPredicate<IsLDMBaseRegInList>; - -class IsRegPCPred<int n> : MCSchedPredicate<CheckRegOperand<n, PC>>; - -class BranchWriteRes<int lat, int uops, 
list<ProcResourceKind> resl, - list<int> rcl, SchedWriteRes wr> : - SchedWriteRes<!listconcat(wr.ProcResources, resl)> { - let Latency = !add(wr.Latency, lat); - let ResourceCycles = !listconcat(wr.ResourceCycles, rcl); - let NumMicroOps = !add(wr.NumMicroOps, uops); - SchedWriteRes BaseWr = wr; -} - -class CheckBranchForm<int n, BranchWriteRes br> : - SchedWriteVariant<[ - SchedVar<IsRegPCPred<n>, [br]>, - SchedVar<NoSchedPred, [br.BaseWr]> - ]>; +def IsPredicated : CheckFunctionPredicateWithTII< + "ARM_MC::isPredicated", + "isPredicated" +>; +def IsPredicatedPred : MCSchedPredicate<IsPredicated>; +def IsCPSRDefined : CheckFunctionPredicateWithTII< + "ARM_MC::isCPSRDefined", + "ARMBaseInstrInfo::isCPSRDefined" +>; + +def IsCPSRDefinedPred : MCSchedPredicate<IsCPSRDefined>; + +let FunctionMapper = "ARM_AM::getAM2ShiftOpc" in { + class CheckAM2NoShift<int n> : CheckImmOperand_s<n, "ARM_AM::no_shift">; + class CheckAM2ShiftLSL<int n> : CheckImmOperand_s<n, "ARM_AM::lsl">; +} + +let FunctionMapper = "ARM_AM::getAM2Op" in { + class CheckAM2OpAdd<int n> : CheckImmOperand_s<n, "ARM_AM::add"> {} + class CheckAM2OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub"> {} +} + +let FunctionMapper = "ARM_AM::getAM2Offset" in { + class CheckAM2Offset<int n, int of> : CheckImmOperand<n, of> {} +} + +def IsLDMBaseRegInList : CheckFunctionPredicate< + "ARM_MC::isLDMBaseRegInList", "ARM_MC::isLDMBaseRegInList" +>; + +let FunctionMapper = "ARM_AM::getAM3Op" in { + class CheckAM3OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub"> {} +} + +// LDM, base reg in list +def IsLDMBaseRegInListPred : MCSchedPredicate<IsLDMBaseRegInList>; + +class IsRegPCPred<int n> : MCSchedPredicate<CheckRegOperand<n, PC>>; + +class BranchWriteRes<int lat, int uops, list<ProcResourceKind> resl, + list<int> rcl, SchedWriteRes wr> : + SchedWriteRes<!listconcat(wr.ProcResources, resl)> { + let Latency = !add(wr.Latency, lat); + let ResourceCycles = !listconcat(wr.ResourceCycles, rcl); + let NumMicroOps = !add(wr.NumMicroOps, uops); + SchedWriteRes BaseWr = wr; +} + +class CheckBranchForm<int n, BranchWriteRes br> : + SchedWriteVariant<[ + SchedVar<IsRegPCPred<n>, [br]>, + SchedVar<NoSchedPred, [br.BaseWr]> + ]>; + //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM // diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57.td b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57.td index 0c610a4839..fe8c220db4 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57.td @@ -21,47 +21,47 @@ // Therefore, IssueWidth is set to the narrower of the two at three, while still // modeling the machine as out-of-order. -def IsCPSRDefinedAndPredicated : CheckAll<[IsCPSRDefined, IsPredicated]>; +def IsCPSRDefinedAndPredicated : CheckAll<[IsCPSRDefined, IsPredicated]>; def IsCPSRDefinedAndPredicatedPred : - MCSchedPredicate<IsCPSRDefinedAndPredicated>; + MCSchedPredicate<IsCPSRDefinedAndPredicated>; // Cortex A57 rev. 
r1p0 or later (false = r0px) -def IsR1P0AndLaterPred : MCSchedPredicate<FalsePred>; +def IsR1P0AndLaterPred : MCSchedPredicate<FalsePred>; -def IsLdrAm3RegOffPred : MCSchedPredicate<CheckInvalidRegOperand<2>>; -def IsLdrAm3RegOffPredX2 : MCSchedPredicate<CheckInvalidRegOperand<3>>; -def IsLdrAm3RegOffPredX3 : MCSchedPredicate<CheckInvalidRegOperand<4>>; +def IsLdrAm3RegOffPred : MCSchedPredicate<CheckInvalidRegOperand<2>>; +def IsLdrAm3RegOffPredX2 : MCSchedPredicate<CheckInvalidRegOperand<3>>; +def IsLdrAm3RegOffPredX3 : MCSchedPredicate<CheckInvalidRegOperand<4>>; // If Addrmode3 contains "minus register" -class Am3NegativeRegOffset<int n> : MCSchedPredicate<CheckAll<[ - CheckValidRegOperand<n>, - CheckAM3OpSub<!add(n, 1)>]>>; - -def IsLdrAm3NegRegOffPred : Am3NegativeRegOffset<2>; -def IsLdrAm3NegRegOffPredX2 : Am3NegativeRegOffset<3>; -def IsLdrAm3NegRegOffPredX3 : Am3NegativeRegOffset<4>; - +class Am3NegativeRegOffset<int n> : MCSchedPredicate<CheckAll<[ + CheckValidRegOperand<n>, + CheckAM3OpSub<!add(n, 1)>]>>; + +def IsLdrAm3NegRegOffPred : Am3NegativeRegOffset<2>; +def IsLdrAm3NegRegOffPredX2 : Am3NegativeRegOffset<3>; +def IsLdrAm3NegRegOffPredX3 : Am3NegativeRegOffset<4>; + // Load, scaled register offset, not plus LSL2 -class ScaledRegNotPlusLsl2<int n> : CheckNot< - CheckAny<[ - CheckAM2NoShift<n>, - CheckAll<[ - CheckAM2OpAdd<n>, - CheckAM2ShiftLSL<n>, - CheckAM2Offset<n, 2> - ]> - ]> - >; - -def IsLdstsoScaledNotOptimalPredX0 : MCSchedPredicate<ScaledRegNotPlusLsl2<2>>; -def IsLdstsoScaledNotOptimalPred : MCSchedPredicate<ScaledRegNotPlusLsl2<3>>; -def IsLdstsoScaledNotOptimalPredX2 : MCSchedPredicate<ScaledRegNotPlusLsl2<4>>; - -def IsLdstsoScaledPredX2 : MCSchedPredicate<CheckNot<CheckAM2NoShift<4>>>; - -def IsLdstsoMinusRegPredX0 : MCSchedPredicate<CheckAM2OpSub<2>>; -def IsLdstsoMinusRegPred : MCSchedPredicate<CheckAM2OpSub<3>>; -def IsLdstsoMinusRegPredX2 : MCSchedPredicate<CheckAM2OpSub<4>>; +class ScaledRegNotPlusLsl2<int n> : CheckNot< + CheckAny<[ + CheckAM2NoShift<n>, + CheckAll<[ + CheckAM2OpAdd<n>, + CheckAM2ShiftLSL<n>, + CheckAM2Offset<n, 2> + ]> + ]> + >; + +def IsLdstsoScaledNotOptimalPredX0 : MCSchedPredicate<ScaledRegNotPlusLsl2<2>>; +def IsLdstsoScaledNotOptimalPred : MCSchedPredicate<ScaledRegNotPlusLsl2<3>>; +def IsLdstsoScaledNotOptimalPredX2 : MCSchedPredicate<ScaledRegNotPlusLsl2<4>>; + +def IsLdstsoScaledPredX2 : MCSchedPredicate<CheckNot<CheckAM2NoShift<4>>>; + +def IsLdstsoMinusRegPredX0 : MCSchedPredicate<CheckAM2OpSub<2>>; +def IsLdstsoMinusRegPred : MCSchedPredicate<CheckAM2OpSub<3>>; +def IsLdstsoMinusRegPredX2 : MCSchedPredicate<CheckAM2OpSub<4>>; class A57WriteLMOpsListType<list<SchedWriteRes> writes> { list <SchedWriteRes> Writes = writes; @@ -173,29 +173,29 @@ def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>; def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>; -// Check branch forms of ALU ops: -// check reg 0 for ARM_AM::PC -// if so adds 2 cyc to latency, 1 uop, 1 res cycle for A57UnitB -class A57BranchForm<SchedWriteRes non_br> : - BranchWriteRes<2, 1, [A57UnitB], [1], non_br>; - +// Check branch forms of ALU ops: +// check reg 0 for ARM_AM::PC +// if so adds 2 cyc to latency, 1 uop, 1 res cycle for A57UnitB +class A57BranchForm<SchedWriteRes non_br> : + BranchWriteRes<2, 1, [A57UnitB], [1], non_br>; + // shift by register, conditional or unconditional // TODO: according to the doc, conditional uses I0/I1, unconditional uses M // Why more complex instruction uses more simple pipeline? // May be an error in doc. 
def A57WriteALUsr : SchedWriteVariant<[ - SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>, - SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]> + SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>, + SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]> ]>; def A57WriteALUSsr : SchedWriteVariant<[ - SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>, - SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]> + SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>, + SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]> ]>; def A57ReadALUsr : SchedReadVariant<[ SchedVar<IsPredicatedPred, [ReadDefault]>, SchedVar<NoSchedPred, [ReadDefault]> ]>; -def : SchedAlias<WriteALUsi, CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>; +def : SchedAlias<WriteALUsi, CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>; def : SchedAlias<WriteALUsr, A57WriteALUsr>; def : SchedAlias<WriteALUSsr, A57WriteALUSsr>; def : SchedAlias<ReadALUsr, A57ReadALUsr>; @@ -271,11 +271,11 @@ def : ReadAdvance<ReadMUL, 0>; // from similar μops, allowing a typical sequence of multiply-accumulate μops // to issue one every 1 cycle (sched advance = 2). def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; } -def A57WriteMLAL : SchedWriteVariant<[ - SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>, - SchedVar<NoSchedPred, [A57Write_4cyc_1M]> -]>; - +def A57WriteMLAL : SchedWriteVariant<[ + SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1M]> +]>; + def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>; def : InstRW<[A57WriteMLA], @@ -470,11 +470,11 @@ def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG", "LDRB_POST_REG", "LDR(B?)T_POST$")>; def A57WriteLdrTRegPost : SchedWriteVariant<[ - SchedVar<IsLdstsoScaledPredX2, [A57Write_4cyc_1I_1L_1M]>, + SchedVar<IsLdstsoScaledPredX2, [A57Write_4cyc_1I_1L_1M]>, SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]> ]>; def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[ - SchedVar<IsLdstsoScaledPredX2, [A57WrBackThree]>, + SchedVar<IsLdstsoScaledPredX2, [A57WrBackThree]>, SchedVar<NoSchedPred, [A57WrBackTwo]> ]>; // 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L" @@ -510,12 +510,12 @@ def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>; // --- Load multiple instructions --- foreach NumAddr = 1-8 in { - def A57LMAddrPred#NumAddr : MCSchedPredicate<CheckAny<[ - CheckNumOperands<!add(!shl(NumAddr, 1), 2)>, - CheckNumOperands<!add(!shl(NumAddr, 1), 3)>]>>; - def A57LMAddrUpdPred#NumAddr : MCSchedPredicate<CheckAny<[ - CheckNumOperands<!add(!shl(NumAddr, 1), 3)>, - CheckNumOperands<!add(!shl(NumAddr, 1), 4)>]>>; + def A57LMAddrPred#NumAddr : MCSchedPredicate<CheckAny<[ + CheckNumOperands<!add(!shl(NumAddr, 1), 2)>, + CheckNumOperands<!add(!shl(NumAddr, 1), 3)>]>>; + def A57LMAddrUpdPred#NumAddr : MCSchedPredicate<CheckAny<[ + CheckNumOperands<!add(!shl(NumAddr, 1), 3)>, + CheckNumOperands<!add(!shl(NumAddr, 1), 4)>]>>; } def A57LDMOpsListNoregin : A57WriteLMOpsListType< @@ -571,20 +571,20 @@ def A57LDMOpsList_Upd : A57WriteLMOpsListType< A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>; def A57WriteLDM_Upd : SchedWriteVariant<[ - SchedVar<A57LMAddrUpdPred1, A57LDMOpsList_Upd.Writes[0-2]>, - SchedVar<A57LMAddrUpdPred2, 
A57LDMOpsList_Upd.Writes[0-4]>, - SchedVar<A57LMAddrUpdPred3, A57LDMOpsList_Upd.Writes[0-6]>, - SchedVar<A57LMAddrUpdPred4, A57LDMOpsList_Upd.Writes[0-8]>, - SchedVar<A57LMAddrUpdPred5, A57LDMOpsList_Upd.Writes[0-10]>, - SchedVar<A57LMAddrUpdPred6, A57LDMOpsList_Upd.Writes[0-12]>, - SchedVar<A57LMAddrUpdPred7, A57LDMOpsList_Upd.Writes[0-14]>, - SchedVar<A57LMAddrUpdPred8, A57LDMOpsList_Upd.Writes[0-16]>, - SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]> + SchedVar<A57LMAddrUpdPred1, A57LDMOpsList_Upd.Writes[0-2]>, + SchedVar<A57LMAddrUpdPred2, A57LDMOpsList_Upd.Writes[0-4]>, + SchedVar<A57LMAddrUpdPred3, A57LDMOpsList_Upd.Writes[0-6]>, + SchedVar<A57LMAddrUpdPred4, A57LDMOpsList_Upd.Writes[0-8]>, + SchedVar<A57LMAddrUpdPred5, A57LDMOpsList_Upd.Writes[0-10]>, + SchedVar<A57LMAddrUpdPred6, A57LDMOpsList_Upd.Writes[0-12]>, + SchedVar<A57LMAddrUpdPred7, A57LDMOpsList_Upd.Writes[0-14]>, + SchedVar<A57LMAddrUpdPred8, A57LDMOpsList_Upd.Writes[0-16]>, + SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]> ]> { let Variadic=1; } def A57WriteLDM : SchedWriteVariant<[ - SchedVar<IsLDMBaseRegInListPred, [A57WriteLDMreginlist]>, - SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]> + SchedVar<IsLDMBaseRegInListPred, [A57WriteLDMreginlist]>, + SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]> ]> { let Variadic=1; } def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>; @@ -1194,7 +1194,7 @@ def : InstRW<[A57Write_5cyc_1V], (instregex // --- 3.16 ASIMD Miscellaneous Instructions --- // ASIMD bitwise insert -def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL", "VBSP")>; +def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL", "VBSP")>; // ASIMD count def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>; @@ -1483,7 +1483,7 @@ def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>; // ----------------------------------------------------------------------------- // Common definitions def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } -def : SchedAlias<WriteALU, CheckBranchForm<0, A57BranchForm<A57Write_1cyc_1I>>>; +def : SchedAlias<WriteALU, CheckBranchForm<0, A57BranchForm<A57Write_1cyc_1I>>>; def : SchedAlias<WriteBr, A57Write_1cyc_1B>; def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>; diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57WriteRes.td b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57WriteRes.td index 531b10bc5c..3ed917682c 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57WriteRes.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA57WriteRes.td @@ -36,16 +36,16 @@ def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20; let ResourceCycles = [20]; } def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; } -def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; - let ResourceCycles = [1]; } -def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; - let ResourceCycles = [1]; } +def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; + let ResourceCycles = [1]; } +def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; + let ResourceCycles = [1]; } def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; } def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; } def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; } def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; } -def 
A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; - let ResourceCycles = [1]; } +def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; + let ResourceCycles = [1]; } def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32; let ResourceCycles = [32]; } def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; @@ -71,7 +71,7 @@ foreach Lat = 4-16 in { } } -def A57Write_4cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 4; } +def A57Write_4cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 4; } def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; } def A57Write_4cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 4; } def A57Write_5cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 5; } diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA9.td b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA9.td index be7017a7b4..dfda6c6b4b 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA9.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleA9.td @@ -2525,8 +2525,8 @@ def : ReadAdvance<ReadFPMAC, 0>; def : InstRW< [WriteALU], (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr", "BICrr")>; -def : InstRW< [WriteALUsi], (instrs ANDrsi, ORRrsi, EORrsi, BICrsi)>; -def : InstRW< [WriteALUsr], (instrs ANDrsr, ORRrsr, EORrsr, BICrsr)>; +def : InstRW< [WriteALUsi], (instrs ANDrsi, ORRrsi, EORrsi, BICrsi)>; +def : InstRW< [WriteALUsr], (instrs ANDrsr, ORRrsr, EORrsr, BICrsr)>; def : SchedAlias<WriteCMP, A9WriteALU>; diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleM7.td b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleM7.td index 12296ad092..c5e1d32e8d 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleM7.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleM7.td @@ -1,488 +1,488 @@ -//=- ARMScheduleM7.td - ARM Cortex-M7 Scheduling Definitions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the SchedRead/Write data for the ARM Cortex-M7 processor. -// -//===----------------------------------------------------------------------===// - -def CortexM7Model : SchedMachineModel { - let IssueWidth = 2; // Dual issue for most instructions. - let MicroOpBufferSize = 0; // The Cortex-M7 is in-order. - let LoadLatency = 2; // Best case for load-use case. - let MispredictPenalty = 4; // Mispredict cost for forward branches is 6, - // but 4 works better - let CompleteModel = 0; -} - -//===--------------------------------------------------------------------===// -// The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP -// pipe. The stages relevant to scheduling are as follows: -// -// EX1: address generation shifts -// EX2: fast load data ALUs FP operation -// EX3: slow load data integer writeback FP operation -// EX4: store data FP writeback -// -// There are shifters in both EX1 and EX2, and some instructions can be -// flexibly allocated between them. EX2 is used as the "zero" point -// for scheduling, so simple ALU operations executing in EX2 will have -// ReadAdvance<0> (the default) for their source operands and Latency = 1. 
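// Aside: a minimal sketch of the convention described above, using hypothetical
// names that are not defined in this file. With EX2 as the zero point, an
// operand that must be ready at EX1 is modelled with a negative read advance,
// and the effective operand latency a consumer sees is the producer's Latency
// minus the read Advance.
//
//   def HypoWriteEX2 : SchedWriteRes<[]> { let Latency = 1; } // result timed relative to EX2
//   def HypoReadEX1  : SchedReadAdvance<-1>; // needed at EX1: sees 1 - (-1) = 2 cycles
//   def HypoReadEX3  : SchedReadAdvance<1>;  // needed at EX3: sees 1 - 1 = 0 cycles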
- -def M7UnitLoad : ProcResource<2> { let BufferSize = 0; } -def M7UnitStore : ProcResource<1> { let BufferSize = 0; } -def M7UnitALU : ProcResource<2>; -def M7UnitShift1 : ProcResource<1> { let BufferSize = 0; } -def M7UnitShift2 : ProcResource<1> { let BufferSize = 0; } -def M7UnitMAC : ProcResource<1> { let BufferSize = 0; } -def M7UnitBranch : ProcResource<1> { let BufferSize = 0; } -def M7UnitVFP : ProcResource<1> { let BufferSize = 0; } -def M7UnitVPort : ProcResource<2> { let BufferSize = 0; } -def M7UnitSIMD : ProcResource<1> { let BufferSize = 0; } - -//===---------------------------------------------------------------------===// -// Subtarget-specific SchedWrite types with map ProcResources and set latency. - -let SchedModel = CortexM7Model in { - -def : WriteRes<WriteALU, [M7UnitALU]> { let Latency = 1; } - -// Basic ALU with shifts. -let Latency = 1 in { - def : WriteRes<WriteALUsi, [M7UnitALU, M7UnitShift1]>; - def : WriteRes<WriteALUsr, [M7UnitALU, M7UnitShift1]>; - def : WriteRes<WriteALUSsr, [M7UnitALU, M7UnitShift1]>; -} - -// Compares. -def : WriteRes<WriteCMP, [M7UnitALU]> { let Latency = 1; } -def : WriteRes<WriteCMPsi, [M7UnitALU, M7UnitShift1]> { let Latency = 2; } -def : WriteRes<WriteCMPsr, [M7UnitALU, M7UnitShift1]> { let Latency = 2; } - -// Multiplies. -let Latency = 2 in { - def : WriteRes<WriteMUL16, [M7UnitMAC]>; - def : WriteRes<WriteMUL32, [M7UnitMAC]>; - def : WriteRes<WriteMUL64Lo, [M7UnitMAC]>; - def : WriteRes<WriteMUL64Hi, []> { let NumMicroOps = 0; } -} - -// Multiply-accumulates. -let Latency = 2 in { - def : WriteRes<WriteMAC16, [M7UnitMAC]>; - def : WriteRes<WriteMAC32, [M7UnitMAC]>; - def : WriteRes<WriteMAC64Lo, [M7UnitMAC]> { let Latency = 2; } - def : WriteRes<WriteMAC64Hi, []> { let NumMicroOps = 0; } -} - -// Divisions. -// These cannot be dual-issued with any instructions. -def : WriteRes<WriteDIV, [M7UnitALU]> { - let Latency = 7; - let SingleIssue = 1; -} - -// Loads/Stores. -def : WriteRes<WriteLd, [M7UnitLoad]> { let Latency = 1; } -def : WriteRes<WritePreLd, [M7UnitLoad]> { let Latency = 2; } -def : WriteRes<WriteST, [M7UnitStore]> { let Latency = 2; } - -// Branches. -def : WriteRes<WriteBr, [M7UnitBranch]> { let Latency = 2; } -def : WriteRes<WriteBrL, [M7UnitBranch]> { let Latency = 2; } -def : WriteRes<WriteBrTbl, [M7UnitBranch]> { let Latency = 2; } - -// Noop. -def : WriteRes<WriteNoop, []> { let Latency = 0; } - -//===---------------------------------------------------------------------===// -// Sched definitions for floating-point instructions -// -// Floating point conversions. -def : WriteRes<WriteFPCVT, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; } -def : WriteRes<WriteFPMOV, [M7UnitVPort]> { let Latency = 3; } - -// The FP pipeline has a latency of 3 cycles. -// ALU operations (32/64-bit). These go down the FP pipeline. -def : WriteRes<WriteFPALU32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; } -def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> { - let Latency = 4; - let BeginGroup = 1; -} - -// Multiplication -def : WriteRes<WriteFPMUL32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; } -def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> { - let Latency = 7; - let BeginGroup = 1; -} - -// Multiply-accumulate. FPMAC goes down the FP Pipeline. -def : WriteRes<WriteFPMAC32, [M7UnitVFP, M7UnitVPort]> { let Latency = 6; } -def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> { - let Latency = 11; - let BeginGroup = 1; -} - -// Division. 
Effective scheduling latency is 3, though real latency is larger -def : WriteRes<WriteFPDIV32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; } -def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> { - let Latency = 30; - let BeginGroup = 1; -} - -// Square-root. Effective scheduling latency is 3; real latency is larger -def : WriteRes<WriteFPSQRT32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; } -def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> { - let Latency = 30; - let BeginGroup = 1; -} - -def M7WriteShift2 : SchedWriteRes<[M7UnitALU, M7UnitShift2]> {} - -// Not used for M7, but needing definitions anyway -def : WriteRes<WriteVLD1, []>; -def : WriteRes<WriteVLD2, []>; -def : WriteRes<WriteVLD3, []>; -def : WriteRes<WriteVLD4, []>; -def : WriteRes<WriteVST1, []>; -def : WriteRes<WriteVST2, []>; -def : WriteRes<WriteVST3, []>; -def : WriteRes<WriteVST4, []>; - -def M7SingleIssue : SchedWriteRes<[]> { - let SingleIssue = 1; - let NumMicroOps = 0; -} -def M7Slot0Only : SchedWriteRes<[]> { - let BeginGroup = 1; - let NumMicroOps = 0; -} - -// What pipeline stage operands need to be ready for depending on -// where they come from. -def : ReadAdvance<ReadALUsr, 0>; -def : ReadAdvance<ReadMUL, 0>; -def : ReadAdvance<ReadMAC, 1>; -def : ReadAdvance<ReadALU, 0>; -def : ReadAdvance<ReadFPMUL, 0>; -def : ReadAdvance<ReadFPMAC, 3>; -def M7Read_ISS : SchedReadAdvance<-1>; // operands needed at EX1 -def M7Read_EX2 : SchedReadAdvance<1>; // operands needed at EX3 -def M7Read_EX3 : SchedReadAdvance<2>; // operands needed at EX4 - -// Non general purpose instructions may not be dual issued. These -// use both issue units. -def M7NonGeneralPurpose : SchedWriteRes<[]> { - // Assume that these will go down the main ALU pipeline. - // In reality, many look likely to stall the whole pipeline. - let Latency = 3; - let SingleIssue = 1; -} - -// List the non general purpose instructions. -def : InstRW<[M7NonGeneralPurpose], (instregex "t2MRS", "tSVC", "tBKPT", - "t2MSR", "t2DMB", "t2DSB", "t2ISB", - "t2HVC", "t2SMC", "t2UDF", "ERET", - "tHINT", "t2HINT", "t2CLREX", "BUNDLE")>; - -//===---------------------------------------------------------------------===// -// Sched definitions for load/store -// -// Mark whether the loads/stores must be single-issue -// Address operands are needed earlier -// Data operands are needed later - -def M7BaseUpdate : SchedWriteRes<[]> { - let Latency = 0; // Update is bypassable out of EX1 - let NumMicroOps = 0; -} -def M7LoadLatency1 : SchedWriteRes<[]> { - let Latency = 1; - let NumMicroOps = 0; -} -def M7SlowLoad : SchedWriteRes<[M7UnitLoad]> { let Latency = 2; } - -// Byte and half-word loads should have greater latency than other loads. -// So should load exclusive. - -def : InstRW<[M7SlowLoad], - (instregex "t2LDR(B|H|SB|SH)pc")>; -def : InstRW<[M7SlowLoad, M7Read_ISS], - (instregex "t2LDR(B|H|SB|SH)T", "t2LDR(B|H|SB|SH)i", - "tLDR(B|H)i")>; -def : InstRW<[M7SlowLoad, M7Read_ISS, M7Read_ISS], - (instregex "t2LDR(B|H|SB|SH)s", "tLDR(B|H)r", "tLDR(SB|SH)")>; -def : InstRW<[M7SlowLoad, M7BaseUpdate, M7Read_ISS], - (instregex "t2LDR(B|H|SB|SH)_(POST|PRE)")>; - -// Exclusive loads/stores cannot be dual-issued -def : InstRW<[WriteLd, M7Slot0Only, M7Read_ISS], - (instregex "t2LDREX$")>; -def : InstRW<[M7SlowLoad, M7Slot0Only, M7Read_ISS], - (instregex "t2LDREX(B|H)")>; -def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_ISS], - (instregex "t2STREX(B|H)?$")>; - -// Load/store multiples cannot be dual-issued. 
Note that default scheduling -// occurs around read/write times of individual registers in the list; read -// time for STM cannot be overridden because it is a variadic source operand. - -def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS], - (instregex "(t|t2)LDM(DB|IA)$")>; -def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS], - (instregex "(t|t2)STM(DB|IA)$")>; -def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS], - (instregex "(t|t2)LDM(DB|IA)_UPD$", "tPOP")>; -def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS], - (instregex "(t|t2)STM(DB|IA)_UPD$", "tPUSH")>; - -// Load/store doubles cannot be dual-issued. - -def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, - M7Read_EX2, M7Read_EX2, M7Read_ISS], - (instregex "t2STRD_(PRE|POST)")>; -def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_EX2, M7Read_ISS], - (instregex "t2STRDi")>; -def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7BaseUpdate, M7Read_ISS], - (instregex "t2LDRD_(PRE|POST)")>; -def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7Read_ISS], - (instregex "t2LDRDi")>; - -// Word load / preload -def : InstRW<[WriteLd], - (instregex "t2LDRpc", "t2PL[DI]pci", "tLDRpci")>; -def : InstRW<[WriteLd, M7Read_ISS], - (instregex "t2LDR(i|T)", "t2PL[DI](W)?i", "tLDRi", "tLDRspi")>; -def : InstRW<[WriteLd, M7Read_ISS, M7Read_ISS], - (instregex "t2LDRs", "t2PL[DI](w)?s", "tLDRr")>; -def : InstRW<[WriteLd, M7BaseUpdate, M7Read_ISS], - (instregex "t2LDR_(POST|PRE)")>; - -// Stores -def : InstRW<[M7BaseUpdate, WriteST, M7Read_EX2, M7Read_ISS], - (instregex "t2STR(B|H)?_(POST|PRE)")>; -def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS, M7Read_ISS], - (instregex "t2STR(B|H)?s$", "tSTR(B|H)?r$")>; -def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS], - (instregex "t2STR(B|H)?(i|T)", "tSTR(B|H)?i$", "tSTRspi")>; - -// TBB/TBH - single-issue only; takes two cycles to issue - -def M7TableLoad : SchedWriteRes<[M7UnitLoad]> { - let NumMicroOps = 2; - let SingleIssue = 1; -} - -def : InstRW<[M7TableLoad, M7Read_ISS, M7Read_ISS], (instregex "t2TB")>; - -// VFP loads and stores - -def M7LoadSP : SchedWriteRes<[M7UnitLoad, M7UnitVPort]> { let Latency = 1; } -def M7LoadDP : SchedWriteRes<[M7UnitLoad, M7UnitVPort, M7UnitVPort]> { - let Latency = 2; - let SingleIssue = 1; -} -def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>; -def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPort, M7UnitVPort]> { - let SingleIssue = 1; -} - -def : InstRW<[M7LoadSP, M7Read_ISS], (instregex "VLDR(S|H)$")>; -def : InstRW<[M7LoadDP, M7Read_ISS], (instregex "VLDRD$")>; -def : InstRW<[M7StoreSP, M7Read_EX3, M7Read_ISS], (instregex "VSTR(S|H)$")>; -def : InstRW<[M7StoreDP, M7Read_EX3, M7Read_ISS], (instregex "VSTRD$")>; - -// Load/store multiples cannot be dual-issued. - -def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS], - (instregex "VLDM(S|D|Q)(DB|IA)$")>; -def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS], - (instregex "VSTM(S|D|Q)(DB|IA)$")>; -def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS], - (instregex "VLDM(S|D|Q)(DB|IA)_UPD$")>; -def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS], - (instregex "VSTM(S|D|Q)(DB|IA)_UPD$")>; - -//===---------------------------------------------------------------------===// -// Sched definitions for ALU -// - -// Shifted ALU operands are read a cycle early. 
-def M7Ex1ReadNoFastBypass : SchedReadAdvance<-1, [WriteLd, M7LoadLatency1]>; - -def : InstRW<[WriteALUsi, M7Ex1ReadNoFastBypass, M7Read_ISS], - (instregex "t2(ADC|ADDS|ADD|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|SUBS)rs$", - "t2(SUB|CMP|CMNz|TEQ|TST)rs$", - "t2MOVsr(a|l)")>; -def : InstRW<[WriteALUsi, M7Read_ISS], - (instregex "t2MVNs")>; - -// Treat pure shift operations (except for RRX) as if they used the EX1 -// shifter but have timing as if they used the EX2 shifter as they usually -// can choose the EX2 shifter when needed. Will miss a few dual-issue cases, -// but the results prove to be better than trying to get them exact. - -def : InstRW<[M7WriteShift2, M7Read_ISS], (instregex "t2RRX$")>; -def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)")>; - -// Instructions that use the shifter, but have normal timing. - -def : InstRW<[WriteALUsi,M7Slot0Only], (instregex "t2(BFC|BFI)$")>; - -// Instructions which are slot zero only but otherwise normal. - -def : InstRW<[WriteALU, M7Slot0Only], (instregex "t2CLZ")>; - -// MAC operations that don't have SchedRW set. - -def : InstRW<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC], (instregex "t2SML[AS]D")>; - -// Divides are special because they stall for their latency, and so look like a -// single-cycle as far as scheduling opportunities go. By putting WriteALU -// first, we make the operand latency 1, but keep the instruction latency 7. - -def : InstRW<[WriteALU, WriteDIV], (instregex "t2(S|U)DIV")>; - -// DSP extension operations - -def M7WriteSIMD1 : SchedWriteRes<[M7UnitSIMD, M7UnitALU]> { - let Latency = 1; - let BeginGroup = 1; -} -def M7WriteSIMD2 : SchedWriteRes<[M7UnitSIMD, M7UnitALU]> { - let Latency = 2; - let BeginGroup = 1; -} -def M7WriteShSIMD1 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> { - let Latency = 1; - let BeginGroup = 1; -} -def M7WriteShSIMD0 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> { - let Latency = 0; // Bypassable out of EX1 - let BeginGroup = 1; -} -def M7WriteShSIMD2 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> { - let Latency = 2; - let BeginGroup = 1; -} - -def : InstRW<[M7WriteShSIMD2, M7Read_ISS], - (instregex "t2(S|U)SAT")>; -def : InstRW<[M7WriteSIMD1, ReadALU], - (instregex "(t|t2)(S|U)XT(B|H)")>; -def : InstRW<[M7WriteSIMD1, ReadALU, ReadALU], - (instregex "t2(S|SH|U|UH)(ADD16|ADD8|ASX|SAX|SUB16|SUB8)", - "t2SEL")>; -def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU], - (instregex "t2(Q|UQ)(ADD|ASX|SAX|SUB)", "t2USAD8")>; -def : InstRW<[M7WriteShSIMD2, M7Read_ISS, M7Read_ISS], - (instregex "t2QD(ADD|SUB)")>; -def : InstRW<[M7WriteShSIMD0, M7Read_ISS], - (instregex "t2(RBIT|REV)", "tREV")>; -def : InstRW<[M7WriteShSIMD1, M7Read_ISS], - (instregex "t2(SBFX|UBFX)")>; -def : InstRW<[M7WriteShSIMD1, ReadALU, M7Read_ISS], - (instregex "t2PKH(BT|TB)", "t2(S|U)XTA")>; -def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU, M7Read_EX2], - (instregex "t2USADA8")>; - -// MSR/MRS -def : InstRW<[M7NonGeneralPurpose], (instregex "MSR", "MRS")>; - -//===---------------------------------------------------------------------===// -// Sched definitions for FP operations -// - -// Effective scheduling latency is really 3 for nearly all FP operations, -// even if their true latency is higher. -def M7WriteVFPLatOverride : SchedWriteRes<[]> { - let Latency = 3; - let NumMicroOps = 0; -} -def M7WriteVFPExtraVPort : SchedWriteRes<[M7UnitVPort]> { - let Latency = 3; - let NumMicroOps = 0; -} - -// Instructions which are missing default schedules. 
-def : InstRW<[WriteFPALU32], - (instregex "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)S$")>; -def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64], - (instregex "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)D$")>; - -// VCMP -def M7WriteVCMPS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let Latency = 0; } -def M7WriteVCMPD : SchedWriteRes<[M7UnitVFP, M7UnitVPort, M7UnitVPort]> { - let Latency = 0; - let BeginGroup = 1; -} -def : InstRW<[M7WriteVCMPS], (instregex "VCMPS$")>; -def : InstRW<[M7WriteVCMPD], (instregex "VCMPD$")>; - - // VMRS/VMSR -def M7VMRS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let SingleIssue = 1; } -def M7VMSR : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let SingleIssue = 1; } -def : InstRW<[M7VMRS], (instregex "FMSTAT")>; -def : InstRW<[M7VMSR], (instregex "VMSR")>; - -// VSEL cannot bypass in its implied $cpsr operand; model as earlier read -def : InstRW<[WriteFPALU32, M7Slot0Only, ReadALU, ReadALU, M7Read_ISS], - (instregex "VSEL.*S$")>; -def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64, M7Slot0Only, - ReadALU, ReadALU, M7Read_ISS], - (instregex "VSEL.*D$")>; - -// VMOV -def : InstRW<[WriteFPMOV], - (instregex "VMOV(H|S)$", "FCONST(H|S)")>; -def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only], - (instregex "VMOVD$")>; -def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only], - (instregex "FCONSTD")>; -def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7SingleIssue], - (instregex "VMOV(DRR|RRD|RRS|SRR)")>; - -// Larger-latency overrides. - -def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV32], (instregex "VDIVS")>; -def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV64], (instregex "VDIVD")>; -def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT32], (instregex "VSQRTS")>; -def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT64], (instregex "VSQRTD")>; -def : InstRW<[M7WriteVFPLatOverride, WriteFPMUL64], - (instregex "V(MUL|NMUL)D")>; -def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64], - (instregex "V(ADD|SUB)D")>; - -// Multiply-accumulate. Chained SP timing is correct; rest need overrides -// Double-precision chained MAC stalls the pipeline behind it for 3 cycles, -// making it appear to have 3 cycle latency for scheduling. - -def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64, - ReadFPMAC, ReadFPMUL, ReadFPMUL], - (instregex "V(N)?ML(A|S)D$")>; - -// Single-precision fused MACs look like latency 5 with advance of 2. - -def M7WriteVFPLatOverride5 : SchedWriteRes<[]> { - let Latency = 5; - let NumMicroOps = 0; -} -def M7ReadFPMAC2 : SchedReadAdvance<2>; - -def : InstRW<[M7WriteVFPLatOverride5, WriteFPMAC32, - M7ReadFPMAC2, ReadFPMUL, ReadFPMUL], - (instregex "VF(N)?M(A|S)S$")>; - -// Double-precision fused MAC stalls the pipeline behind it for 2 cycles, making -// it appear to have 3 cycle latency for scheduling. - -def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64, - ReadFPMAC, ReadFPMUL, ReadFPMUL], - (instregex "VF(N)?M(A|S)D$")>; - -} // SchedModel = CortexM7Model +//=- ARMScheduleM7.td - ARM Cortex-M7 Scheduling Definitions -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the SchedRead/Write data for the ARM Cortex-M7 processor. 
+// +//===----------------------------------------------------------------------===// + +def CortexM7Model : SchedMachineModel { + let IssueWidth = 2; // Dual issue for most instructions. + let MicroOpBufferSize = 0; // The Cortex-M7 is in-order. + let LoadLatency = 2; // Best case for load-use case. + let MispredictPenalty = 4; // Mispredict cost for forward branches is 6, + // but 4 works better + let CompleteModel = 0; +} + +//===--------------------------------------------------------------------===// +// The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP +// pipe. The stages relevant to scheduling are as follows: +// +// EX1: address generation shifts +// EX2: fast load data ALUs FP operation +// EX3: slow load data integer writeback FP operation +// EX4: store data FP writeback +// +// There are shifters in both EX1 and EX2, and some instructions can be +// flexibly allocated between them. EX2 is used as the "zero" point +// for scheduling, so simple ALU operations executing in EX2 will have +// ReadAdvance<0> (the default) for their source operands and Latency = 1. + +def M7UnitLoad : ProcResource<2> { let BufferSize = 0; } +def M7UnitStore : ProcResource<1> { let BufferSize = 0; } +def M7UnitALU : ProcResource<2>; +def M7UnitShift1 : ProcResource<1> { let BufferSize = 0; } +def M7UnitShift2 : ProcResource<1> { let BufferSize = 0; } +def M7UnitMAC : ProcResource<1> { let BufferSize = 0; } +def M7UnitBranch : ProcResource<1> { let BufferSize = 0; } +def M7UnitVFP : ProcResource<1> { let BufferSize = 0; } +def M7UnitVPort : ProcResource<2> { let BufferSize = 0; } +def M7UnitSIMD : ProcResource<1> { let BufferSize = 0; } + +//===---------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types with map ProcResources and set latency. + +let SchedModel = CortexM7Model in { + +def : WriteRes<WriteALU, [M7UnitALU]> { let Latency = 1; } + +// Basic ALU with shifts. +let Latency = 1 in { + def : WriteRes<WriteALUsi, [M7UnitALU, M7UnitShift1]>; + def : WriteRes<WriteALUsr, [M7UnitALU, M7UnitShift1]>; + def : WriteRes<WriteALUSsr, [M7UnitALU, M7UnitShift1]>; +} + +// Compares. +def : WriteRes<WriteCMP, [M7UnitALU]> { let Latency = 1; } +def : WriteRes<WriteCMPsi, [M7UnitALU, M7UnitShift1]> { let Latency = 2; } +def : WriteRes<WriteCMPsr, [M7UnitALU, M7UnitShift1]> { let Latency = 2; } + +// Multiplies. +let Latency = 2 in { + def : WriteRes<WriteMUL16, [M7UnitMAC]>; + def : WriteRes<WriteMUL32, [M7UnitMAC]>; + def : WriteRes<WriteMUL64Lo, [M7UnitMAC]>; + def : WriteRes<WriteMUL64Hi, []> { let NumMicroOps = 0; } +} + +// Multiply-accumulates. +let Latency = 2 in { + def : WriteRes<WriteMAC16, [M7UnitMAC]>; + def : WriteRes<WriteMAC32, [M7UnitMAC]>; + def : WriteRes<WriteMAC64Lo, [M7UnitMAC]> { let Latency = 2; } + def : WriteRes<WriteMAC64Hi, []> { let NumMicroOps = 0; } +} + +// Divisions. +// These cannot be dual-issued with any instructions. +def : WriteRes<WriteDIV, [M7UnitALU]> { + let Latency = 7; + let SingleIssue = 1; +} + +// Loads/Stores. +def : WriteRes<WriteLd, [M7UnitLoad]> { let Latency = 1; } +def : WriteRes<WritePreLd, [M7UnitLoad]> { let Latency = 2; } +def : WriteRes<WriteST, [M7UnitStore]> { let Latency = 2; } + +// Branches. +def : WriteRes<WriteBr, [M7UnitBranch]> { let Latency = 2; } +def : WriteRes<WriteBrL, [M7UnitBranch]> { let Latency = 2; } +def : WriteRes<WriteBrTbl, [M7UnitBranch]> { let Latency = 2; } + +// Noop. 
+def : WriteRes<WriteNoop, []> { let Latency = 0; } + +//===---------------------------------------------------------------------===// +// Sched definitions for floating-point instructions +// +// Floating point conversions. +def : WriteRes<WriteFPCVT, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; } +def : WriteRes<WriteFPMOV, [M7UnitVPort]> { let Latency = 3; } + +// The FP pipeline has a latency of 3 cycles. +// ALU operations (32/64-bit). These go down the FP pipeline. +def : WriteRes<WriteFPALU32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; } +def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> { + let Latency = 4; + let BeginGroup = 1; +} + +// Multiplication +def : WriteRes<WriteFPMUL32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; } +def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> { + let Latency = 7; + let BeginGroup = 1; +} + +// Multiply-accumulate. FPMAC goes down the FP Pipeline. +def : WriteRes<WriteFPMAC32, [M7UnitVFP, M7UnitVPort]> { let Latency = 6; } +def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> { + let Latency = 11; + let BeginGroup = 1; +} + +// Division. Effective scheduling latency is 3, though real latency is larger +def : WriteRes<WriteFPDIV32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; } +def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> { + let Latency = 30; + let BeginGroup = 1; +} + +// Square-root. Effective scheduling latency is 3; real latency is larger +def : WriteRes<WriteFPSQRT32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; } +def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> { + let Latency = 30; + let BeginGroup = 1; +} + +def M7WriteShift2 : SchedWriteRes<[M7UnitALU, M7UnitShift2]> {} + +// Not used for M7, but needing definitions anyway +def : WriteRes<WriteVLD1, []>; +def : WriteRes<WriteVLD2, []>; +def : WriteRes<WriteVLD3, []>; +def : WriteRes<WriteVLD4, []>; +def : WriteRes<WriteVST1, []>; +def : WriteRes<WriteVST2, []>; +def : WriteRes<WriteVST3, []>; +def : WriteRes<WriteVST4, []>; + +def M7SingleIssue : SchedWriteRes<[]> { + let SingleIssue = 1; + let NumMicroOps = 0; +} +def M7Slot0Only : SchedWriteRes<[]> { + let BeginGroup = 1; + let NumMicroOps = 0; +} + +// What pipeline stage operands need to be ready for depending on +// where they come from. +def : ReadAdvance<ReadALUsr, 0>; +def : ReadAdvance<ReadMUL, 0>; +def : ReadAdvance<ReadMAC, 1>; +def : ReadAdvance<ReadALU, 0>; +def : ReadAdvance<ReadFPMUL, 0>; +def : ReadAdvance<ReadFPMAC, 3>; +def M7Read_ISS : SchedReadAdvance<-1>; // operands needed at EX1 +def M7Read_EX2 : SchedReadAdvance<1>; // operands needed at EX3 +def M7Read_EX3 : SchedReadAdvance<2>; // operands needed at EX4 + +// Non general purpose instructions may not be dual issued. These +// use both issue units. +def M7NonGeneralPurpose : SchedWriteRes<[]> { + // Assume that these will go down the main ALU pipeline. + // In reality, many look likely to stall the whole pipeline. + let Latency = 3; + let SingleIssue = 1; +} + +// List the non general purpose instructions. 
+def : InstRW<[M7NonGeneralPurpose], (instregex "t2MRS", "tSVC", "tBKPT", + "t2MSR", "t2DMB", "t2DSB", "t2ISB", + "t2HVC", "t2SMC", "t2UDF", "ERET", + "tHINT", "t2HINT", "t2CLREX", "BUNDLE")>; + +//===---------------------------------------------------------------------===// +// Sched definitions for load/store +// +// Mark whether the loads/stores must be single-issue +// Address operands are needed earlier +// Data operands are needed later + +def M7BaseUpdate : SchedWriteRes<[]> { + let Latency = 0; // Update is bypassable out of EX1 + let NumMicroOps = 0; +} +def M7LoadLatency1 : SchedWriteRes<[]> { + let Latency = 1; + let NumMicroOps = 0; +} +def M7SlowLoad : SchedWriteRes<[M7UnitLoad]> { let Latency = 2; } + +// Byte and half-word loads should have greater latency than other loads. +// So should load exclusive. + +def : InstRW<[M7SlowLoad], + (instregex "t2LDR(B|H|SB|SH)pc")>; +def : InstRW<[M7SlowLoad, M7Read_ISS], + (instregex "t2LDR(B|H|SB|SH)T", "t2LDR(B|H|SB|SH)i", + "tLDR(B|H)i")>; +def : InstRW<[M7SlowLoad, M7Read_ISS, M7Read_ISS], + (instregex "t2LDR(B|H|SB|SH)s", "tLDR(B|H)r", "tLDR(SB|SH)")>; +def : InstRW<[M7SlowLoad, M7BaseUpdate, M7Read_ISS], + (instregex "t2LDR(B|H|SB|SH)_(POST|PRE)")>; + +// Exclusive loads/stores cannot be dual-issued +def : InstRW<[WriteLd, M7Slot0Only, M7Read_ISS], + (instregex "t2LDREX$")>; +def : InstRW<[M7SlowLoad, M7Slot0Only, M7Read_ISS], + (instregex "t2LDREX(B|H)")>; +def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_ISS], + (instregex "t2STREX(B|H)?$")>; + +// Load/store multiples cannot be dual-issued. Note that default scheduling +// occurs around read/write times of individual registers in the list; read +// time for STM cannot be overridden because it is a variadic source operand. + +def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS], + (instregex "(t|t2)LDM(DB|IA)$")>; +def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS], + (instregex "(t|t2)STM(DB|IA)$")>; +def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS], + (instregex "(t|t2)LDM(DB|IA)_UPD$", "tPOP")>; +def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS], + (instregex "(t|t2)STM(DB|IA)_UPD$", "tPUSH")>; + +// Load/store doubles cannot be dual-issued. 
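// Aside: a hedged sketch (hypothetical names, not used elsewhere) of the
// zero-micro-op "marker" writes that the InstRW lists below rely on. A
// SchedWriteRes with NumMicroOps = 0 adds no micro-ops or resources by itself;
// appending it to an InstRW either gives an extra def (such as the
// base-register writeback) its own latency, or simply attaches flags such as
// SingleIssue to the instruction.
//
//   def HypoNoDualIssue : SchedWriteRes<[]> {
//     let SingleIssue = 1;   // the whole instruction must issue alone
//     let NumMicroOps = 0;   // contributes no micro-ops of its own
//   }
//   def : InstRW<[WriteLd, HypoNoDualIssue], (instregex "HYPO_LDRD")>;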
+ +def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, + M7Read_EX2, M7Read_EX2, M7Read_ISS], + (instregex "t2STRD_(PRE|POST)")>; +def : InstRW<[WriteST, M7SingleIssue, M7Read_EX2, M7Read_EX2, M7Read_ISS], + (instregex "t2STRDi")>; +def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7BaseUpdate, M7Read_ISS], + (instregex "t2LDRD_(PRE|POST)")>; +def : InstRW<[WriteLd, M7LoadLatency1, M7SingleIssue, M7Read_ISS], + (instregex "t2LDRDi")>; + +// Word load / preload +def : InstRW<[WriteLd], + (instregex "t2LDRpc", "t2PL[DI]pci", "tLDRpci")>; +def : InstRW<[WriteLd, M7Read_ISS], + (instregex "t2LDR(i|T)", "t2PL[DI](W)?i", "tLDRi", "tLDRspi")>; +def : InstRW<[WriteLd, M7Read_ISS, M7Read_ISS], + (instregex "t2LDRs", "t2PL[DI](w)?s", "tLDRr")>; +def : InstRW<[WriteLd, M7BaseUpdate, M7Read_ISS], + (instregex "t2LDR_(POST|PRE)")>; + +// Stores +def : InstRW<[M7BaseUpdate, WriteST, M7Read_EX2, M7Read_ISS], + (instregex "t2STR(B|H)?_(POST|PRE)")>; +def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS, M7Read_ISS], + (instregex "t2STR(B|H)?s$", "tSTR(B|H)?r$")>; +def : InstRW<[WriteST, M7Read_EX2, M7Read_ISS], + (instregex "t2STR(B|H)?(i|T)", "tSTR(B|H)?i$", "tSTRspi")>; + +// TBB/TBH - single-issue only; takes two cycles to issue + +def M7TableLoad : SchedWriteRes<[M7UnitLoad]> { + let NumMicroOps = 2; + let SingleIssue = 1; +} + +def : InstRW<[M7TableLoad, M7Read_ISS, M7Read_ISS], (instregex "t2TB")>; + +// VFP loads and stores + +def M7LoadSP : SchedWriteRes<[M7UnitLoad, M7UnitVPort]> { let Latency = 1; } +def M7LoadDP : SchedWriteRes<[M7UnitLoad, M7UnitVPort, M7UnitVPort]> { + let Latency = 2; + let SingleIssue = 1; +} +def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>; +def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPort, M7UnitVPort]> { + let SingleIssue = 1; +} + +def : InstRW<[M7LoadSP, M7Read_ISS], (instregex "VLDR(S|H)$")>; +def : InstRW<[M7LoadDP, M7Read_ISS], (instregex "VLDRD$")>; +def : InstRW<[M7StoreSP, M7Read_EX3, M7Read_ISS], (instregex "VSTR(S|H)$")>; +def : InstRW<[M7StoreDP, M7Read_EX3, M7Read_ISS], (instregex "VSTRD$")>; + +// Load/store multiples cannot be dual-issued. + +def : InstRW<[WriteLd, M7SingleIssue, M7Read_ISS], + (instregex "VLDM(S|D|Q)(DB|IA)$")>; +def : InstRW<[WriteST, M7SingleIssue, M7Read_ISS], + (instregex "VSTM(S|D|Q)(DB|IA)$")>; +def : InstRW<[M7BaseUpdate, WriteLd, M7SingleIssue, M7Read_ISS], + (instregex "VLDM(S|D|Q)(DB|IA)_UPD$")>; +def : InstRW<[M7BaseUpdate, WriteST, M7SingleIssue, M7Read_ISS], + (instregex "VSTM(S|D|Q)(DB|IA)_UPD$")>; + +//===---------------------------------------------------------------------===// +// Sched definitions for ALU +// + +// Shifted ALU operands are read a cycle early. +def M7Ex1ReadNoFastBypass : SchedReadAdvance<-1, [WriteLd, M7LoadLatency1]>; + +def : InstRW<[WriteALUsi, M7Ex1ReadNoFastBypass, M7Read_ISS], + (instregex "t2(ADC|ADDS|ADD|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|SUBS)rs$", + "t2(SUB|CMP|CMNz|TEQ|TST)rs$", + "t2MOVsr(a|l)")>; +def : InstRW<[WriteALUsi, M7Read_ISS], + (instregex "t2MVNs")>; + +// Treat pure shift operations (except for RRX) as if they used the EX1 +// shifter but have timing as if they used the EX2 shifter as they usually +// can choose the EX2 shifter when needed. Will miss a few dual-issue cases, +// but the results prove to be better than trying to get them exact. + +def : InstRW<[M7WriteShift2, M7Read_ISS], (instregex "t2RRX$")>; +def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)")>; + +// Instructions that use the shifter, but have normal timing. 
+ +def : InstRW<[WriteALUsi,M7Slot0Only], (instregex "t2(BFC|BFI)$")>; + +// Instructions which are slot zero only but otherwise normal. + +def : InstRW<[WriteALU, M7Slot0Only], (instregex "t2CLZ")>; + +// MAC operations that don't have SchedRW set. + +def : InstRW<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC], (instregex "t2SML[AS]D")>; + +// Divides are special because they stall for their latency, and so look like a +// single-cycle as far as scheduling opportunities go. By putting WriteALU +// first, we make the operand latency 1, but keep the instruction latency 7. + +def : InstRW<[WriteALU, WriteDIV], (instregex "t2(S|U)DIV")>; + +// DSP extension operations + +def M7WriteSIMD1 : SchedWriteRes<[M7UnitSIMD, M7UnitALU]> { + let Latency = 1; + let BeginGroup = 1; +} +def M7WriteSIMD2 : SchedWriteRes<[M7UnitSIMD, M7UnitALU]> { + let Latency = 2; + let BeginGroup = 1; +} +def M7WriteShSIMD1 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> { + let Latency = 1; + let BeginGroup = 1; +} +def M7WriteShSIMD0 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> { + let Latency = 0; // Bypassable out of EX1 + let BeginGroup = 1; +} +def M7WriteShSIMD2 : SchedWriteRes<[M7UnitSIMD, M7UnitALU, M7UnitShift1]> { + let Latency = 2; + let BeginGroup = 1; +} + +def : InstRW<[M7WriteShSIMD2, M7Read_ISS], + (instregex "t2(S|U)SAT")>; +def : InstRW<[M7WriteSIMD1, ReadALU], + (instregex "(t|t2)(S|U)XT(B|H)")>; +def : InstRW<[M7WriteSIMD1, ReadALU, ReadALU], + (instregex "t2(S|SH|U|UH)(ADD16|ADD8|ASX|SAX|SUB16|SUB8)", + "t2SEL")>; +def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU], + (instregex "t2(Q|UQ)(ADD|ASX|SAX|SUB)", "t2USAD8")>; +def : InstRW<[M7WriteShSIMD2, M7Read_ISS, M7Read_ISS], + (instregex "t2QD(ADD|SUB)")>; +def : InstRW<[M7WriteShSIMD0, M7Read_ISS], + (instregex "t2(RBIT|REV)", "tREV")>; +def : InstRW<[M7WriteShSIMD1, M7Read_ISS], + (instregex "t2(SBFX|UBFX)")>; +def : InstRW<[M7WriteShSIMD1, ReadALU, M7Read_ISS], + (instregex "t2PKH(BT|TB)", "t2(S|U)XTA")>; +def : InstRW<[M7WriteSIMD2, ReadALU, ReadALU, M7Read_EX2], + (instregex "t2USADA8")>; + +// MSR/MRS +def : InstRW<[M7NonGeneralPurpose], (instregex "MSR", "MRS")>; + +//===---------------------------------------------------------------------===// +// Sched definitions for FP operations +// + +// Effective scheduling latency is really 3 for nearly all FP operations, +// even if their true latency is higher. +def M7WriteVFPLatOverride : SchedWriteRes<[]> { + let Latency = 3; + let NumMicroOps = 0; +} +def M7WriteVFPExtraVPort : SchedWriteRes<[M7UnitVPort]> { + let Latency = 3; + let NumMicroOps = 0; +} + +// Instructions which are missing default schedules. 
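// Aside: a sketch of how these InstRW overrides work; the opcode regex below
// is hypothetical. Inside the enclosing SchedModel, an InstRW rebinds the
// scheduling information for every opcode matching its regexes: the listed
// writes map to the instruction's defs in order, and any trailing reads map
// to its source operands.
//
//   def : InstRW<[WriteFPALU32, M7Read_ISS], (instregex "^HYPO_VFOO")>;
//   // def 0 gets WriteFPALU32's latency and resources; source 0 is read at EX1.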
+def : InstRW<[WriteFPALU32], + (instregex "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)S$")>; +def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64], + (instregex "V(ABS|CVT.*|NEG|FP_VMAX.*|FP_VMIN.*|RINT.*)D$")>; + +// VCMP +def M7WriteVCMPS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let Latency = 0; } +def M7WriteVCMPD : SchedWriteRes<[M7UnitVFP, M7UnitVPort, M7UnitVPort]> { + let Latency = 0; + let BeginGroup = 1; +} +def : InstRW<[M7WriteVCMPS], (instregex "VCMPS$")>; +def : InstRW<[M7WriteVCMPD], (instregex "VCMPD$")>; + + // VMRS/VMSR +def M7VMRS : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let SingleIssue = 1; } +def M7VMSR : SchedWriteRes<[M7UnitVFP, M7UnitVPort]> { let SingleIssue = 1; } +def : InstRW<[M7VMRS], (instregex "FMSTAT")>; +def : InstRW<[M7VMSR], (instregex "VMSR")>; + +// VSEL cannot bypass in its implied $cpsr operand; model as earlier read +def : InstRW<[WriteFPALU32, M7Slot0Only, ReadALU, ReadALU, M7Read_ISS], + (instregex "VSEL.*S$")>; +def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64, M7Slot0Only, + ReadALU, ReadALU, M7Read_ISS], + (instregex "VSEL.*D$")>; + +// VMOV +def : InstRW<[WriteFPMOV], + (instregex "VMOV(H|S)$", "FCONST(H|S)")>; +def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only], + (instregex "VMOVD$")>; +def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7Slot0Only], + (instregex "FCONSTD")>; +def : InstRW<[WriteFPMOV, M7WriteVFPExtraVPort, M7SingleIssue], + (instregex "VMOV(DRR|RRD|RRS|SRR)")>; + +// Larger-latency overrides. + +def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV32], (instregex "VDIVS")>; +def : InstRW<[M7WriteVFPLatOverride, WriteFPDIV64], (instregex "VDIVD")>; +def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT32], (instregex "VSQRTS")>; +def : InstRW<[M7WriteVFPLatOverride, WriteFPSQRT64], (instregex "VSQRTD")>; +def : InstRW<[M7WriteVFPLatOverride, WriteFPMUL64], + (instregex "V(MUL|NMUL)D")>; +def : InstRW<[M7WriteVFPLatOverride, WriteFPALU64], + (instregex "V(ADD|SUB)D")>; + +// Multiply-accumulate. Chained SP timing is correct; rest need overrides +// Double-precision chained MAC stalls the pipeline behind it for 3 cycles, +// making it appear to have 3 cycle latency for scheduling. + +def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64, + ReadFPMAC, ReadFPMUL, ReadFPMUL], + (instregex "V(N)?ML(A|S)D$")>; + +// Single-precision fused MACs look like latency 5 with advance of 2. + +def M7WriteVFPLatOverride5 : SchedWriteRes<[]> { + let Latency = 5; + let NumMicroOps = 0; +} +def M7ReadFPMAC2 : SchedReadAdvance<2>; + +def : InstRW<[M7WriteVFPLatOverride5, WriteFPMAC32, + M7ReadFPMAC2, ReadFPMUL, ReadFPMUL], + (instregex "VF(N)?M(A|S)S$")>; + +// Double-precision fused MAC stalls the pipeline behind it for 2 cycles, making +// it appear to have 3 cycle latency for scheduling. 
+ +def : InstRW<[M7WriteVFPLatOverride, WriteFPMAC64, + ReadFPMAC, ReadFPMUL, ReadFPMUL], + (instregex "VF(N)?M(A|S)D$")>; + +} // SchedModel = CortexM7Model diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleR52.td b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleR52.td index 466acec6f7..aabce817a9 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleR52.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleR52.td @@ -787,8 +787,8 @@ def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>; def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>; -def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)d")>; -def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)q")>; +def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)d")>; +def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)q")>; def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>; diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleSwift.td b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleSwift.td index d66b3065c7..ef2bde2a0d 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleSwift.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMScheduleSwift.td @@ -558,8 +558,8 @@ let SchedModel = SwiftModel in { (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL", "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi", "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST", - "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF", "VBIT", - "VBSL", "VBSP", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>; + "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF", "VBIT", + "VBSL", "VBSP", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>; def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VEXT", "VREV16", "VREV32", "VREV64")>; diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.cpp index 5cb608b74a..c49135d536 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.cpp @@ -97,9 +97,9 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle, bool MinSize) - : ARMGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), - UseMulOps(UseFusedMulOps), CPUString(CPU), OptMinSize(MinSize), - IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM), + : ARMGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), + UseMulOps(UseFusedMulOps), CPUString(CPU), OptMinSize(MinSize), + IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. @@ -185,7 +185,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { else ArchFS = std::string(FS); } - ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, ArchFS); + ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, ArchFS); // FIXME: This used enable V6T2 support implicitly for Thumb2 mode. // Assert this for now to make the change obvious. 
@@ -237,7 +237,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { switch (IT) { case DefaultIT: - RestrictIT = hasV8Ops() && !hasMinSize(); + RestrictIT = hasV8Ops() && !hasMinSize(); break; case RestrictedIT: RestrictIT = true; @@ -294,13 +294,13 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { case CortexA76: case CortexA77: case CortexA78: - case CortexA78C: + case CortexA78C: case CortexR4: case CortexR4F: case CortexR5: case CortexR7: case CortexM3: - case CortexM7: + case CortexM7: case CortexR52: case CortexX1: break; @@ -316,8 +316,8 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { PreISelOperandLatencyAdjustment = 1; break; case NeoverseN1: - case NeoverseN2: - case NeoverseV1: + case NeoverseN2: + case NeoverseV1: break; case Swift: MaxInterleaveFactor = 2; diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.h b/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.h index fd9b94fdaa..a6335c6984 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMSubtarget.h @@ -63,11 +63,11 @@ protected: CortexA76, CortexA77, CortexA78, - CortexA78C, + CortexA78C, CortexA8, CortexA9, CortexM3, - CortexM7, + CortexM7, CortexR4, CortexR4F, CortexR5, @@ -78,8 +78,8 @@ protected: Krait, Kryo, NeoverseN1, - NeoverseN2, - NeoverseV1, + NeoverseN2, + NeoverseV1, Swift }; enum ARMProcClassEnum { @@ -167,7 +167,7 @@ protected: bool HasV8_4aOps = false; bool HasV8_5aOps = false; bool HasV8_6aOps = false; - bool HasV8_7aOps = false; + bool HasV8_7aOps = false; bool HasV8MBaselineOps = false; bool HasV8MMainlineOps = false; bool HasV8_1MMainlineOps = false; @@ -466,13 +466,13 @@ protected: /// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1. bool NegativeImmediates = true; - /// Harden against Straight Line Speculation for Returns and Indirect - /// Branches. - bool HardenSlsRetBr = false; - - /// Harden against Straight Line Speculation for indirect calls. - bool HardenSlsBlr = false; - + /// Harden against Straight Line Speculation for Returns and Indirect + /// Branches. + bool HardenSlsRetBr = false; + + /// Harden against Straight Line Speculation for indirect calls. + bool HardenSlsBlr = false; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. Align stackAlignment = Align(4); @@ -538,7 +538,7 @@ public: /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); /// initializeSubtargetDependencies - Initializes using a CPU and feature string /// so that we can use initializer lists for subtarget initialization. 
@@ -606,7 +606,7 @@ public: bool hasV8_4aOps() const { return HasV8_4aOps; } bool hasV8_5aOps() const { return HasV8_5aOps; } bool hasV8_6aOps() const { return HasV8_6aOps; } - bool hasV8_7aOps() const { return HasV8_7aOps; } + bool hasV8_7aOps() const { return HasV8_7aOps; } bool hasV8MBaselineOps() const { return HasV8MBaselineOps; } bool hasV8MMainlineOps() const { return HasV8MMainlineOps; } bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; } @@ -627,7 +627,7 @@ public: bool isCortexA15() const { return ARMProcFamily == CortexA15; } bool isSwift() const { return ARMProcFamily == Swift; } bool isCortexM3() const { return ARMProcFamily == CortexM3; } - bool isCortexM7() const { return ARMProcFamily == CortexM7; } + bool isCortexM7() const { return ARMProcFamily == CortexM7; } bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); } bool isCortexR5() const { return ARMProcFamily == CortexR5; } bool isKrait() const { return ARMProcFamily == Krait; } @@ -915,9 +915,9 @@ public: bool ignoreCSRForAllocationOrder(const MachineFunction &MF, unsigned PhysReg) const override; unsigned getGPRAllocationOrder(const MachineFunction &MF) const; - - bool hardenSlsRetBr() const { return HardenSlsRetBr; } - bool hardenSlsBlr() const { return HardenSlsBlr; } + + bool hardenSlsRetBr() const { return HardenSlsRetBr; } + bool hardenSlsBlr() const { return HardenSlsBlr; } }; } // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.cpp index 237ef54c83..c4841aabdf 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.cpp @@ -99,9 +99,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() { initializeMVEVPTOptimisationsPass(Registry); initializeMVETailPredicationPass(Registry); initializeARMLowOverheadLoopsPass(Registry); - initializeARMBlockPlacementPass(Registry); + initializeARMBlockPlacementPass(Registry); initializeMVEGatherScatterLoweringPass(Registry); - initializeARMSLSHardeningPass(Registry); + initializeARMSLSHardeningPass(Registry); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { @@ -253,7 +253,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, // ARM supports the MachineOutliner. setMachineOutliner(true); - setSupportsDefaultOutlining(true); + setSupportsDefaultOutlining(true); } ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; @@ -263,10 +263,10 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); - std::string CPU = - CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; - std::string FS = - FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; + std::string CPU = + CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; + std::string FS = + FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; // FIXME: This is related to the code below to reset the target options, // we need to know whether or not the soft float flag is set on the @@ -409,8 +409,8 @@ void ARMPassConfig::addIRPasses() { // ldrex/strex loops to simplify this, but it needs tidying up. 
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass( - SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true), - [this](const Function &F) { + SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true), + [this](const Function &F) { const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F); return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); })); @@ -472,7 +472,7 @@ bool ARMPassConfig::addInstSelector() { } bool ARMPassConfig::addIRTranslator() { - addPass(new IRTranslator(getOptLevel())); + addPass(new IRTranslator(getOptLevel())); return false; } @@ -540,9 +540,9 @@ void ARMPassConfig::addPreSched2() { addPass(&PostMachineSchedulerID); addPass(&PostRASchedulerID); } - - addPass(createARMIndirectThunks()); - addPass(createARMSLSHardeningPass()); + + addPass(createARMIndirectThunks()); + addPass(createARMSLSHardeningPass()); } void ARMPassConfig::addPreEmitPass() { @@ -553,11 +553,11 @@ void ARMPassConfig::addPreEmitPass() { return MF.getSubtarget<ARMSubtarget>().isThumb2(); })); - // Don't optimize barriers or block placement at -O0. - if (getOptLevel() != CodeGenOpt::None) { - addPass(createARMBlockPlacementPass()); + // Don't optimize barriers or block placement at -O0. + if (getOptLevel() != CodeGenOpt::None) { + addPass(createARMBlockPlacementPass()); addPass(createARMOptimizeBarriersPass()); - } + } } void ARMPassConfig::addPreEmitPass2() { diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.h b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.h index 8428092bf1..d9f5d40eb1 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.h +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetMachine.h @@ -72,12 +72,12 @@ public: } bool targetSchedulesPostRAScheduling() const override { return true; }; - - /// Returns true if a cast between SrcAS and DestAS is a noop. - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. - return true; - } + + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Addrspacecasts are always noops. + return true; + } }; /// ARM/Thumb little endian target machine. 
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.cpp index 8901934013..e4e4252041 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -20,18 +20,18 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/KnownBits.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/InstCombine/InstCombiner.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include <algorithm> #include <cassert> @@ -50,38 +50,38 @@ static cl::opt<bool> DisableLowOverheadLoops( "disable-arm-loloops", cl::Hidden, cl::init(false), cl::desc("Disable the generation of low-overhead loops")); -static cl::opt<bool> - AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true), - cl::desc("Enable the generation of WLS loops")); - +static cl::opt<bool> + AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true), + cl::desc("Enable the generation of WLS loops")); + extern cl::opt<TailPredication::Mode> EnableTailPredication; extern cl::opt<bool> EnableMaskedGatherScatters; -extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor; - -/// Convert a vector load intrinsic into a simple llvm load instruction. -/// This is beneficial when the underlying object being addressed comes -/// from a constant, since we get constant-folding for free. -static Value *simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign, - InstCombiner::BuilderTy &Builder) { - auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1)); - - if (!IntrAlign) - return nullptr; - - unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign - ? MemAlign - : IntrAlign->getLimitedValue(); - - if (!isPowerOf2_32(Alignment)) - return nullptr; - - auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0), - PointerType::get(II.getType(), 0)); - return Builder.CreateAlignedLoad(II.getType(), BCastInst, Align(Alignment)); -} - +extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor; + +/// Convert a vector load intrinsic into a simple llvm load instruction. +/// This is beneficial when the underlying object being addressed comes +/// from a constant, since we get constant-folding for free. +static Value *simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign, + InstCombiner::BuilderTy &Builder) { + auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1)); + + if (!IntrAlign) + return nullptr; + + unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign + ? 
MemAlign + : IntrAlign->getLimitedValue(); + + if (!isPowerOf2_32(Alignment)) + return nullptr; + + auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0), + PointerType::get(II.getType(), 0)); + return Builder.CreateAlignedLoad(II.getType(), BCastInst, Align(Alignment)); +} + bool ARMTTIImpl::areInlineCompatible(const Function *Caller, const Function *Callee) const { const TargetMachine &TM = getTLI()->getTargetMachine(); @@ -114,138 +114,138 @@ bool ARMTTIImpl::shouldFavorPostInc() const { return false; } -Optional<Instruction *> -ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { - using namespace PatternMatch; - Intrinsic::ID IID = II.getIntrinsicID(); - switch (IID) { - default: - break; - case Intrinsic::arm_neon_vld1: { - Align MemAlign = - getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II, - &IC.getAssumptionCache(), &IC.getDominatorTree()); - if (Value *V = simplifyNeonVld1(II, MemAlign.value(), IC.Builder)) { - return IC.replaceInstUsesWith(II, V); - } - break; - } - - case Intrinsic::arm_neon_vld2: - case Intrinsic::arm_neon_vld3: - case Intrinsic::arm_neon_vld4: - case Intrinsic::arm_neon_vld2lane: - case Intrinsic::arm_neon_vld3lane: - case Intrinsic::arm_neon_vld4lane: - case Intrinsic::arm_neon_vst1: - case Intrinsic::arm_neon_vst2: - case Intrinsic::arm_neon_vst3: - case Intrinsic::arm_neon_vst4: - case Intrinsic::arm_neon_vst2lane: - case Intrinsic::arm_neon_vst3lane: - case Intrinsic::arm_neon_vst4lane: { - Align MemAlign = - getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II, - &IC.getAssumptionCache(), &IC.getDominatorTree()); - unsigned AlignArg = II.getNumArgOperands() - 1; - Value *AlignArgOp = II.getArgOperand(AlignArg); - MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue(); - if (Align && *Align < MemAlign) { - return IC.replaceOperand( - II, AlignArg, - ConstantInt::get(Type::getInt32Ty(II.getContext()), MemAlign.value(), - false)); - } - break; - } - - case Intrinsic::arm_mve_pred_i2v: { - Value *Arg = II.getArgOperand(0); - Value *ArgArg; - if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>( - PatternMatch::m_Value(ArgArg))) && - II.getType() == ArgArg->getType()) { - return IC.replaceInstUsesWith(II, ArgArg); - } - Constant *XorMask; - if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>( - PatternMatch::m_Value(ArgArg)), - PatternMatch::m_Constant(XorMask))) && - II.getType() == ArgArg->getType()) { - if (auto *CI = dyn_cast<ConstantInt>(XorMask)) { - if (CI->getValue().trunc(16).isAllOnesValue()) { - auto TrueVector = IC.Builder.CreateVectorSplat( - cast<FixedVectorType>(II.getType())->getNumElements(), - IC.Builder.getTrue()); - return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector); - } - } - } - KnownBits ScalarKnown(32); - if (IC.SimplifyDemandedBits(&II, 0, APInt::getLowBitsSet(32, 16), - ScalarKnown, 0)) { - return &II; - } - break; - } - case Intrinsic::arm_mve_pred_v2i: { - Value *Arg = II.getArgOperand(0); - Value *ArgArg; - if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>( - PatternMatch::m_Value(ArgArg)))) { - return IC.replaceInstUsesWith(II, ArgArg); - } - if (!II.getMetadata(LLVMContext::MD_range)) { - Type *IntTy32 = Type::getInt32Ty(II.getContext()); - Metadata *M[] = { - ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0)), - ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0xFFFF))}; - II.setMetadata(LLVMContext::MD_range, MDNode::get(II.getContext(), M)); - return &II; - } - break; - } - 
case Intrinsic::arm_mve_vadc: - case Intrinsic::arm_mve_vadc_predicated: { - unsigned CarryOp = - (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2; - assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 && - "Bad type for intrinsic!"); - - KnownBits CarryKnown(32); - if (IC.SimplifyDemandedBits(&II, CarryOp, APInt::getOneBitSet(32, 29), - CarryKnown)) { - return &II; - } - break; - } - case Intrinsic::arm_mve_vmldava: { - Instruction *I = cast<Instruction>(&II); - if (I->hasOneUse()) { - auto *User = cast<Instruction>(*I->user_begin()); - Value *OpZ; - if (match(User, m_c_Add(m_Specific(I), m_Value(OpZ))) && - match(I->getOperand(3), m_Zero())) { - Value *OpX = I->getOperand(4); - Value *OpY = I->getOperand(5); - Type *OpTy = OpX->getType(); - - IC.Builder.SetInsertPoint(User); - Value *V = - IC.Builder.CreateIntrinsic(Intrinsic::arm_mve_vmldava, {OpTy}, - {I->getOperand(0), I->getOperand(1), - I->getOperand(2), OpZ, OpX, OpY}); - - IC.replaceInstUsesWith(*User, V); - return IC.eraseInstFromFunction(*User); - } - } - return None; - } - } - return None; -} - +Optional<Instruction *> +ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { + using namespace PatternMatch; + Intrinsic::ID IID = II.getIntrinsicID(); + switch (IID) { + default: + break; + case Intrinsic::arm_neon_vld1: { + Align MemAlign = + getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II, + &IC.getAssumptionCache(), &IC.getDominatorTree()); + if (Value *V = simplifyNeonVld1(II, MemAlign.value(), IC.Builder)) { + return IC.replaceInstUsesWith(II, V); + } + break; + } + + case Intrinsic::arm_neon_vld2: + case Intrinsic::arm_neon_vld3: + case Intrinsic::arm_neon_vld4: + case Intrinsic::arm_neon_vld2lane: + case Intrinsic::arm_neon_vld3lane: + case Intrinsic::arm_neon_vld4lane: + case Intrinsic::arm_neon_vst1: + case Intrinsic::arm_neon_vst2: + case Intrinsic::arm_neon_vst3: + case Intrinsic::arm_neon_vst4: + case Intrinsic::arm_neon_vst2lane: + case Intrinsic::arm_neon_vst3lane: + case Intrinsic::arm_neon_vst4lane: { + Align MemAlign = + getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II, + &IC.getAssumptionCache(), &IC.getDominatorTree()); + unsigned AlignArg = II.getNumArgOperands() - 1; + Value *AlignArgOp = II.getArgOperand(AlignArg); + MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue(); + if (Align && *Align < MemAlign) { + return IC.replaceOperand( + II, AlignArg, + ConstantInt::get(Type::getInt32Ty(II.getContext()), MemAlign.value(), + false)); + } + break; + } + + case Intrinsic::arm_mve_pred_i2v: { + Value *Arg = II.getArgOperand(0); + Value *ArgArg; + if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>( + PatternMatch::m_Value(ArgArg))) && + II.getType() == ArgArg->getType()) { + return IC.replaceInstUsesWith(II, ArgArg); + } + Constant *XorMask; + if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>( + PatternMatch::m_Value(ArgArg)), + PatternMatch::m_Constant(XorMask))) && + II.getType() == ArgArg->getType()) { + if (auto *CI = dyn_cast<ConstantInt>(XorMask)) { + if (CI->getValue().trunc(16).isAllOnesValue()) { + auto TrueVector = IC.Builder.CreateVectorSplat( + cast<FixedVectorType>(II.getType())->getNumElements(), + IC.Builder.getTrue()); + return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector); + } + } + } + KnownBits ScalarKnown(32); + if (IC.SimplifyDemandedBits(&II, 0, APInt::getLowBitsSet(32, 16), + ScalarKnown, 0)) { + return &II; + } + break; + } + 
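The arm_mve_vmldava fold above relies on a simple algebraic fact; this scalar sketch (illustrative only, not the full MVE semantics) makes it explicit: a multiply-accumulate that starts from a zero accumulator and whose only use is an add of some value Z can absorb Z as its initial accumulator.

#include <cstddef>
#include <cstdint>
#include <vector>

int32_t mladavaSketch(int32_t acc, const std::vector<int32_t> &x,
                      const std::vector<int32_t> &y) {
  for (std::size_t i = 0; i < x.size() && i < y.size(); ++i)
    acc += x[i] * y[i];   // dot-product style accumulate
  return acc;             // hence  z + mladavaSketch(0, x, y) == mladavaSketch(z, x, y)
}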
case Intrinsic::arm_mve_pred_v2i: { + Value *Arg = II.getArgOperand(0); + Value *ArgArg; + if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>( + PatternMatch::m_Value(ArgArg)))) { + return IC.replaceInstUsesWith(II, ArgArg); + } + if (!II.getMetadata(LLVMContext::MD_range)) { + Type *IntTy32 = Type::getInt32Ty(II.getContext()); + Metadata *M[] = { + ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0)), + ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0xFFFF))}; + II.setMetadata(LLVMContext::MD_range, MDNode::get(II.getContext(), M)); + return &II; + } + break; + } + case Intrinsic::arm_mve_vadc: + case Intrinsic::arm_mve_vadc_predicated: { + unsigned CarryOp = + (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2; + assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 && + "Bad type for intrinsic!"); + + KnownBits CarryKnown(32); + if (IC.SimplifyDemandedBits(&II, CarryOp, APInt::getOneBitSet(32, 29), + CarryKnown)) { + return &II; + } + break; + } + case Intrinsic::arm_mve_vmldava: { + Instruction *I = cast<Instruction>(&II); + if (I->hasOneUse()) { + auto *User = cast<Instruction>(*I->user_begin()); + Value *OpZ; + if (match(User, m_c_Add(m_Specific(I), m_Value(OpZ))) && + match(I->getOperand(3), m_Zero())) { + Value *OpX = I->getOperand(4); + Value *OpY = I->getOperand(5); + Type *OpTy = OpX->getType(); + + IC.Builder.SetInsertPoint(User); + Value *V = + IC.Builder.CreateIntrinsic(Intrinsic::arm_mve_vmldava, {OpTy}, + {I->getOperand(0), I->getOperand(1), + I->getOperand(2), OpZ, OpX, OpY}); + + IC.replaceInstUsesWith(*User, V); + return IC.eraseInstFromFunction(*User); + } + } + return None; + } + } + return None; +} + int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); @@ -289,43 +289,43 @@ int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, return 1; } -// Checks whether Inst is part of a min(max()) or max(min()) pattern -// that will match to an SSAT instruction -static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) { - Value *LHS, *RHS; - ConstantInt *C; - SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor; - - if (InstSPF == SPF_SMAX && - PatternMatch::match(RHS, PatternMatch::m_ConstantInt(C)) && - C->getValue() == Imm && Imm.isNegative() && (-Imm).isPowerOf2()) { - - auto isSSatMin = [&](Value *MinInst) { - if (isa<SelectInst>(MinInst)) { - Value *MinLHS, *MinRHS; - ConstantInt *MinC; - SelectPatternFlavor MinSPF = - matchSelectPattern(MinInst, MinLHS, MinRHS).Flavor; - if (MinSPF == SPF_SMIN && - PatternMatch::match(MinRHS, PatternMatch::m_ConstantInt(MinC)) && - MinC->getValue() == ((-Imm) - 1)) - return true; - } - return false; - }; - - if (isSSatMin(Inst->getOperand(1)) || - (Inst->hasNUses(2) && (isSSatMin(*Inst->user_begin()) || - isSSatMin(*(++Inst->user_begin()))))) - return true; - } - return false; -} - -int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind, - Instruction *Inst) { +// Checks whether Inst is part of a min(max()) or max(min()) pattern +// that will match to an SSAT instruction +static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) { + Value *LHS, *RHS; + ConstantInt *C; + SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor; + + if (InstSPF == SPF_SMAX && + PatternMatch::match(RHS, PatternMatch::m_ConstantInt(C)) && + C->getValue() == Imm && Imm.isNegative() && 
(-Imm).isPowerOf2()) { + + auto isSSatMin = [&](Value *MinInst) { + if (isa<SelectInst>(MinInst)) { + Value *MinLHS, *MinRHS; + ConstantInt *MinC; + SelectPatternFlavor MinSPF = + matchSelectPattern(MinInst, MinLHS, MinRHS).Flavor; + if (MinSPF == SPF_SMIN && + PatternMatch::match(MinRHS, PatternMatch::m_ConstantInt(MinC)) && + MinC->getValue() == ((-Imm) - 1)) + return true; + } + return false; + }; + + if (isSSatMin(Inst->getOperand(1)) || + (Inst->hasNUses(2) && (isSSatMin(*Inst->user_begin()) || + isSSatMin(*(++Inst->user_begin()))))) + return true; + } + return false; +} + +int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst) { // Division by a constant can be turned into multiplication, but only if we // know it's constant. So it's not so much that the immediate is cheap (it's // not), but that the alternative is worse. @@ -364,33 +364,33 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, if (Opcode == Instruction::Xor && Imm.isAllOnesValue()) return 0; - // Ensures negative constant of min(max()) or max(min()) patterns that - // match to SSAT instructions don't get hoisted - if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) && - Ty->getIntegerBitWidth() <= 32) { - if (isSSATMinMaxPattern(Inst, Imm) || - (isa<ICmpInst>(Inst) && Inst->hasOneUse() && - isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm))) - return 0; - } - + // Ensures negative constant of min(max()) or max(min()) patterns that + // match to SSAT instructions don't get hoisted + if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) && + Ty->getIntegerBitWidth() <= 32) { + if (isSSATMinMaxPattern(Inst, Imm) || + (isa<ICmpInst>(Inst) && Inst->hasOneUse() && + isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm))) + return 0; + } + return getIntImmCost(Imm, Ty, CostKind); } -int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) { - if (CostKind == TTI::TCK_RecipThroughput && - (ST->hasNEON() || ST->hasMVEIntegerOps())) { - // FIXME: The vectorizer is highly sensistive to the cost of these - // instructions, which suggests that it may be using the costs incorrectly. - // But, for now, just make them free to avoid performance regressions for - // vector targets. - return 0; - } - return BaseT::getCFInstrCost(Opcode, CostKind); -} - +int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) { + if (CostKind == TTI::TCK_RecipThroughput && + (ST->hasNEON() || ST->hasMVEIntegerOps())) { + // FIXME: The vectorizer is highly sensistive to the cost of these + // instructions, which suggests that it may be using the costs incorrectly. + // But, for now, just make them free to avoid performance regressions for + // vector targets. + return 0; + } + return BaseT::getCFInstrCost(Opcode, CostKind); +} + int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - TTI::CastContextHint CCH, + TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -402,35 +402,35 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, return Cost == 0 ? 
0 : 1; return Cost; }; - auto IsLegalFPType = [this](EVT VT) { - EVT EltVT = VT.getScalarType(); - return (EltVT == MVT::f32 && ST->hasVFP2Base()) || - (EltVT == MVT::f64 && ST->hasFP64()) || - (EltVT == MVT::f16 && ST->hasFullFP16()); - }; + auto IsLegalFPType = [this](EVT VT) { + EVT EltVT = VT.getScalarType(); + return (EltVT == MVT::f32 && ST->hasVFP2Base()) || + (EltVT == MVT::f64 && ST->hasFP64()) || + (EltVT == MVT::f16 && ST->hasFullFP16()); + }; EVT SrcTy = TLI->getValueType(DL, Src); EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) - return AdjustCost( - BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); - - // Extending masked load/Truncating masked stores is expensive because we - // currently don't split them. This means that we'll likely end up - // loading/storing each element individually (hence the high cost). - if ((ST->hasMVEIntegerOps() && - (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt || - Opcode == Instruction::SExt)) || - (ST->hasMVEFloatOps() && - (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) && - IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))) - if (CCH == TTI::CastContextHint::Masked && DstTy.getSizeInBits() > 128) - return 2 * DstTy.getVectorNumElements() * ST->getMVEVectorCostFactor(); - - // The extend of other kinds of load is free - if (CCH == TTI::CastContextHint::Normal || - CCH == TTI::CastContextHint::Masked) { + return AdjustCost( + BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); + + // Extending masked load/Truncating masked stores is expensive because we + // currently don't split them. This means that we'll likely end up + // loading/storing each element individually (hence the high cost). + if ((ST->hasMVEIntegerOps() && + (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt || + Opcode == Instruction::SExt)) || + (ST->hasMVEFloatOps() && + (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) && + IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))) + if (CCH == TTI::CastContextHint::Masked && DstTy.getSizeInBits() > 128) + return 2 * DstTy.getVectorNumElements() * ST->getMVEVectorCostFactor(); + + // The extend of other kinds of load is free + if (CCH == TTI::CastContextHint::Normal || + CCH == TTI::CastContextHint::Masked) { static const TypeConversionCostTblEntry LoadConversionTbl[] = { {ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0}, {ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0}, @@ -485,31 +485,31 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor()); } - // The truncate of a store is free. This is the mirror of extends above. - static const TypeConversionCostTblEntry MVEStoreConversionTbl[] = { + // The truncate of a store is free. This is the mirror of extends above. 
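As a plain-C++ reference for what isSSATMinMaxPattern above recognises (a hedged sketch, not the matcher itself): a signed clamp to [-2^(k-1), 2^(k-1)-1] written as max(min(x, hi), lo). A single SSAT instruction implements this clamp with the bounds encoded in the instruction, which is why the negative constant should not be hoisted into a register.

#include <algorithm>
#include <cstdint>

int32_t ssatSketch(int32_t x, unsigned k) {  // saturate to k bits, 1 <= k <= 32
  const int32_t hi = static_cast<int32_t>((1u << (k - 1)) - 1);
  const int32_t lo = -hi - 1;
  return std::max(std::min(x, hi), lo);      // the min/max pair SSAT replaces
}

For instance, ssatSketch(x, 8) clamps to [-128, 127], exactly the range an ssat with a #8 saturation width provides.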
+ static const TypeConversionCostTblEntry MVEStoreConversionTbl[] = { {ISD::TRUNCATE, MVT::v4i32, MVT::v4i16, 0}, {ISD::TRUNCATE, MVT::v4i32, MVT::v4i8, 0}, {ISD::TRUNCATE, MVT::v8i16, MVT::v8i8, 0}, {ISD::TRUNCATE, MVT::v8i32, MVT::v8i16, 1}, - {ISD::TRUNCATE, MVT::v8i32, MVT::v8i8, 1}, + {ISD::TRUNCATE, MVT::v8i32, MVT::v8i8, 1}, {ISD::TRUNCATE, MVT::v16i32, MVT::v16i8, 3}, {ISD::TRUNCATE, MVT::v16i16, MVT::v16i8, 1}, }; if (SrcTy.isVector() && ST->hasMVEIntegerOps()) { if (const auto *Entry = - ConvertCostTableLookup(MVEStoreConversionTbl, ISD, - SrcTy.getSimpleVT(), DstTy.getSimpleVT())) + ConvertCostTableLookup(MVEStoreConversionTbl, ISD, + SrcTy.getSimpleVT(), DstTy.getSimpleVT())) return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor()); } - static const TypeConversionCostTblEntry MVEFStoreConversionTbl[] = { + static const TypeConversionCostTblEntry MVEFStoreConversionTbl[] = { {ISD::FP_ROUND, MVT::v4f32, MVT::v4f16, 1}, {ISD::FP_ROUND, MVT::v8f32, MVT::v8f16, 3}, }; if (SrcTy.isVector() && ST->hasMVEFloatOps()) { if (const auto *Entry = - ConvertCostTableLookup(MVEFStoreConversionTbl, ISD, - SrcTy.getSimpleVT(), DstTy.getSimpleVT())) + ConvertCostTableLookup(MVEFStoreConversionTbl, ISD, + SrcTy.getSimpleVT(), DstTy.getSimpleVT())) return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor()); } } @@ -746,24 +746,24 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, if (SrcTy.isFixedLengthVector()) Lanes = SrcTy.getVectorNumElements(); - if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)) + if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)) return Lanes; else return Lanes * CallCost; } - if (ISD == ISD::TRUNCATE && ST->hasMVEIntegerOps() && - SrcTy.isFixedLengthVector()) { - // Treat a truncate with larger than legal source (128bits for MVE) as - // expensive, 2 instructions per lane. - if ((SrcTy.getScalarType() == MVT::i8 || - SrcTy.getScalarType() == MVT::i16 || - SrcTy.getScalarType() == MVT::i32) && - SrcTy.getSizeInBits() > 128 && - SrcTy.getSizeInBits() > DstTy.getSizeInBits()) - return SrcTy.getVectorNumElements() * 2; - } - + if (ISD == ISD::TRUNCATE && ST->hasMVEIntegerOps() && + SrcTy.isFixedLengthVector()) { + // Treat a truncate with larger than legal source (128bits for MVE) as + // expensive, 2 instructions per lane. + if ((SrcTy.getScalarType() == MVT::i8 || + SrcTy.getScalarType() == MVT::i16 || + SrcTy.getScalarType() == MVT::i32) && + SrcTy.getSizeInBits() > 128 && + SrcTy.getSizeInBits() > DstTy.getSizeInBits()) + return SrcTy.getVectorNumElements() * 2; + } + // Scalar integer conversion costs. static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = { // i16 -> i64 requires two dependent operations. @@ -787,7 +787,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, ? ST->getMVEVectorCostFactor() : 1; return AdjustCost( - BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); + BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); } int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, @@ -827,37 +827,37 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, } int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) { - int ISD = TLI->InstructionOpcodeToISD(Opcode); - - // Thumb scalar code size cost for select. 
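The conversion-cost tables above all follow the same table-driven shape; the sketch below shows that lookup pattern in isolation (the struct and field names are illustrative stand-ins, not the LLVM cost-table types).

#include <cstddef>
#include <optional>

struct ConversionCostEntry {
  int Opcode;      // e.g. a TRUNCATE or SIGN_EXTEND tag
  int DstType;     // destination value type tag
  int SrcType;     // source value type tag
  unsigned Cost;   // 0 means "folds into the load/store for free"
};

template <std::size_t N>
std::optional<unsigned>
lookupConversionCost(const ConversionCostEntry (&table)[N], int opcode,
                     int dstType, int srcType) {
  for (const auto &entry : table)
    if (entry.Opcode == opcode && entry.DstType == dstType &&
        entry.SrcType == srcType)
      return entry.Cost;
  return std::nullopt;  // caller falls back to the generic cost model
}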
- if (CostKind == TTI::TCK_CodeSize && ISD == ISD::SELECT && - ST->isThumb() && !ValTy->isVectorTy()) { - // Assume expensive structs. - if (TLI->getValueType(DL, ValTy, true) == MVT::Other) - return TTI::TCC_Expensive; - - // Select costs can vary because they: - // - may require one or more conditional mov (including an IT), - // - can't operate directly on immediates, - // - require live flags, which we can't copy around easily. - int Cost = TLI->getTypeLegalizationCost(DL, ValTy).first; - - // Possible IT instruction for Thumb2, or more for Thumb1. - ++Cost; - - // i1 values may need rematerialising by using mov immediates and/or - // flag setting instructions. - if (ValTy->isIntegerTy(1)) - ++Cost; - - return Cost; - } - + int ISD = TLI->InstructionOpcodeToISD(Opcode); + + // Thumb scalar code size cost for select. + if (CostKind == TTI::TCK_CodeSize && ISD == ISD::SELECT && + ST->isThumb() && !ValTy->isVectorTy()) { + // Assume expensive structs. + if (TLI->getValueType(DL, ValTy, true) == MVT::Other) + return TTI::TCC_Expensive; + + // Select costs can vary because they: + // - may require one or more conditional mov (including an IT), + // - can't operate directly on immediates, + // - require live flags, which we can't copy around easily. + int Cost = TLI->getTypeLegalizationCost(DL, ValTy).first; + + // Possible IT instruction for Thumb2, or more for Thumb1. + ++Cost; + + // i1 values may need rematerialising by using mov immediates and/or + // flag setting instructions. + if (ValTy->isIntegerTy(1)) + ++Cost; + + return Cost; + } + // On NEON a vector select gets lowered to vbsl. - if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) { + if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) { // Lowering of some vector selects is currently far from perfect. static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = { { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, @@ -878,15 +878,15 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, return LT.first; } - // Default to cheap (throughput/size of 1 instruction) but adjust throughput - // for "multiple beats" potentially needed by MVE instructions. - int BaseCost = 1; - if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() && - ValTy->isVectorTy()) - BaseCost = ST->getMVEVectorCostFactor(); - - return BaseCost * - BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + // Default to cheap (throughput/size of 1 instruction) but adjust throughput + // for "multiple beats" potentially needed by MVE instructions. + int BaseCost = 1; + if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() && + ValTy->isVectorTy()) + BaseCost = ST->getMVEVectorCostFactor(); + + return BaseCost * + BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, @@ -968,85 +968,85 @@ bool ARMTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) { (EltWidth == 16 && Alignment >= 2) || EltWidth == 8); } -/// Given a memcpy/memset/memmove instruction, return the number of memory -/// operations performed, via querying findOptimalMemOpLowering. Returns -1 if a -/// call is used. 
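The NEON select costs above model a vbsl-style lowering. As a per-lane illustration (a sketch assuming 32-bit lanes, not the actual codegen), a vector select under a mask is a pair of ANDs and an OR per vector register:

#include <array>
#include <cstddef>
#include <cstdint>

std::array<uint32_t, 4> bitwiseSelect(const std::array<uint32_t, 4> &mask,
                                      const std::array<uint32_t, 4> &ifTrue,
                                      const std::array<uint32_t, 4> &ifFalse) {
  std::array<uint32_t, 4> result{};
  for (std::size_t i = 0; i < result.size(); ++i)
    result[i] = (ifTrue[i] & mask[i]) | (ifFalse[i] & ~mask[i]);  // vbsl per lane
  return result;
}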
-int ARMTTIImpl::getNumMemOps(const IntrinsicInst *I) const { - MemOp MOp; - unsigned DstAddrSpace = ~0u; - unsigned SrcAddrSpace = ~0u; - const Function *F = I->getParent()->getParent(); - - if (const auto *MC = dyn_cast<MemTransferInst>(I)) { - ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength()); - // If 'size' is not a constant, a library call will be generated. - if (!C) - return -1; - - const unsigned Size = C->getValue().getZExtValue(); - const Align DstAlign = *MC->getDestAlign(); - const Align SrcAlign = *MC->getSourceAlign(); - - MOp = MemOp::Copy(Size, /*DstAlignCanChange*/ false, DstAlign, SrcAlign, - /*IsVolatile*/ false); - DstAddrSpace = MC->getDestAddressSpace(); - SrcAddrSpace = MC->getSourceAddressSpace(); - } - else if (const auto *MS = dyn_cast<MemSetInst>(I)) { - ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength()); - // If 'size' is not a constant, a library call will be generated. - if (!C) - return -1; - - const unsigned Size = C->getValue().getZExtValue(); - const Align DstAlign = *MS->getDestAlign(); - - MOp = MemOp::Set(Size, /*DstAlignCanChange*/ false, DstAlign, - /*IsZeroMemset*/ false, /*IsVolatile*/ false); - DstAddrSpace = MS->getDestAddressSpace(); - } - else - llvm_unreachable("Expected a memcpy/move or memset!"); - - unsigned Limit, Factor = 2; - switch(I->getIntrinsicID()) { - case Intrinsic::memcpy: - Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize()); - break; - case Intrinsic::memmove: - Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize()); - break; - case Intrinsic::memset: - Limit = TLI->getMaxStoresPerMemset(F->hasMinSize()); - Factor = 1; - break; - default: - llvm_unreachable("Expected a memcpy/move or memset!"); - } - +/// Given a memcpy/memset/memmove instruction, return the number of memory +/// operations performed, via querying findOptimalMemOpLowering. Returns -1 if a +/// call is used. +int ARMTTIImpl::getNumMemOps(const IntrinsicInst *I) const { + MemOp MOp; + unsigned DstAddrSpace = ~0u; + unsigned SrcAddrSpace = ~0u; + const Function *F = I->getParent()->getParent(); + + if (const auto *MC = dyn_cast<MemTransferInst>(I)) { + ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength()); + // If 'size' is not a constant, a library call will be generated. + if (!C) + return -1; + + const unsigned Size = C->getValue().getZExtValue(); + const Align DstAlign = *MC->getDestAlign(); + const Align SrcAlign = *MC->getSourceAlign(); + + MOp = MemOp::Copy(Size, /*DstAlignCanChange*/ false, DstAlign, SrcAlign, + /*IsVolatile*/ false); + DstAddrSpace = MC->getDestAddressSpace(); + SrcAddrSpace = MC->getSourceAddressSpace(); + } + else if (const auto *MS = dyn_cast<MemSetInst>(I)) { + ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength()); + // If 'size' is not a constant, a library call will be generated. 
+ if (!C) + return -1; + + const unsigned Size = C->getValue().getZExtValue(); + const Align DstAlign = *MS->getDestAlign(); + + MOp = MemOp::Set(Size, /*DstAlignCanChange*/ false, DstAlign, + /*IsZeroMemset*/ false, /*IsVolatile*/ false); + DstAddrSpace = MS->getDestAddressSpace(); + } + else + llvm_unreachable("Expected a memcpy/move or memset!"); + + unsigned Limit, Factor = 2; + switch(I->getIntrinsicID()) { + case Intrinsic::memcpy: + Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize()); + break; + case Intrinsic::memmove: + Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize()); + break; + case Intrinsic::memset: + Limit = TLI->getMaxStoresPerMemset(F->hasMinSize()); + Factor = 1; + break; + default: + llvm_unreachable("Expected a memcpy/move or memset!"); + } + // MemOps will be poplulated with a list of data types that needs to be // loaded and stored. That's why we multiply the number of elements by 2 to // get the cost for this memcpy. - std::vector<EVT> MemOps; + std::vector<EVT> MemOps; if (getTLI()->findOptimalMemOpLowering( - MemOps, Limit, MOp, DstAddrSpace, - SrcAddrSpace, F->getAttributes())) - return MemOps.size() * Factor; + MemOps, Limit, MOp, DstAddrSpace, + SrcAddrSpace, F->getAttributes())) + return MemOps.size() * Factor; // If we can't find an optimal memop lowering, return the default cost - return -1; -} - -int ARMTTIImpl::getMemcpyCost(const Instruction *I) { - int NumOps = getNumMemOps(cast<IntrinsicInst>(I)); - - // To model the cost of a library call, we assume 1 for the call, and - // 3 for the argument setup. - if (NumOps == -1) - return 4; - return NumOps; + return -1; } +int ARMTTIImpl::getMemcpyCost(const Instruction *I) { + int NumOps = getNumMemOps(cast<IntrinsicInst>(I)); + + // To model the cost of a library call, we assume 1 for the call, and + // 3 for the argument setup. + if (NumOps == -1) + return 4; + return NumOps; +} + int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index, VectorType *SubTp) { if (ST->hasNEON()) { @@ -1149,21 +1149,21 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, const Instruction *CxtI) { - int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); - if (ST->isThumb() && CostKind == TTI::TCK_CodeSize && Ty->isIntegerTy(1)) { - // Make operations on i1 relatively expensive as this often involves - // combining predicates. AND and XOR should be easier to handle with IT - // blocks. - switch (ISDOpcode) { - default: - break; - case ISD::AND: - case ISD::XOR: - return 2; - case ISD::OR: - return 3; - } - } + int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); + if (ST->isThumb() && CostKind == TTI::TCK_CodeSize && Ty->isIntegerTy(1)) { + // Make operations on i1 relatively expensive as this often involves + // combining predicates. AND and XOR should be easier to handle with IT + // blocks. + switch (ISDOpcode) { + default: + break; + case ISD::AND: + case ISD::XOR: + return 2; + case ISD::OR: + return 3; + } + } std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); @@ -1259,12 +1259,12 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, if (LooksLikeAFreeShift()) return 0; - // Default to cheap (throughput/size of 1 instruction) but adjust throughput - // for "multiple beats" potentially needed by MVE instructions. 
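The getNumMemOps/getMemcpyCost pair above can be summarised by a much simpler model; the sketch below is that summary under stated assumptions (fixed chunk size, no alignment handling), not the findOptimalMemOpLowering query: a constant-size copy that fits within the store limit costs a load and a store per chunk, anything else is priced as a library call.

#include <cstdint>
#include <optional>

int memcpyCostSketch(std::optional<uint64_t> constantSize, unsigned storeLimit,
                     unsigned chunkBytes = 4) {
  if (!constantSize)
    return 4;  // non-constant size: 1 for the call + 3 for argument setup
  const uint64_t chunks = (*constantSize + chunkBytes - 1) / chunkBytes;
  if (chunks > storeLimit)
    return 4;  // too large to expand inline, also modelled as a call
  return static_cast<int>(chunks) * 2;  // one load and one store per chunk
}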
- int BaseCost = 1; - if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() && - Ty->isVectorTy()) - BaseCost = ST->getMVEVectorCostFactor(); + // Default to cheap (throughput/size of 1 instruction) but adjust throughput + // for "multiple beats" potentially needed by MVE instructions. + int BaseCost = 1; + if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() && + Ty->isVectorTy()) + BaseCost = ST->getMVEVectorCostFactor(); // The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost, // without treating floats as more expensive that scalars or increasing the @@ -1331,24 +1331,24 @@ int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, CostKind, I); } -unsigned ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, - Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind) { - if (ST->hasMVEIntegerOps()) { - if (Opcode == Instruction::Load && isLegalMaskedLoad(Src, Alignment)) - return ST->getMVEVectorCostFactor(); - if (Opcode == Instruction::Store && isLegalMaskedStore(Src, Alignment)) - return ST->getMVEVectorCostFactor(); - } - if (!isa<FixedVectorType>(Src)) - return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, - CostKind); - // Scalar cost, which is currently very high due to the efficiency of the - // generated code. - return cast<FixedVectorType>(Src)->getNumElements() * 8; -} - +unsigned ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind) { + if (ST->hasMVEIntegerOps()) { + if (Opcode == Instruction::Load && isLegalMaskedLoad(Src, Alignment)) + return ST->getMVEVectorCostFactor(); + if (Opcode == Instruction::Store && isLegalMaskedStore(Src, Alignment)) + return ST->getMVEVectorCostFactor(); + } + if (!isa<FixedVectorType>(Src)) + return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); + // Scalar cost, which is currently very high due to the efficiency of the + // generated code. + return cast<FixedVectorType>(Src)->getNumElements() * 8; +} + int ARMTTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, @@ -1379,8 +1379,8 @@ int ARMTTIImpl::getInterleavedMemoryOpCost( // promoted differently). The cost of 2 here is then a load and vrev or // vmovn. if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 && - VecTy->isIntOrIntVectorTy() && - DL.getTypeSizeInBits(SubVecTy).getFixedSize() <= 64) + VecTy->isIntOrIntVectorTy() && + DL.getTypeSizeInBits(SubVecTy).getFixedSize() <= 64) return 2 * BaseCost; } @@ -1413,13 +1413,13 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, // multiplied by the number of elements being loaded. This is possibly very // conservative, but even so we still end up vectorising loops because the // cost per iteration for many loops is lower than for scalar loops. - unsigned VectorCost = NumElems * LT.first * ST->getMVEVectorCostFactor(); + unsigned VectorCost = NumElems * LT.first * ST->getMVEVectorCostFactor(); // The scalarization cost should be a lot higher. We use the number of vector // elements plus the scalarization overhead. 
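The scalar fallback being priced above corresponds to expanding a masked or gathering access element by element. The following is an illustrative scalarised masked load (plain C++, not the generated IR) showing why that fallback is charged so heavily: every lane turns into its own test, branch and scalar load.

#include <cstddef>
#include <vector>

std::vector<int> scalarisedMaskedLoad(const int *base,
                                      const std::vector<bool> &mask,
                                      int passthru) {
  std::vector<int> out(mask.size(), passthru);
  for (std::size_t i = 0; i < mask.size(); ++i)
    if (mask[i])        // per-lane compare + branch + load
      out[i] = base[i];
  return out;
}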
unsigned ScalarCost = NumElems * LT.first + BaseT::getScalarizationOverhead(VTy, {}); - if (EltSize < 8 || Alignment < EltSize / 8) + if (EltSize < 8 || Alignment < EltSize / 8) return ScalarCost; unsigned ExtSize = EltSize; @@ -1488,92 +1488,92 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, return ScalarCost; } -int ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind) { - EVT ValVT = TLI->getValueType(DL, ValTy); - int ISD = TLI->InstructionOpcodeToISD(Opcode); - if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD) - return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, - CostKind); - - std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); - - static const CostTblEntry CostTblAdd[]{ - {ISD::ADD, MVT::v16i8, 1}, - {ISD::ADD, MVT::v8i16, 1}, - {ISD::ADD, MVT::v4i32, 1}, - }; - if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second)) - return Entry->Cost * ST->getMVEVectorCostFactor() * LT.first; - - return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, - CostKind); -} - -InstructionCost -ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, - Type *ResTy, VectorType *ValTy, - TTI::TargetCostKind CostKind) { - EVT ValVT = TLI->getValueType(DL, ValTy); - EVT ResVT = TLI->getValueType(DL, ResTy); - if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) { - std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); - if ((LT.second == MVT::v16i8 && ResVT.getSizeInBits() <= 32) || - (LT.second == MVT::v8i16 && - ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) || - (LT.second == MVT::v4i32 && ResVT.getSizeInBits() <= 64)) - return ST->getMVEVectorCostFactor() * LT.first; - } - - return BaseT::getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, ValTy, - CostKind); -} - -int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) { - switch (ICA.getID()) { - case Intrinsic::get_active_lane_mask: - // Currently we make a somewhat optimistic assumption that - // active_lane_mask's are always free. In reality it may be freely folded - // into a tail predicated loop, expanded into a VCPT or expanded into a lot - // of add/icmp code. We may need to improve this in the future, but being - // able to detect if it is free or not involves looking at a lot of other - // code. We currently assume that the vectorizer inserted these, and knew - // what it was doing in adding one. - if (ST->hasMVEIntegerOps()) - return 0; - break; - case Intrinsic::sadd_sat: - case Intrinsic::ssub_sat: - case Intrinsic::uadd_sat: - case Intrinsic::usub_sat: { - if (!ST->hasMVEIntegerOps()) - break; - // Get the Return type, either directly of from ICA.ReturnType and ICA.VF. - Type *VT = ICA.getReturnType(); - if (!VT->isVectorTy() && !ICA.getVectorFactor().isScalar()) - VT = VectorType::get(VT, ICA.getVectorFactor()); - - std::pair<int, MVT> LT = - TLI->getTypeLegalizationCost(DL, VT); - if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 || - LT.second == MVT::v16i8) { - // This is a base cost of 1 for the vadd, plus 3 extract shifts if we - // need to extend the type, as it uses shr(qadd(shl, shl)). - unsigned Instrs = LT.second.getScalarSizeInBits() == - ICA.getReturnType()->getScalarSizeInBits() - ? 
1 - : 4; - return LT.first * ST->getMVEVectorCostFactor() * Instrs; - } - break; - } - } - - return BaseT::getIntrinsicInstrCost(ICA, CostKind); -} - +int ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, + bool IsPairwiseForm, + TTI::TargetCostKind CostKind) { + EVT ValVT = TLI->getValueType(DL, ValTy); + int ISD = TLI->InstructionOpcodeToISD(Opcode); + if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD) + return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, + CostKind); + + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); + + static const CostTblEntry CostTblAdd[]{ + {ISD::ADD, MVT::v16i8, 1}, + {ISD::ADD, MVT::v8i16, 1}, + {ISD::ADD, MVT::v4i32, 1}, + }; + if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second)) + return Entry->Cost * ST->getMVEVectorCostFactor() * LT.first; + + return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, + CostKind); +} + +InstructionCost +ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, + Type *ResTy, VectorType *ValTy, + TTI::TargetCostKind CostKind) { + EVT ValVT = TLI->getValueType(DL, ValTy); + EVT ResVT = TLI->getValueType(DL, ResTy); + if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) { + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); + if ((LT.second == MVT::v16i8 && ResVT.getSizeInBits() <= 32) || + (LT.second == MVT::v8i16 && + ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) || + (LT.second == MVT::v4i32 && ResVT.getSizeInBits() <= 64)) + return ST->getMVEVectorCostFactor() * LT.first; + } + + return BaseT::getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, ValTy, + CostKind); +} + +int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) { + switch (ICA.getID()) { + case Intrinsic::get_active_lane_mask: + // Currently we make a somewhat optimistic assumption that + // active_lane_mask's are always free. In reality it may be freely folded + // into a tail predicated loop, expanded into a VCPT or expanded into a lot + // of add/icmp code. We may need to improve this in the future, but being + // able to detect if it is free or not involves looking at a lot of other + // code. We currently assume that the vectorizer inserted these, and knew + // what it was doing in adding one. + if (ST->hasMVEIntegerOps()) + return 0; + break; + case Intrinsic::sadd_sat: + case Intrinsic::ssub_sat: + case Intrinsic::uadd_sat: + case Intrinsic::usub_sat: { + if (!ST->hasMVEIntegerOps()) + break; + // Get the Return type, either directly of from ICA.ReturnType and ICA.VF. + Type *VT = ICA.getReturnType(); + if (!VT->isVectorTy() && !ICA.getVectorFactor().isScalar()) + VT = VectorType::get(VT, ICA.getVectorFactor()); + + std::pair<int, MVT> LT = + TLI->getTypeLegalizationCost(DL, VT); + if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 || + LT.second == MVT::v16i8) { + // This is a base cost of 1 for the vadd, plus 3 extract shifts if we + // need to extend the type, as it uses shr(qadd(shl, shl)). + unsigned Instrs = LT.second.getScalarSizeInBits() == + ICA.getReturnType()->getScalarSizeInBits() + ? 
1 + : 4; + return LT.first * ST->getMVEVectorCostFactor() * Instrs; + } + break; + } + } + + return BaseT::getIntrinsicInstrCost(ICA, CostKind); +} + bool ARMTTIImpl::isLoweredToCall(const Function *F) { if (!F->isIntrinsic()) BaseT::isLoweredToCall(F); @@ -1635,93 +1635,93 @@ bool ARMTTIImpl::isLoweredToCall(const Function *F) { return BaseT::isLoweredToCall(F); } -bool ARMTTIImpl::maybeLoweredToCall(Instruction &I) { - unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode()); - EVT VT = TLI->getValueType(DL, I.getType(), true); - if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall) - return true; - - // Check if an intrinsic will be lowered to a call and assume that any - // other CallInst will generate a bl. - if (auto *Call = dyn_cast<CallInst>(&I)) { - if (auto *II = dyn_cast<IntrinsicInst>(Call)) { - switch(II->getIntrinsicID()) { - case Intrinsic::memcpy: - case Intrinsic::memset: - case Intrinsic::memmove: - return getNumMemOps(II) == -1; - default: - if (const Function *F = Call->getCalledFunction()) - return isLoweredToCall(F); - } - } - return true; - } - - // FPv5 provides conversions between integer, double-precision, - // single-precision, and half-precision formats. - switch (I.getOpcode()) { - default: - break; - case Instruction::FPToSI: - case Instruction::FPToUI: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::FPTrunc: - case Instruction::FPExt: - return !ST->hasFPARMv8Base(); - } - - // FIXME: Unfortunately the approach of checking the Operation Action does - // not catch all cases of Legalization that use library calls. Our - // Legalization step categorizes some transformations into library calls as - // Custom, Expand or even Legal when doing type legalization. So for now - // we have to special case for instance the SDIV of 64bit integers and the - // use of floating point emulation. - if (VT.isInteger() && VT.getSizeInBits() >= 64) { - switch (ISD) { - default: - break; - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: - case ISD::SDIVREM: - case ISD::UDIVREM: - return true; - } - } - - // Assume all other non-float operations are supported. - if (!VT.isFloatingPoint()) - return false; - - // We'll need a library call to handle most floats when using soft. - if (TLI->useSoftFloat()) { - switch (I.getOpcode()) { - default: - return true; - case Instruction::Alloca: - case Instruction::Load: - case Instruction::Store: - case Instruction::Select: - case Instruction::PHI: - return false; - } - } - - // We'll need a libcall to perform double precision operations on a single - // precision only FPU. - if (I.getType()->isDoubleTy() && !ST->hasFP64()) - return true; - - // Likewise for half precision arithmetic. - if (I.getType()->isHalfTy() && !ST->hasFullFP16()) - return true; - - return false; -} - +bool ARMTTIImpl::maybeLoweredToCall(Instruction &I) { + unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode()); + EVT VT = TLI->getValueType(DL, I.getType(), true); + if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall) + return true; + + // Check if an intrinsic will be lowered to a call and assume that any + // other CallInst will generate a bl. 
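For the saturating-add intrinsics costed above, this is the scalar semantics being bought for a single vqadd on legal MVE vector types (a reference sketch, not the lowering):

#include <cstdint>
#include <limits>

int32_t saturatingAdd32(int32_t a, int32_t b) {
  const int64_t wide = static_cast<int64_t>(a) + static_cast<int64_t>(b);
  if (wide > std::numeric_limits<int32_t>::max())
    return std::numeric_limits<int32_t>::max();
  if (wide < std::numeric_limits<int32_t>::min())
    return std::numeric_limits<int32_t>::min();
  return static_cast<int32_t>(wide);
}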
+ if (auto *Call = dyn_cast<CallInst>(&I)) { + if (auto *II = dyn_cast<IntrinsicInst>(Call)) { + switch(II->getIntrinsicID()) { + case Intrinsic::memcpy: + case Intrinsic::memset: + case Intrinsic::memmove: + return getNumMemOps(II) == -1; + default: + if (const Function *F = Call->getCalledFunction()) + return isLoweredToCall(F); + } + } + return true; + } + + // FPv5 provides conversions between integer, double-precision, + // single-precision, and half-precision formats. + switch (I.getOpcode()) { + default: + break; + case Instruction::FPToSI: + case Instruction::FPToUI: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + return !ST->hasFPARMv8Base(); + } + + // FIXME: Unfortunately the approach of checking the Operation Action does + // not catch all cases of Legalization that use library calls. Our + // Legalization step categorizes some transformations into library calls as + // Custom, Expand or even Legal when doing type legalization. So for now + // we have to special case for instance the SDIV of 64bit integers and the + // use of floating point emulation. + if (VT.isInteger() && VT.getSizeInBits() >= 64) { + switch (ISD) { + default: + break; + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + case ISD::SDIVREM: + case ISD::UDIVREM: + return true; + } + } + + // Assume all other non-float operations are supported. + if (!VT.isFloatingPoint()) + return false; + + // We'll need a library call to handle most floats when using soft. + if (TLI->useSoftFloat()) { + switch (I.getOpcode()) { + default: + return true; + case Instruction::Alloca: + case Instruction::Load: + case Instruction::Store: + case Instruction::Select: + case Instruction::PHI: + return false; + } + } + + // We'll need a libcall to perform double precision operations on a single + // precision only FPU. + if (I.getType()->isDoubleTy() && !ST->hasFP64()) + return true; + + // Likewise for half precision arithmetic. + if (I.getType()->isHalfTy() && !ST->hasFullFP16()) + return true; + + return false; +} + bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, @@ -1762,7 +1762,7 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, switch (Call->getIntrinsicID()) { default: break; - case Intrinsic::start_loop_iterations: + case Intrinsic::start_loop_iterations: case Intrinsic::test_set_loop_iterations: case Intrinsic::loop_decrement: case Intrinsic::loop_decrement_reg: @@ -1773,24 +1773,24 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, }; // Scan the instructions to see if there's any that we know will turn into a - // call or if this loop is already a low-overhead loop or will become a tail - // predicated loop. - bool IsTailPredLoop = false; + // call or if this loop is already a low-overhead loop or will become a tail + // predicated loop. 
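maybeLoweredToCall above asks whether an instruction will survive as inline code or become a runtime helper call. The classification below is an assumption-laden sketch of that idea (the opcode strings, the helper-name comment and the feature flags are illustrative, not the TargetLowering query):

#include <string>

bool likelyBecomesLibcall(const std::string &opcode, unsigned bitWidth,
                          bool hasHWDivide, bool useSoftFloat) {
  if ((opcode == "sdiv" || opcode == "udiv" || opcode == "srem" ||
       opcode == "urem") && bitWidth >= 64)
    return true;  // 64-bit division/remainder goes to a runtime helper
  if ((opcode == "sdiv" || opcode == "udiv") && !hasHWDivide)
    return true;  // no hardware divider on this core
  if (useSoftFloat &&
      (opcode == "fadd" || opcode == "fmul" || opcode == "fdiv"))
    return true;  // soft-float arithmetic is a library call
  return false;
}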
+ bool IsTailPredLoop = false; auto ScanLoop = [&](Loop *L) { for (auto *BB : L->getBlocks()) { for (auto &I : *BB) { - if (maybeLoweredToCall(I) || IsHardwareLoopIntrinsic(I) || - isa<InlineAsm>(I)) { + if (maybeLoweredToCall(I) || IsHardwareLoopIntrinsic(I) || + isa<InlineAsm>(I)) { LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n"); return false; } - if (auto *II = dyn_cast<IntrinsicInst>(&I)) - IsTailPredLoop |= - II->getIntrinsicID() == Intrinsic::get_active_lane_mask || - II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 || - II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 || - II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 || - II->getIntrinsicID() == Intrinsic::arm_mve_vctp64; + if (auto *II = dyn_cast<IntrinsicInst>(&I)) + IsTailPredLoop |= + II->getIntrinsicID() == Intrinsic::get_active_lane_mask || + II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 || + II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 || + II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 || + II->getIntrinsicID() == Intrinsic::arm_mve_vctp64; } } return true; @@ -1811,7 +1811,7 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, LLVMContext &C = L->getHeader()->getContext(); HWLoopInfo.CounterInReg = true; HWLoopInfo.IsNestingLegal = false; - HWLoopInfo.PerformEntryTest = AllowWLSLoops && !IsTailPredLoop; + HWLoopInfo.PerformEntryTest = AllowWLSLoops && !IsTailPredLoop; HWLoopInfo.CountType = Type::getInt32Ty(C); HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1); return true; @@ -1859,28 +1859,28 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE, const LoopAccessInfo *LAI) { LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n"); - // If there are live-out values, it is probably a reduction. We can predicate - // most reduction operations freely under MVE using a combination of - // prefer-predicated-reduction-select and inloop reductions. We limit this to - // floating point and integer reductions, but don't check for operators - // specifically here. If the value ends up not being a reduction (and so the - // vectorizer cannot tailfold the loop), we should fall back to standard - // vectorization automatically. + // If there are live-out values, it is probably a reduction. We can predicate + // most reduction operations freely under MVE using a combination of + // prefer-predicated-reduction-select and inloop reductions. We limit this to + // floating point and integer reductions, but don't check for operators + // specifically here. If the value ends up not being a reduction (and so the + // vectorizer cannot tailfold the loop), we should fall back to standard + // vectorization automatically. 
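The loop scan above reduces to a simple predicate; here it is in isolation (assuming a stripped-down instruction summary rather than real IR): any instruction that may become a call or is inline asm disqualifies the loop from becoming a low-overhead hardware loop, while VCTP-style intrinsics merely mark it as a future tail-predicated loop.

#include <vector>

struct InstSummary {
  bool maybeCall;    // would lower to a bl or a libcall
  bool isInlineAsm;
  bool isVCTPLike;   // get.active.lane.mask or arm.mve.vctp*
};

bool scanLoopForHardwareLoop(const std::vector<InstSummary> &body,
                             bool &isTailPredLoop) {
  isTailPredLoop = false;
  for (const InstSummary &I : body) {
    if (I.maybeCall || I.isInlineAsm)
      return false;                  // calls clobber LR and the loop state
    isTailPredLoop |= I.isVCTPLike;  // remember: no entry test (WLS) needed
  }
  return true;
}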
SmallVector< Instruction *, 8 > LiveOuts; LiveOuts = llvm::findDefsUsedOutsideOfLoop(L); - bool ReductionsDisabled = + bool ReductionsDisabled = EnableTailPredication == TailPredication::EnabledNoReductions || EnableTailPredication == TailPredication::ForceEnabledNoReductions; for (auto *I : LiveOuts) { - if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() && - !I->getType()->isHalfTy()) { - LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float " + if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() && + !I->getType()->isHalfTy()) { + LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float " "live-out value\n"); return false; } - if (ReductionsDisabled) { - LLVM_DEBUG(dbgs() << "Reductions not enabled\n"); + if (ReductionsDisabled) { + LLVM_DEBUG(dbgs() << "Reductions not enabled\n"); return false; } } @@ -1910,35 +1910,35 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE, if (isa<StoreInst>(I) || isa<LoadInst>(I)) { Value *Ptr = isa<LoadInst>(I) ? I.getOperand(0) : I.getOperand(1); int64_t NextStride = getPtrStride(PSE, Ptr, L); - if (NextStride == 1) { - // TODO: for now only allow consecutive strides of 1. We could support - // other strides as long as it is uniform, but let's keep it simple - // for now. + if (NextStride == 1) { + // TODO: for now only allow consecutive strides of 1. We could support + // other strides as long as it is uniform, but let's keep it simple + // for now. continue; - } else if (NextStride == -1 || - (NextStride == 2 && MVEMaxSupportedInterleaveFactor >= 2) || - (NextStride == 4 && MVEMaxSupportedInterleaveFactor >= 4)) { - LLVM_DEBUG(dbgs() - << "Consecutive strides of 2 found, vld2/vstr2 can't " - "be tail-predicated\n."); + } else if (NextStride == -1 || + (NextStride == 2 && MVEMaxSupportedInterleaveFactor >= 2) || + (NextStride == 4 && MVEMaxSupportedInterleaveFactor >= 4)) { + LLVM_DEBUG(dbgs() + << "Consecutive strides of 2 found, vld2/vstr2 can't " + "be tail-predicated\n."); return false; - // TODO: don't tail predicate if there is a reversed load? - } else if (EnableMaskedGatherScatters) { - // Gather/scatters do allow loading from arbitrary strides, at - // least if they are loop invariant. - // TODO: Loop variant strides should in theory work, too, but - // this requires further testing. - const SCEV *PtrScev = - replaceSymbolicStrideSCEV(PSE, llvm::ValueToValueMap(), Ptr); - if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) { - const SCEV *Step = AR->getStepRecurrence(*PSE.getSE()); - if (PSE.getSE()->isLoopInvariant(Step, L)) - continue; - } + // TODO: don't tail predicate if there is a reversed load? + } else if (EnableMaskedGatherScatters) { + // Gather/scatters do allow loading from arbitrary strides, at + // least if they are loop invariant. + // TODO: Loop variant strides should in theory work, too, but + // this requires further testing. 
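Restating the stride rules above as a small standalone predicate (a hedged approximation; the real check reasons about SCEVs and loop invariance rather than plain integers): unit strides are fine to tail-predicate, interleaving-shaped strides are rejected, and anything else is only acceptable when it can become a gather/scatter.

#include <optional>

enum class StrideVerdict { TailPredicate, Reject, OnlyViaGatherScatter };

StrideVerdict classifyStride(std::optional<long long> constStride,
                             int maxInterleaveFactor,
                             bool gatherScattersEnabled) {
  if (constStride && *constStride == 1)
    return StrideVerdict::TailPredicate;        // contiguous access
  if (constStride &&
      (*constStride == -1 ||
       (*constStride == 2 && maxInterleaveFactor >= 2) ||
       (*constStride == 4 && maxInterleaveFactor >= 4)))
    return StrideVerdict::Reject;               // vld2/vld4-style, not predicable
  if (gatherScattersEnabled)
    return StrideVerdict::OnlyViaGatherScatter; // needs a loop-invariant stride
  return StrideVerdict::Reject;
}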
+ const SCEV *PtrScev = + replaceSymbolicStrideSCEV(PSE, llvm::ValueToValueMap(), Ptr); + if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) { + const SCEV *Step = AR->getStepRecurrence(*PSE.getSE()); + if (PSE.getSE()->isLoopInvariant(Step, L)) + continue; + } } - LLVM_DEBUG(dbgs() << "Bad stride found, can't " - "tail-predicate\n."); - return false; + LLVM_DEBUG(dbgs() << "Bad stride found, can't " + "tail-predicate\n."); + return false; } } } @@ -1971,7 +1971,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, return false; } - assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected"); + assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected"); HardwareLoopInfo HWLoopInfo(L); if (!HWLoopInfo.canAnalyze(*LI)) { @@ -2039,10 +2039,10 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, if (ST->hasBranchPredictor() && L->getNumBlocks() > 4) return; - // Don't unroll vectorized loops, including the remainder loop - if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) - return; - + // Don't unroll vectorized loops, including the remainder loop + if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) + return; + // Scan the loop: don't unroll loops with calls as this could prevent // inlining. unsigned Cost = 0; @@ -2061,9 +2061,9 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, return; } - SmallVector<const Value*, 4> Operands(I.operand_values()); - Cost += - getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency); + SmallVector<const Value*, 4> Operands(I.operand_values()); + Cost += + getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency); } } @@ -2092,24 +2092,24 @@ bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const { return ST->hasMVEIntegerOps(); } - -bool ARMTTIImpl::preferInLoopReduction(unsigned Opcode, Type *Ty, - TTI::ReductionFlags Flags) const { - if (!ST->hasMVEIntegerOps()) - return false; - - unsigned ScalarBits = Ty->getScalarSizeInBits(); - switch (Opcode) { - case Instruction::Add: - return ScalarBits <= 64; - default: - return false; - } -} - -bool ARMTTIImpl::preferPredicatedReductionSelect( - unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const { - if (!ST->hasMVEIntegerOps()) - return false; - return true; -} + +bool ARMTTIImpl::preferInLoopReduction(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const { + if (!ST->hasMVEIntegerOps()) + return false; + + unsigned ScalarBits = Ty->getScalarSizeInBits(); + switch (Opcode) { + case Instruction::Add: + return ScalarBits <= 64; + default: + return false; + } +} + +bool ARMTTIImpl::preferPredicatedReductionSelect( + unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const { + if (!ST->hasMVEIntegerOps()) + return false; + return true; +} diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.h index 7f045080e3..257e325a28 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMTargetTransformInfo.h @@ -113,9 +113,9 @@ public: return !ST->isTargetDarwin() && !ST->hasMVEFloatOps(); } - Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, - IntrinsicInst &II) const; - + Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const; + /// \name Scalar TTI Implementations /// @{ @@ -126,8 +126,8 @@ public: int getIntImmCost(const APInt &Imm, Type *Ty, 
TTI::TargetCostKind CostKind); int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind, - Instruction *Inst = nullptr); + Type *Ty, TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr); /// @} @@ -181,31 +181,31 @@ public: int getMemcpyCost(const Instruction *I); - int getNumMemOps(const IntrinsicInst *I) const; - + int getNumMemOps(const IntrinsicInst *I) const; + int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index, VectorType *SubTp); bool useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const; - bool preferInLoopReduction(unsigned Opcode, Type *Ty, - TTI::ReductionFlags Flags) const; - - bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, - TTI::ReductionFlags Flags) const; + bool preferInLoopReduction(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const; - bool shouldExpandReduction(const IntrinsicInst *II) const { return false; } + bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const; - int getCFInstrCost(unsigned Opcode, - TTI::TargetCostKind CostKind); + bool shouldExpandReduction(const IntrinsicInst *II) const { return false; } + int getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, + TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); @@ -229,10 +229,10 @@ public: TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind); - + unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind); + int getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, @@ -244,17 +244,17 @@ public: Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - int getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind); - InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, - Type *ResTy, VectorType *ValTy, - TTI::TargetCostKind CostKind); - - int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind); - - bool maybeLoweredToCall(Instruction &I); + int getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, + bool IsPairwiseForm, + TTI::TargetCostKind CostKind); + InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, + Type *ResTy, VectorType *ValTy, + TTI::TargetCostKind CostKind); + + int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind); + + bool maybeLoweredToCall(Instruction &I); bool isLoweredToCall(const Function *F); bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, diff --git a/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 52577d75dd..b65cfc3811 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ 
b/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -6239,9 +6239,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { StringRef IDVal = Parser.getTok().getIdentifier(); const auto &Prefix = - llvm::find_if(PrefixEntries, [&IDVal](const PrefixEntry &PE) { - return PE.Spelling == IDVal; - }); + llvm::find_if(PrefixEntries, [&IDVal](const PrefixEntry &PE) { + return PE.Spelling == IDVal; + }); if (Prefix == std::end(PrefixEntries)) { Error(Parser.getTok().getLoc(), "unexpected prefix in operand"); return true; @@ -10307,14 +10307,14 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, !HasWideQualifier) { // The operands aren't the same for tMOV[S]r... (no cc_out) MCInst TmpInst; - unsigned Op = Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr; - TmpInst.setOpcode(Op); + unsigned Op = Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr; + TmpInst.setOpcode(Op); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(1)); - if (Op == ARM::tMOVr) { - TmpInst.addOperand(Inst.getOperand(2)); - TmpInst.addOperand(Inst.getOperand(3)); - } + if (Op == ARM::tMOVr) { + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + } Inst = TmpInst; return true; } @@ -10599,12 +10599,12 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { (isThumb() && !hasV8Ops())) return Match_InvalidOperand; break; - case ARM::t2TBB: - case ARM::t2TBH: - // Rn = sp is only allowed with ARMv8-A - if (!hasV8Ops() && (Inst.getOperand(0).getReg() == ARM::SP)) - return Match_RequiresV8; - break; + case ARM::t2TBB: + case ARM::t2TBH: + // Rn = sp is only allowed with ARMv8-A + if (!hasV8Ops() && (Inst.getOperand(0).getReg() == ARM::SP)) + return Match_RequiresV8; + break; default: break; } @@ -11135,8 +11135,8 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) { bool WasThumb = isThumb(); Triple T; MCSubtargetInfo &STI = copySTI(); - STI.setDefaultFeatures("", /*TuneCPU*/ "", - ("+" + ARM::getArchName(ID)).str()); + STI.setDefaultFeatures("", /*TuneCPU*/ "", + ("+" + ARM::getArchName(ID)).str()); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); FixModeAfterArchChange(WasThumb, L); @@ -11249,7 +11249,7 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { bool WasThumb = isThumb(); MCSubtargetInfo &STI = copySTI(); - STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, ""); + STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, ""); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); FixModeAfterArchChange(WasThumb, L); diff --git a/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make b/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make index 572d301570..ed9aa8099d 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make +++ b/contrib/libs/llvm12/lib/Target/ARM/AsmParser/ya.make @@ -12,20 +12,20 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/MC - contrib/libs/llvm12/lib/MC/MCParser - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc - contrib/libs/llvm12/lib/Target/ARM/TargetInfo - contrib/libs/llvm12/lib/Target/ARM/Utils + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/MC + contrib/libs/llvm12/lib/MC/MCParser + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc + contrib/libs/llvm12/lib/Target/ARM/TargetInfo + contrib/libs/llvm12/lib/Target/ARM/Utils ) ADDINCL( - 
${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM - contrib/libs/llvm12/lib/Target/ARM - contrib/libs/llvm12/lib/Target/ARM/AsmParser + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM + contrib/libs/llvm12/lib/Target/ARM + contrib/libs/llvm12/lib/Target/ARM/AsmParser ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 8ea323a9ce..7953681421 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -860,8 +860,8 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const { VCCPos + 2, MCOI::TIED_TO); assert(TiedOp >= 0 && "Inactive register in vpred_r is not tied to an output!"); - // Copy the operand to ensure it's not invalidated when MI grows. - MI.insert(VCCI, MCOperand(MI.getOperand(TiedOp))); + // Copy the operand to ensure it's not invalidated when MI grows. + MI.insert(VCCI, MCOperand(MI.getOperand(TiedOp))); } } else if (VCC != ARMVCC::None) { Check(S, SoftFail); @@ -4530,14 +4530,14 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val, static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - const FeatureBitset &FeatureBits = - ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits(); + const FeatureBitset &FeatureBits = + ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits(); DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Insn, 16, 4); unsigned Rm = fieldFromInstruction(Insn, 0, 4); - if (Rn == 13 && !FeatureBits[ARM::HasV8Ops]) S = MCDisassembler::SoftFail; + if (Rn == 13 && !FeatureBits[ARM::HasV8Ops]) S = MCDisassembler::SoftFail; if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) diff --git a/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make b/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make index f8ce0c24d9..660cfd1063 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make +++ b/contrib/libs/llvm12/lib/Target/ARM/Disassembler/ya.make @@ -12,19 +12,19 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/MC/MCDisassembler - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc - contrib/libs/llvm12/lib/Target/ARM/TargetInfo - contrib/libs/llvm12/lib/Target/ARM/Utils + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/MC/MCDisassembler + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc + contrib/libs/llvm12/lib/Target/ARM/TargetInfo + contrib/libs/llvm12/lib/Target/ARM/Utils ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM - contrib/libs/llvm12/lib/Target/ARM - contrib/libs/llvm12/lib/Target/ARM/Disassembler + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM + contrib/libs/llvm12/lib/Target/ARM + contrib/libs/llvm12/lib/Target/ARM/Disassembler ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index 8459b4ff2a..07376848c4 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ 
b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -205,20 +205,20 @@ namespace ARM_AM { return V; } - /// isSOImmTwoPartValNeg - Return true if the specified value can be obtained - /// by two SOImmVal, that -V = First + Second. - /// "R+V" can be optimized to (sub (sub R, First), Second). - /// "R=V" can be optimized to (sub (mvn R, ~(-First)), Second). - inline bool isSOImmTwoPartValNeg(unsigned V) { - unsigned First; - if (!isSOImmTwoPartVal(-V)) - return false; - // Return false if ~(-First) is not a SoImmval. - First = getSOImmTwoPartFirst(-V); - First = ~(-First); - return !(rotr32(~255U, getSOImmValRotate(First)) & First); - } - + /// isSOImmTwoPartValNeg - Return true if the specified value can be obtained + /// by two SOImmVal, that -V = First + Second. + /// "R+V" can be optimized to (sub (sub R, First), Second). + /// "R=V" can be optimized to (sub (mvn R, ~(-First)), Second). + inline bool isSOImmTwoPartValNeg(unsigned V) { + unsigned First; + if (!isSOImmTwoPartVal(-V)) + return false; + // Return false if ~(-First) is not a SoImmval. + First = getSOImmTwoPartFirst(-V); + First = ~(-First); + return !(rotr32(~255U, getSOImmValRotate(First)) & First); + } + /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed /// by a left shift. Returns the shift amount to use. inline unsigned getThumbImmValShift(unsigned Imm) { @@ -687,18 +687,18 @@ namespace ARM_AM { return getFP16Imm(FPImm.bitcastToAPInt()); } - /// If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding - /// for it. Otherwise return -1 like getFP16Imm. - inline int getFP32FP16Imm(const APInt &Imm) { - if (Imm.getActiveBits() > 16) - return -1; - return ARM_AM::getFP16Imm(Imm.trunc(16)); - } - - inline int getFP32FP16Imm(const APFloat &FPImm) { - return getFP32FP16Imm(FPImm.bitcastToAPInt()); - } - + /// If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding + /// for it. Otherwise return -1 like getFP16Imm. + inline int getFP32FP16Imm(const APInt &Imm) { + if (Imm.getActiveBits() > 16) + return -1; + return ARM_AM::getFP16Imm(Imm.trunc(16)); + } + + inline int getFP32FP16Imm(const APFloat &FPImm) { + return getFP32FP16Imm(FPImm.bitcastToAPInt()); + } + /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. 
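Editorial aside, not part of the patch above: the isSOImmTwoPartValNeg helper restored in this hunk rests on the ARM rule that a data-processing immediate is an 8-bit value rotated right by an even amount, and that some constants can only be materialized as the sum of two such immediates. The self-contained sketch below re-derives both checks by brute force; rotr32, isSOImm and isSOImmTwoPart here are simplified stand-ins for the real ARM_AM helpers, and the test constant 0x1FE00 is my own illustrative choice.

    #include <cassert>
    #include <cstdint>

    // Rotate a 32-bit value right by Amt bits (Amt taken modulo 32).
    static uint32_t rotr32(uint32_t Val, unsigned Amt) {
      Amt &= 31u;
      return Amt ? (Val >> Amt) | (Val << (32u - Amt)) : Val;
    }

    // Rotate left; used to test the "imm8 rotated right by an even amount" form.
    static uint32_t rotl32(uint32_t Val, unsigned Amt) {
      Amt &= 31u;
      return Amt ? (Val << Amt) | (Val >> (32u - Amt)) : Val;
    }

    // Brute-force single-immediate test: V is encodable if some even
    // left-rotation of it fits in 8 bits.
    static bool isSOImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2)
        if (rotl32(V, Rot) <= 0xFFu)
          return true;
      return false;
    }

    // Brute-force two-part test: V needs exactly two shifter-operand
    // immediates, First + Second.
    static bool isSOImmTwoPart(uint32_t V, uint32_t &First, uint32_t &Second) {
      if (isSOImm(V))
        return false; // a single immediate already suffices
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        // The bits of V that fall inside one rotated imm8 window.
        uint32_t Chunk = V & rotr32(0xFFu, Rot);
        if (Chunk && isSOImm(V - Chunk)) {
          First = Chunk;
          Second = V - Chunk;
          return true;
        }
      }
      return false;
    }

    int main() {
      // 0x1FE00 is not a single ARM immediate, but it splits into
      // 0x10000 + 0xFE00, so "add r0, r1, #0x1FE00" can be lowered as two adds.
      uint32_t First = 0, Second = 0;
      assert(!isSOImm(0x1FE00u));
      assert(isSOImmTwoPart(0x1FE00u, First, Second));
      assert(First + Second == 0x1FE00u);
      return 0;
    }

This is the same idea the negated variant in the hunk applies to -V, so that "R + V" can be rewritten as (sub (sub R, First), Second).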
diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index b02aef3c33..697eeab4e5 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -1010,7 +1010,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { case ARM::fixup_t2_condbranch: case ARM::fixup_t2_uncondbranch: case ARM::fixup_t2_pcrel_10: - case ARM::fixup_t2_pcrel_9: + case ARM::fixup_t2_pcrel_9: case ARM::fixup_t2_adr_pcrel_12: case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index ecd96114e8..5599eaaf2f 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -254,7 +254,7 @@ namespace ARMII { MO_OPTION_MASK = 0x3, /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the - /// reference is actually to the ".refptr.FOO" symbol. This is used for + /// reference is actually to the ".refptr.FOO" symbol. This is used for /// stub symbols on windows. MO_COFFSTUB = 0x4, diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h index d975d799e0..ac75bf3fca 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h +++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h @@ -30,7 +30,7 @@ public: void printRegName(raw_ostream &OS, unsigned RegNo) const override; // Autogenerated by tblgen. - std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; + std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; void printInstruction(const MCInst *MI, uint64_t Address, const MCSubtargetInfo &STI, raw_ostream &O); virtual bool printAliasInstr(const MCInst *MI, uint64_t Address, diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 40e8e244e3..a26944a38f 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -87,7 +87,7 @@ void ARMCOFFMCAsmInfoMicrosoft::anchor() { } ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() { AlignmentIsInBytes = false; - SupportsDebugInformation = true; + SupportsDebugInformation = true; ExceptionsType = ExceptionHandling::WinEH; PrivateGlobalPrefix = "$M"; PrivateLabelPrefix = "$M"; diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 774f2507b8..3da71ade87 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -11,13 +11,13 @@ //===----------------------------------------------------------------------===// #include "ARMMCTargetDesc.h" -#include "ARMAddressingModes.h" +#include "ARMAddressingModes.h" #include "ARMBaseInfo.h" #include "ARMInstPrinter.h" #include "ARMMCAsmInfo.h" #include "TargetInfo/ARMTargetInfo.h" #include "llvm/ADT/Triple.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCCodeEmitter.h" #include 
"llvm/MC/MCELFStreamer.h" @@ -182,23 +182,23 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) { return ARMArchFeature; } -bool ARM_MC::isPredicated(const MCInst &MI, const MCInstrInfo *MCII) { - const MCInstrDesc &Desc = MCII->get(MI.getOpcode()); - int PredOpIdx = Desc.findFirstPredOperandIdx(); - return PredOpIdx != -1 && MI.getOperand(PredOpIdx).getImm() != ARMCC::AL; -} - -bool ARM_MC::isCPSRDefined(const MCInst &MI, const MCInstrInfo *MCII) { - const MCInstrDesc &Desc = MCII->get(MI.getOpcode()); - for (unsigned I = 0; I < MI.getNumOperands(); ++I) { - const MCOperand &MO = MI.getOperand(I); - if (MO.isReg() && MO.getReg() == ARM::CPSR && - Desc.OpInfo[I].isOptionalDef()) - return true; - } - return false; -} - +bool ARM_MC::isPredicated(const MCInst &MI, const MCInstrInfo *MCII) { + const MCInstrDesc &Desc = MCII->get(MI.getOpcode()); + int PredOpIdx = Desc.findFirstPredOperandIdx(); + return PredOpIdx != -1 && MI.getOperand(PredOpIdx).getImm() != ARMCC::AL; +} + +bool ARM_MC::isCPSRDefined(const MCInst &MI, const MCInstrInfo *MCII) { + const MCInstrDesc &Desc = MCII->get(MI.getOpcode()); + for (unsigned I = 0; I < MI.getNumOperands(); ++I) { + const MCOperand &MO = MI.getOperand(I); + if (MO.isReg() && MO.getReg() == ARM::CPSR && + Desc.OpInfo[I].isOptionalDef()) + return true; + } + return false; +} + MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU); @@ -209,7 +209,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT, ArchFS = std::string(FS); } - return createARMMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, ArchFS); + return createARMMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, ArchFS); } static MCInstrInfo *createARMMCInstrInfo() { @@ -218,120 +218,120 @@ static MCInstrInfo *createARMMCInstrInfo() { return X; } -void ARM_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) { - // Mapping from CodeView to MC register id. 
- static const struct { - codeview::RegisterId CVReg; - MCPhysReg Reg; - } RegMap[] = { - {codeview::RegisterId::ARM_R0, ARM::R0}, - {codeview::RegisterId::ARM_R1, ARM::R1}, - {codeview::RegisterId::ARM_R2, ARM::R2}, - {codeview::RegisterId::ARM_R3, ARM::R3}, - {codeview::RegisterId::ARM_R4, ARM::R4}, - {codeview::RegisterId::ARM_R5, ARM::R5}, - {codeview::RegisterId::ARM_R6, ARM::R6}, - {codeview::RegisterId::ARM_R7, ARM::R7}, - {codeview::RegisterId::ARM_R8, ARM::R8}, - {codeview::RegisterId::ARM_R9, ARM::R9}, - {codeview::RegisterId::ARM_R10, ARM::R10}, - {codeview::RegisterId::ARM_R11, ARM::R11}, - {codeview::RegisterId::ARM_R12, ARM::R12}, - {codeview::RegisterId::ARM_SP, ARM::SP}, - {codeview::RegisterId::ARM_LR, ARM::LR}, - {codeview::RegisterId::ARM_PC, ARM::PC}, - {codeview::RegisterId::ARM_CPSR, ARM::CPSR}, - {codeview::RegisterId::ARM_FPSCR, ARM::FPSCR}, - {codeview::RegisterId::ARM_FPEXC, ARM::FPEXC}, - {codeview::RegisterId::ARM_FS0, ARM::S0}, - {codeview::RegisterId::ARM_FS1, ARM::S1}, - {codeview::RegisterId::ARM_FS2, ARM::S2}, - {codeview::RegisterId::ARM_FS3, ARM::S3}, - {codeview::RegisterId::ARM_FS4, ARM::S4}, - {codeview::RegisterId::ARM_FS5, ARM::S5}, - {codeview::RegisterId::ARM_FS6, ARM::S6}, - {codeview::RegisterId::ARM_FS7, ARM::S7}, - {codeview::RegisterId::ARM_FS8, ARM::S8}, - {codeview::RegisterId::ARM_FS9, ARM::S9}, - {codeview::RegisterId::ARM_FS10, ARM::S10}, - {codeview::RegisterId::ARM_FS11, ARM::S11}, - {codeview::RegisterId::ARM_FS12, ARM::S12}, - {codeview::RegisterId::ARM_FS13, ARM::S13}, - {codeview::RegisterId::ARM_FS14, ARM::S14}, - {codeview::RegisterId::ARM_FS15, ARM::S15}, - {codeview::RegisterId::ARM_FS16, ARM::S16}, - {codeview::RegisterId::ARM_FS17, ARM::S17}, - {codeview::RegisterId::ARM_FS18, ARM::S18}, - {codeview::RegisterId::ARM_FS19, ARM::S19}, - {codeview::RegisterId::ARM_FS20, ARM::S20}, - {codeview::RegisterId::ARM_FS21, ARM::S21}, - {codeview::RegisterId::ARM_FS22, ARM::S22}, - {codeview::RegisterId::ARM_FS23, ARM::S23}, - {codeview::RegisterId::ARM_FS24, ARM::S24}, - {codeview::RegisterId::ARM_FS25, ARM::S25}, - {codeview::RegisterId::ARM_FS26, ARM::S26}, - {codeview::RegisterId::ARM_FS27, ARM::S27}, - {codeview::RegisterId::ARM_FS28, ARM::S28}, - {codeview::RegisterId::ARM_FS29, ARM::S29}, - {codeview::RegisterId::ARM_FS30, ARM::S30}, - {codeview::RegisterId::ARM_FS31, ARM::S31}, - {codeview::RegisterId::ARM_ND0, ARM::D0}, - {codeview::RegisterId::ARM_ND1, ARM::D1}, - {codeview::RegisterId::ARM_ND2, ARM::D2}, - {codeview::RegisterId::ARM_ND3, ARM::D3}, - {codeview::RegisterId::ARM_ND4, ARM::D4}, - {codeview::RegisterId::ARM_ND5, ARM::D5}, - {codeview::RegisterId::ARM_ND6, ARM::D6}, - {codeview::RegisterId::ARM_ND7, ARM::D7}, - {codeview::RegisterId::ARM_ND8, ARM::D8}, - {codeview::RegisterId::ARM_ND9, ARM::D9}, - {codeview::RegisterId::ARM_ND10, ARM::D10}, - {codeview::RegisterId::ARM_ND11, ARM::D11}, - {codeview::RegisterId::ARM_ND12, ARM::D12}, - {codeview::RegisterId::ARM_ND13, ARM::D13}, - {codeview::RegisterId::ARM_ND14, ARM::D14}, - {codeview::RegisterId::ARM_ND15, ARM::D15}, - {codeview::RegisterId::ARM_ND16, ARM::D16}, - {codeview::RegisterId::ARM_ND17, ARM::D17}, - {codeview::RegisterId::ARM_ND18, ARM::D18}, - {codeview::RegisterId::ARM_ND19, ARM::D19}, - {codeview::RegisterId::ARM_ND20, ARM::D20}, - {codeview::RegisterId::ARM_ND21, ARM::D21}, - {codeview::RegisterId::ARM_ND22, ARM::D22}, - {codeview::RegisterId::ARM_ND23, ARM::D23}, - {codeview::RegisterId::ARM_ND24, ARM::D24}, - {codeview::RegisterId::ARM_ND25, ARM::D25}, 
- {codeview::RegisterId::ARM_ND26, ARM::D26}, - {codeview::RegisterId::ARM_ND27, ARM::D27}, - {codeview::RegisterId::ARM_ND28, ARM::D28}, - {codeview::RegisterId::ARM_ND29, ARM::D29}, - {codeview::RegisterId::ARM_ND30, ARM::D30}, - {codeview::RegisterId::ARM_ND31, ARM::D31}, - {codeview::RegisterId::ARM_NQ0, ARM::Q0}, - {codeview::RegisterId::ARM_NQ1, ARM::Q1}, - {codeview::RegisterId::ARM_NQ2, ARM::Q2}, - {codeview::RegisterId::ARM_NQ3, ARM::Q3}, - {codeview::RegisterId::ARM_NQ4, ARM::Q4}, - {codeview::RegisterId::ARM_NQ5, ARM::Q5}, - {codeview::RegisterId::ARM_NQ6, ARM::Q6}, - {codeview::RegisterId::ARM_NQ7, ARM::Q7}, - {codeview::RegisterId::ARM_NQ8, ARM::Q8}, - {codeview::RegisterId::ARM_NQ9, ARM::Q9}, - {codeview::RegisterId::ARM_NQ10, ARM::Q10}, - {codeview::RegisterId::ARM_NQ11, ARM::Q11}, - {codeview::RegisterId::ARM_NQ12, ARM::Q12}, - {codeview::RegisterId::ARM_NQ13, ARM::Q13}, - {codeview::RegisterId::ARM_NQ14, ARM::Q14}, - {codeview::RegisterId::ARM_NQ15, ARM::Q15}, - }; - for (unsigned I = 0; I < array_lengthof(RegMap); ++I) - MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg)); -} - +void ARM_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) { + // Mapping from CodeView to MC register id. + static const struct { + codeview::RegisterId CVReg; + MCPhysReg Reg; + } RegMap[] = { + {codeview::RegisterId::ARM_R0, ARM::R0}, + {codeview::RegisterId::ARM_R1, ARM::R1}, + {codeview::RegisterId::ARM_R2, ARM::R2}, + {codeview::RegisterId::ARM_R3, ARM::R3}, + {codeview::RegisterId::ARM_R4, ARM::R4}, + {codeview::RegisterId::ARM_R5, ARM::R5}, + {codeview::RegisterId::ARM_R6, ARM::R6}, + {codeview::RegisterId::ARM_R7, ARM::R7}, + {codeview::RegisterId::ARM_R8, ARM::R8}, + {codeview::RegisterId::ARM_R9, ARM::R9}, + {codeview::RegisterId::ARM_R10, ARM::R10}, + {codeview::RegisterId::ARM_R11, ARM::R11}, + {codeview::RegisterId::ARM_R12, ARM::R12}, + {codeview::RegisterId::ARM_SP, ARM::SP}, + {codeview::RegisterId::ARM_LR, ARM::LR}, + {codeview::RegisterId::ARM_PC, ARM::PC}, + {codeview::RegisterId::ARM_CPSR, ARM::CPSR}, + {codeview::RegisterId::ARM_FPSCR, ARM::FPSCR}, + {codeview::RegisterId::ARM_FPEXC, ARM::FPEXC}, + {codeview::RegisterId::ARM_FS0, ARM::S0}, + {codeview::RegisterId::ARM_FS1, ARM::S1}, + {codeview::RegisterId::ARM_FS2, ARM::S2}, + {codeview::RegisterId::ARM_FS3, ARM::S3}, + {codeview::RegisterId::ARM_FS4, ARM::S4}, + {codeview::RegisterId::ARM_FS5, ARM::S5}, + {codeview::RegisterId::ARM_FS6, ARM::S6}, + {codeview::RegisterId::ARM_FS7, ARM::S7}, + {codeview::RegisterId::ARM_FS8, ARM::S8}, + {codeview::RegisterId::ARM_FS9, ARM::S9}, + {codeview::RegisterId::ARM_FS10, ARM::S10}, + {codeview::RegisterId::ARM_FS11, ARM::S11}, + {codeview::RegisterId::ARM_FS12, ARM::S12}, + {codeview::RegisterId::ARM_FS13, ARM::S13}, + {codeview::RegisterId::ARM_FS14, ARM::S14}, + {codeview::RegisterId::ARM_FS15, ARM::S15}, + {codeview::RegisterId::ARM_FS16, ARM::S16}, + {codeview::RegisterId::ARM_FS17, ARM::S17}, + {codeview::RegisterId::ARM_FS18, ARM::S18}, + {codeview::RegisterId::ARM_FS19, ARM::S19}, + {codeview::RegisterId::ARM_FS20, ARM::S20}, + {codeview::RegisterId::ARM_FS21, ARM::S21}, + {codeview::RegisterId::ARM_FS22, ARM::S22}, + {codeview::RegisterId::ARM_FS23, ARM::S23}, + {codeview::RegisterId::ARM_FS24, ARM::S24}, + {codeview::RegisterId::ARM_FS25, ARM::S25}, + {codeview::RegisterId::ARM_FS26, ARM::S26}, + {codeview::RegisterId::ARM_FS27, ARM::S27}, + {codeview::RegisterId::ARM_FS28, ARM::S28}, + {codeview::RegisterId::ARM_FS29, ARM::S29}, + 
{codeview::RegisterId::ARM_FS30, ARM::S30}, + {codeview::RegisterId::ARM_FS31, ARM::S31}, + {codeview::RegisterId::ARM_ND0, ARM::D0}, + {codeview::RegisterId::ARM_ND1, ARM::D1}, + {codeview::RegisterId::ARM_ND2, ARM::D2}, + {codeview::RegisterId::ARM_ND3, ARM::D3}, + {codeview::RegisterId::ARM_ND4, ARM::D4}, + {codeview::RegisterId::ARM_ND5, ARM::D5}, + {codeview::RegisterId::ARM_ND6, ARM::D6}, + {codeview::RegisterId::ARM_ND7, ARM::D7}, + {codeview::RegisterId::ARM_ND8, ARM::D8}, + {codeview::RegisterId::ARM_ND9, ARM::D9}, + {codeview::RegisterId::ARM_ND10, ARM::D10}, + {codeview::RegisterId::ARM_ND11, ARM::D11}, + {codeview::RegisterId::ARM_ND12, ARM::D12}, + {codeview::RegisterId::ARM_ND13, ARM::D13}, + {codeview::RegisterId::ARM_ND14, ARM::D14}, + {codeview::RegisterId::ARM_ND15, ARM::D15}, + {codeview::RegisterId::ARM_ND16, ARM::D16}, + {codeview::RegisterId::ARM_ND17, ARM::D17}, + {codeview::RegisterId::ARM_ND18, ARM::D18}, + {codeview::RegisterId::ARM_ND19, ARM::D19}, + {codeview::RegisterId::ARM_ND20, ARM::D20}, + {codeview::RegisterId::ARM_ND21, ARM::D21}, + {codeview::RegisterId::ARM_ND22, ARM::D22}, + {codeview::RegisterId::ARM_ND23, ARM::D23}, + {codeview::RegisterId::ARM_ND24, ARM::D24}, + {codeview::RegisterId::ARM_ND25, ARM::D25}, + {codeview::RegisterId::ARM_ND26, ARM::D26}, + {codeview::RegisterId::ARM_ND27, ARM::D27}, + {codeview::RegisterId::ARM_ND28, ARM::D28}, + {codeview::RegisterId::ARM_ND29, ARM::D29}, + {codeview::RegisterId::ARM_ND30, ARM::D30}, + {codeview::RegisterId::ARM_ND31, ARM::D31}, + {codeview::RegisterId::ARM_NQ0, ARM::Q0}, + {codeview::RegisterId::ARM_NQ1, ARM::Q1}, + {codeview::RegisterId::ARM_NQ2, ARM::Q2}, + {codeview::RegisterId::ARM_NQ3, ARM::Q3}, + {codeview::RegisterId::ARM_NQ4, ARM::Q4}, + {codeview::RegisterId::ARM_NQ5, ARM::Q5}, + {codeview::RegisterId::ARM_NQ6, ARM::Q6}, + {codeview::RegisterId::ARM_NQ7, ARM::Q7}, + {codeview::RegisterId::ARM_NQ8, ARM::Q8}, + {codeview::RegisterId::ARM_NQ9, ARM::Q9}, + {codeview::RegisterId::ARM_NQ10, ARM::Q10}, + {codeview::RegisterId::ARM_NQ11, ARM::Q11}, + {codeview::RegisterId::ARM_NQ12, ARM::Q12}, + {codeview::RegisterId::ARM_NQ13, ARM::Q13}, + {codeview::RegisterId::ARM_NQ14, ARM::Q14}, + {codeview::RegisterId::ARM_NQ15, ARM::Q15}, + }; + for (unsigned I = 0; I < array_lengthof(RegMap); ++I) + MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg)); +} + static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) { MCRegisterInfo *X = new MCRegisterInfo(); InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC); - ARM_MC::initLLVMToCVRegMapping(X); + ARM_MC::initLLVMToCVRegMapping(X); return X; } diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 5a0874f0ef..a84576e757 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -41,22 +41,22 @@ class raw_pwrite_stream; namespace ARM_MC { std::string ParseARMTriple(const Triple &TT, StringRef CPU); -void initLLVMToCVRegMapping(MCRegisterInfo *MRI); - -bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII); -bool isCPSRDefined(const MCInst &MI, const MCInstrInfo *MCII); - -template<class Inst> -bool isLDMBaseRegInList(const Inst &MI) { - auto BaseReg = MI.getOperand(0).getReg(); - for (unsigned I = 1, E = MI.getNumOperands(); I < E; ++I) { - const auto &Op = MI.getOperand(I); - if (Op.isReg() && Op.getReg() == BaseReg) - return true; - } 
- return false; -} - +void initLLVMToCVRegMapping(MCRegisterInfo *MRI); + +bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII); +bool isCPSRDefined(const MCInst &MI, const MCInstrInfo *MCII); + +template<class Inst> +bool isLDMBaseRegInList(const Inst &MI) { + auto BaseReg = MI.getOperand(0).getReg(); + for (unsigned I = 1, E = MI.getNumOperands(); I < E; ++I) { + const auto &Op = MI.getOperand(I); + if (Op.isReg() && Op.getReg() == BaseReg) + return true; + } + return false; +} + /// Create a ARM MCSubtargetInfo instance. This is exposed so Asm parser, etc. /// do not need to go through TargetRegistry. MCSubtargetInfo *createARMMCSubtargetInfo(const Triple &TT, StringRef CPU, diff --git a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make index b92b47d057..0256e1fdac 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make +++ b/contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc/ya.make @@ -12,20 +12,20 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/BinaryFormat - contrib/libs/llvm12/lib/MC - contrib/libs/llvm12/lib/MC/MCDisassembler - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target/ARM/TargetInfo - contrib/libs/llvm12/lib/Target/ARM/Utils + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/BinaryFormat + contrib/libs/llvm12/lib/MC + contrib/libs/llvm12/lib/MC/MCDisassembler + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target/ARM/TargetInfo + contrib/libs/llvm12/lib/Target/ARM/Utils ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM - contrib/libs/llvm12/lib/Target/ARM - contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM + contrib/libs/llvm12/lib/Target/ARM + contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/ARM/MVEGatherScatterLowering.cpp b/contrib/libs/llvm12/lib/Target/ARM/MVEGatherScatterLowering.cpp index 56823735e2..0b6cdee512 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -44,10 +44,10 @@ using namespace llvm; -#define DEBUG_TYPE "arm-mve-gather-scatter-lowering" +#define DEBUG_TYPE "arm-mve-gather-scatter-lowering" cl::opt<bool> EnableMaskedGatherScatters( - "enable-arm-maskedgatscat", cl::Hidden, cl::init(true), + "enable-arm-maskedgatscat", cl::Hidden, cl::init(true), cl::desc("Enable the generation of masked gathers and scatters")); namespace { @@ -84,7 +84,7 @@ private: // Check for a getelementptr and deduce base and offsets from it, on success // returning the base directly and the offsets indirectly using the Offsets // argument - Value *checkGEP(Value *&Offsets, FixedVectorType *Ty, GetElementPtrInst *GEP, + Value *checkGEP(Value *&Offsets, FixedVectorType *Ty, GetElementPtrInst *GEP, IRBuilder<> &Builder); // Compute the scale of this gather/scatter instruction int computeScale(unsigned GEPElemSize, unsigned MemoryElemSize); @@ -132,11 +132,11 @@ private: Value *tryCreateIncrementingWBGatScat(IntrinsicInst *I, Value *BasePtr, Value *Ptr, unsigned TypeScale, IRBuilder<> &Builder); - - // Optimise the base and offsets of the given address - bool optimiseAddress(Value *Address, BasicBlock *BB, LoopInfo *LI); - // Try to fold consecutive geps together into one - Value 
*foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder); + + // Optimise the base and offsets of the given address + bool optimiseAddress(Value *Address, BasicBlock *BB, LoopInfo *LI); + // Try to fold consecutive geps together into one + Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder); // Check whether these offsets could be moved out of the loop they're in bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI); // Pushes the given add out of the loop @@ -172,49 +172,49 @@ bool MVEGatherScatterLowering::isLegalTypeAndAlignment(unsigned NumElements, return false; } -static bool checkOffsetSize(Value *Offsets, unsigned TargetElemCount) { - // Offsets that are not of type <N x i32> are sign extended by the - // getelementptr instruction, and MVE gathers/scatters treat the offset as - // unsigned. Thus, if the element size is smaller than 32, we can only allow - // positive offsets - i.e., the offsets are not allowed to be variables we - // can't look into. - // Additionally, <N x i32> offsets have to either originate from a zext of a - // vector with element types smaller or equal the type of the gather we're - // looking at, or consist of constants that we can check are small enough - // to fit into the gather type. - // Thus we check that 0 < value < 2^TargetElemSize. - unsigned TargetElemSize = 128 / TargetElemCount; - unsigned OffsetElemSize = cast<FixedVectorType>(Offsets->getType()) - ->getElementType() - ->getScalarSizeInBits(); - if (OffsetElemSize != TargetElemSize || OffsetElemSize != 32) { - Constant *ConstOff = dyn_cast<Constant>(Offsets); - if (!ConstOff) - return false; - int64_t TargetElemMaxSize = (1ULL << TargetElemSize); - auto CheckValueSize = [TargetElemMaxSize](Value *OffsetElem) { - ConstantInt *OConst = dyn_cast<ConstantInt>(OffsetElem); - if (!OConst) - return false; - int SExtValue = OConst->getSExtValue(); - if (SExtValue >= TargetElemMaxSize || SExtValue < 0) - return false; - return true; - }; - if (isa<FixedVectorType>(ConstOff->getType())) { - for (unsigned i = 0; i < TargetElemCount; i++) { - if (!CheckValueSize(ConstOff->getAggregateElement(i))) - return false; - } - } else { - if (!CheckValueSize(ConstOff)) - return false; - } - } - return true; -} - -Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, FixedVectorType *Ty, +static bool checkOffsetSize(Value *Offsets, unsigned TargetElemCount) { + // Offsets that are not of type <N x i32> are sign extended by the + // getelementptr instruction, and MVE gathers/scatters treat the offset as + // unsigned. Thus, if the element size is smaller than 32, we can only allow + // positive offsets - i.e., the offsets are not allowed to be variables we + // can't look into. + // Additionally, <N x i32> offsets have to either originate from a zext of a + // vector with element types smaller or equal the type of the gather we're + // looking at, or consist of constants that we can check are small enough + // to fit into the gather type. + // Thus we check that 0 < value < 2^TargetElemSize. 
+ unsigned TargetElemSize = 128 / TargetElemCount; + unsigned OffsetElemSize = cast<FixedVectorType>(Offsets->getType()) + ->getElementType() + ->getScalarSizeInBits(); + if (OffsetElemSize != TargetElemSize || OffsetElemSize != 32) { + Constant *ConstOff = dyn_cast<Constant>(Offsets); + if (!ConstOff) + return false; + int64_t TargetElemMaxSize = (1ULL << TargetElemSize); + auto CheckValueSize = [TargetElemMaxSize](Value *OffsetElem) { + ConstantInt *OConst = dyn_cast<ConstantInt>(OffsetElem); + if (!OConst) + return false; + int SExtValue = OConst->getSExtValue(); + if (SExtValue >= TargetElemMaxSize || SExtValue < 0) + return false; + return true; + }; + if (isa<FixedVectorType>(ConstOff->getType())) { + for (unsigned i = 0; i < TargetElemCount; i++) { + if (!CheckValueSize(ConstOff->getAggregateElement(i))) + return false; + } + } else { + if (!CheckValueSize(ConstOff)) + return false; + } + } + return true; +} + +Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, FixedVectorType *Ty, GetElementPtrInst *GEP, IRBuilder<> &Builder) { if (!GEP) { @@ -225,43 +225,43 @@ Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, FixedVectorType *Ty, LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementpointer found." << " Looking at intrinsic for base + vector of offsets\n"); Value *GEPPtr = GEP->getPointerOperand(); - Offsets = GEP->getOperand(1); - if (GEPPtr->getType()->isVectorTy() || - !isa<FixedVectorType>(Offsets->getType())) + Offsets = GEP->getOperand(1); + if (GEPPtr->getType()->isVectorTy() || + !isa<FixedVectorType>(Offsets->getType())) return nullptr; - + if (GEP->getNumOperands() != 2) { LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementptr with too many" << " operands. Expanding.\n"); return nullptr; } Offsets = GEP->getOperand(1); - unsigned OffsetsElemCount = - cast<FixedVectorType>(Offsets->getType())->getNumElements(); + unsigned OffsetsElemCount = + cast<FixedVectorType>(Offsets->getType())->getNumElements(); // Paranoid check whether the number of parallel lanes is the same - assert(Ty->getNumElements() == OffsetsElemCount); - - ZExtInst *ZextOffs = dyn_cast<ZExtInst>(Offsets); - if (ZextOffs) + assert(Ty->getNumElements() == OffsetsElemCount); + + ZExtInst *ZextOffs = dyn_cast<ZExtInst>(Offsets); + if (ZextOffs) Offsets = ZextOffs->getOperand(0); - FixedVectorType *OffsetType = cast<FixedVectorType>(Offsets->getType()); - - // If the offsets are already being zext-ed to <N x i32>, that relieves us of - // having to make sure that they won't overflow. - if (!ZextOffs || cast<FixedVectorType>(ZextOffs->getDestTy()) - ->getElementType() - ->getScalarSizeInBits() != 32) - if (!checkOffsetSize(Offsets, OffsetsElemCount)) - return nullptr; - - // The offset sizes have been checked; if any truncating or zext-ing is - // required to fix them, do that now + FixedVectorType *OffsetType = cast<FixedVectorType>(Offsets->getType()); + + // If the offsets are already being zext-ed to <N x i32>, that relieves us of + // having to make sure that they won't overflow. 
+ if (!ZextOffs || cast<FixedVectorType>(ZextOffs->getDestTy()) + ->getElementType() + ->getScalarSizeInBits() != 32) + if (!checkOffsetSize(Offsets, OffsetsElemCount)) + return nullptr; + + // The offset sizes have been checked; if any truncating or zext-ing is + // required to fix them, do that now if (Ty != Offsets->getType()) { - if ((Ty->getElementType()->getScalarSizeInBits() < - OffsetType->getElementType()->getScalarSizeInBits())) { - Offsets = Builder.CreateTrunc(Offsets, Ty); + if ((Ty->getElementType()->getScalarSizeInBits() < + OffsetType->getElementType()->getScalarSizeInBits())) { + Offsets = Builder.CreateTrunc(Offsets, Ty); } else { - Offsets = Builder.CreateZExt(Offsets, VectorType::getInteger(Ty)); + Offsets = Builder.CreateZExt(Offsets, VectorType::getInteger(Ty)); } } // If none of the checks failed, return the gep's base pointer @@ -476,8 +476,8 @@ Value *MVEGatherScatterLowering::tryCreateMaskedGatherOffset( GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); Value *Offsets; - Value *BasePtr = - checkGEP(Offsets, cast<FixedVectorType>(ResultTy), GEP, Builder); + Value *BasePtr = + checkGEP(Offsets, cast<FixedVectorType>(ResultTy), GEP, Builder); if (!BasePtr) return nullptr; // Check whether the offset is a constant increment that could be merged into @@ -617,8 +617,8 @@ Value *MVEGatherScatterLowering::tryCreateMaskedScatterOffset( GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); Value *Offsets; - Value *BasePtr = - checkGEP(Offsets, cast<FixedVectorType>(InputTy), GEP, Builder); + Value *BasePtr = + checkGEP(Offsets, cast<FixedVectorType>(InputTy), GEP, Builder); if (!BasePtr) return nullptr; // Check whether the offset is a constant increment that could be merged into @@ -941,7 +941,7 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, int IncrementingBlock = -1; for (int i = 0; i < 2; i++) - if (auto *Op = dyn_cast<Instruction>(Phi->getIncomingValue(i))) + if (auto *Op = dyn_cast<Instruction>(Phi->getIncomingValue(i))) if (Op->getOpcode() == Instruction::Add && (Op->getOperand(0) == Phi || Op->getOperand(1) == Phi)) IncrementingBlock = i; @@ -960,8 +960,8 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, // Get the value that is added to/multiplied with the phi Value *OffsSecondOperand = Offs->getOperand(OffsSecondOp); - if (IncrementPerRound->getType() != OffsSecondOperand->getType() || - !L->isLoopInvariant(OffsSecondOperand)) + if (IncrementPerRound->getType() != OffsSecondOperand->getType() || + !L->isLoopInvariant(OffsSecondOperand)) // Something has gone wrong, abort return false; @@ -1029,128 +1029,128 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, return true; } -static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP, - IRBuilder<> &Builder) { - // Splat the non-vector value to a vector of the given type - if the value is - // a constant (and its value isn't too big), we can even use this opportunity - // to scale it to the size of the vector elements - auto FixSummands = [&Builder](FixedVectorType *&VT, Value *&NonVectorVal) { - ConstantInt *Const; - if ((Const = dyn_cast<ConstantInt>(NonVectorVal)) && - VT->getElementType() != NonVectorVal->getType()) { - unsigned TargetElemSize = VT->getElementType()->getPrimitiveSizeInBits(); - uint64_t N = Const->getZExtValue(); - if (N < (unsigned)(1 << (TargetElemSize - 1))) { - NonVectorVal = Builder.CreateVectorSplat( - VT->getNumElements(), Builder.getIntN(TargetElemSize, N)); - return; 
- } - } - NonVectorVal = - Builder.CreateVectorSplat(VT->getNumElements(), NonVectorVal); - }; - - FixedVectorType *XElType = dyn_cast<FixedVectorType>(X->getType()); - FixedVectorType *YElType = dyn_cast<FixedVectorType>(Y->getType()); - // If one of X, Y is not a vector, we have to splat it in order - // to add the two of them. - if (XElType && !YElType) { - FixSummands(XElType, Y); - YElType = cast<FixedVectorType>(Y->getType()); - } else if (YElType && !XElType) { - FixSummands(YElType, X); - XElType = cast<FixedVectorType>(X->getType()); - } - assert(XElType && YElType && "Unknown vector types"); - // Check that the summands are of compatible types - if (XElType != YElType) { - LLVM_DEBUG(dbgs() << "masked gathers/scatters: incompatible gep offsets\n"); - return nullptr; - } - - if (XElType->getElementType()->getScalarSizeInBits() != 32) { - // Check that by adding the vectors we do not accidentally - // create an overflow - Constant *ConstX = dyn_cast<Constant>(X); - Constant *ConstY = dyn_cast<Constant>(Y); - if (!ConstX || !ConstY) - return nullptr; - unsigned TargetElemSize = 128 / XElType->getNumElements(); - for (unsigned i = 0; i < XElType->getNumElements(); i++) { - ConstantInt *ConstXEl = - dyn_cast<ConstantInt>(ConstX->getAggregateElement(i)); - ConstantInt *ConstYEl = - dyn_cast<ConstantInt>(ConstY->getAggregateElement(i)); - if (!ConstXEl || !ConstYEl || - ConstXEl->getZExtValue() + ConstYEl->getZExtValue() >= - (unsigned)(1 << (TargetElemSize - 1))) - return nullptr; - } - } - - Value *Add = Builder.CreateAdd(X, Y); - - FixedVectorType *GEPType = cast<FixedVectorType>(GEP->getType()); - if (checkOffsetSize(Add, GEPType->getNumElements())) - return Add; - else - return nullptr; -} - -Value *MVEGatherScatterLowering::foldGEP(GetElementPtrInst *GEP, - Value *&Offsets, - IRBuilder<> &Builder) { - Value *GEPPtr = GEP->getPointerOperand(); - Offsets = GEP->getOperand(1); - // We only merge geps with constant offsets, because only for those - // we can make sure that we do not cause an overflow - if (!isa<Constant>(Offsets)) - return nullptr; - GetElementPtrInst *BaseGEP; - if ((BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr))) { - // Merge the two geps into one - Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Builder); - if (!BaseBasePtr) - return nullptr; - Offsets = - CheckAndCreateOffsetAdd(Offsets, GEP->getOperand(1), GEP, Builder); - if (Offsets == nullptr) - return nullptr; - return BaseBasePtr; - } - return GEPPtr; -} - -bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB, - LoopInfo *LI) { - GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Address); - if (!GEP) - return false; - bool Changed = false; - if (GEP->hasOneUse() && - dyn_cast<GetElementPtrInst>(GEP->getPointerOperand())) { - IRBuilder<> Builder(GEP->getContext()); - Builder.SetInsertPoint(GEP); - Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); - Value *Offsets; - Value *Base = foldGEP(GEP, Offsets, Builder); - // We only want to merge the geps if there is a real chance that they can be - // used by an MVE gather; thus the offset has to have the correct size - // (always i32 if it is not of vector type) and the base has to be a - // pointer. 
- if (Offsets && Base && Base != GEP) { - PointerType *BaseType = cast<PointerType>(Base->getType()); - GetElementPtrInst *NewAddress = GetElementPtrInst::Create( - BaseType->getPointerElementType(), Base, Offsets, "gep.merged", GEP); - GEP->replaceAllUsesWith(NewAddress); - GEP = NewAddress; - Changed = true; - } - } - Changed |= optimiseOffsets(GEP->getOperand(1), GEP->getParent(), LI); - return Changed; -} - +static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP, + IRBuilder<> &Builder) { + // Splat the non-vector value to a vector of the given type - if the value is + // a constant (and its value isn't too big), we can even use this opportunity + // to scale it to the size of the vector elements + auto FixSummands = [&Builder](FixedVectorType *&VT, Value *&NonVectorVal) { + ConstantInt *Const; + if ((Const = dyn_cast<ConstantInt>(NonVectorVal)) && + VT->getElementType() != NonVectorVal->getType()) { + unsigned TargetElemSize = VT->getElementType()->getPrimitiveSizeInBits(); + uint64_t N = Const->getZExtValue(); + if (N < (unsigned)(1 << (TargetElemSize - 1))) { + NonVectorVal = Builder.CreateVectorSplat( + VT->getNumElements(), Builder.getIntN(TargetElemSize, N)); + return; + } + } + NonVectorVal = + Builder.CreateVectorSplat(VT->getNumElements(), NonVectorVal); + }; + + FixedVectorType *XElType = dyn_cast<FixedVectorType>(X->getType()); + FixedVectorType *YElType = dyn_cast<FixedVectorType>(Y->getType()); + // If one of X, Y is not a vector, we have to splat it in order + // to add the two of them. + if (XElType && !YElType) { + FixSummands(XElType, Y); + YElType = cast<FixedVectorType>(Y->getType()); + } else if (YElType && !XElType) { + FixSummands(YElType, X); + XElType = cast<FixedVectorType>(X->getType()); + } + assert(XElType && YElType && "Unknown vector types"); + // Check that the summands are of compatible types + if (XElType != YElType) { + LLVM_DEBUG(dbgs() << "masked gathers/scatters: incompatible gep offsets\n"); + return nullptr; + } + + if (XElType->getElementType()->getScalarSizeInBits() != 32) { + // Check that by adding the vectors we do not accidentally + // create an overflow + Constant *ConstX = dyn_cast<Constant>(X); + Constant *ConstY = dyn_cast<Constant>(Y); + if (!ConstX || !ConstY) + return nullptr; + unsigned TargetElemSize = 128 / XElType->getNumElements(); + for (unsigned i = 0; i < XElType->getNumElements(); i++) { + ConstantInt *ConstXEl = + dyn_cast<ConstantInt>(ConstX->getAggregateElement(i)); + ConstantInt *ConstYEl = + dyn_cast<ConstantInt>(ConstY->getAggregateElement(i)); + if (!ConstXEl || !ConstYEl || + ConstXEl->getZExtValue() + ConstYEl->getZExtValue() >= + (unsigned)(1 << (TargetElemSize - 1))) + return nullptr; + } + } + + Value *Add = Builder.CreateAdd(X, Y); + + FixedVectorType *GEPType = cast<FixedVectorType>(GEP->getType()); + if (checkOffsetSize(Add, GEPType->getNumElements())) + return Add; + else + return nullptr; +} + +Value *MVEGatherScatterLowering::foldGEP(GetElementPtrInst *GEP, + Value *&Offsets, + IRBuilder<> &Builder) { + Value *GEPPtr = GEP->getPointerOperand(); + Offsets = GEP->getOperand(1); + // We only merge geps with constant offsets, because only for those + // we can make sure that we do not cause an overflow + if (!isa<Constant>(Offsets)) + return nullptr; + GetElementPtrInst *BaseGEP; + if ((BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr))) { + // Merge the two geps into one + Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Builder); + if (!BaseBasePtr) + return nullptr; + Offsets = + 
CheckAndCreateOffsetAdd(Offsets, GEP->getOperand(1), GEP, Builder); + if (Offsets == nullptr) + return nullptr; + return BaseBasePtr; + } + return GEPPtr; +} + +bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB, + LoopInfo *LI) { + GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Address); + if (!GEP) + return false; + bool Changed = false; + if (GEP->hasOneUse() && + dyn_cast<GetElementPtrInst>(GEP->getPointerOperand())) { + IRBuilder<> Builder(GEP->getContext()); + Builder.SetInsertPoint(GEP); + Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); + Value *Offsets; + Value *Base = foldGEP(GEP, Offsets, Builder); + // We only want to merge the geps if there is a real chance that they can be + // used by an MVE gather; thus the offset has to have the correct size + // (always i32 if it is not of vector type) and the base has to be a + // pointer. + if (Offsets && Base && Base != GEP) { + PointerType *BaseType = cast<PointerType>(Base->getType()); + GetElementPtrInst *NewAddress = GetElementPtrInst::Create( + BaseType->getPointerElementType(), Base, Offsets, "gep.merged", GEP); + GEP->replaceAllUsesWith(NewAddress); + GEP = NewAddress; + Changed = true; + } + } + Changed |= optimiseOffsets(GEP->getOperand(1), GEP->getParent(), LI); + return Changed; +} + bool MVEGatherScatterLowering::runOnFunction(Function &F) { if (!EnableMaskedGatherScatters) return false; @@ -1166,18 +1166,18 @@ bool MVEGatherScatterLowering::runOnFunction(Function &F) { bool Changed = false; for (BasicBlock &BB : F) { - Changed |= SimplifyInstructionsInBlock(&BB); - + Changed |= SimplifyInstructionsInBlock(&BB); + for (Instruction &I : BB) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); - if (II && II->getIntrinsicID() == Intrinsic::masked_gather && - isa<FixedVectorType>(II->getType())) { + if (II && II->getIntrinsicID() == Intrinsic::masked_gather && + isa<FixedVectorType>(II->getType())) { Gathers.push_back(II); - Changed |= optimiseAddress(II->getArgOperand(0), II->getParent(), LI); - } else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter && - isa<FixedVectorType>(II->getArgOperand(0)->getType())) { + Changed |= optimiseAddress(II->getArgOperand(0), II->getParent(), LI); + } else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter && + isa<FixedVectorType>(II->getArgOperand(0)->getType())) { Scatters.push_back(II); - Changed |= optimiseAddress(II->getArgOperand(1), II->getParent(), LI); + Changed |= optimiseAddress(II->getArgOperand(1), II->getParent(), LI); } } } diff --git a/contrib/libs/llvm12/lib/Target/ARM/MVETailPredUtils.h b/contrib/libs/llvm12/lib/Target/ARM/MVETailPredUtils.h index 9ab5d92729..1bb23cc725 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MVETailPredUtils.h +++ b/contrib/libs/llvm12/lib/Target/ARM/MVETailPredUtils.h @@ -1,157 +1,157 @@ -//===-- MVETailPredUtils.h - Tail predication utility functions -*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains utility functions for low overhead and tail predicated -// loops, shared between the ARMLowOverheadLoops pass and anywhere else that -// needs them. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H -#define LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H - -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetInstrInfo.h" - -namespace llvm { - -static inline unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) { - switch (Opcode) { - default: - llvm_unreachable("unhandled vctp opcode"); - break; - case ARM::MVE_VCTP8: - return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8; - case ARM::MVE_VCTP16: - return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16; - case ARM::MVE_VCTP32: - return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32; - case ARM::MVE_VCTP64: - return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64; - } - return 0; -} - -static inline unsigned getTailPredVectorWidth(unsigned Opcode) { - switch (Opcode) { - default: - llvm_unreachable("unhandled vctp opcode"); - case ARM::MVE_VCTP8: - return 16; - case ARM::MVE_VCTP16: - return 8; - case ARM::MVE_VCTP32: - return 4; - case ARM::MVE_VCTP64: - return 2; - } - return 0; -} - -static inline bool isVCTP(const MachineInstr *MI) { - switch (MI->getOpcode()) { - default: - break; - case ARM::MVE_VCTP8: - case ARM::MVE_VCTP16: - case ARM::MVE_VCTP32: - case ARM::MVE_VCTP64: - return true; - } - return false; -} - -static inline bool isLoopStart(MachineInstr &MI) { - return MI.getOpcode() == ARM::t2DoLoopStart || - MI.getOpcode() == ARM::t2DoLoopStartTP || - MI.getOpcode() == ARM::t2WhileLoopStart; -} - -// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a -// beq that branches to the exit branch. -inline void RevertWhileLoopStart(MachineInstr *MI, const TargetInstrInfo *TII, - unsigned BrOpc = ARM::t2Bcc) { - MachineBasicBlock *MBB = MI->getParent(); - - // Cmp - MachineInstrBuilder MIB = - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri)); - MIB.add(MI->getOperand(0)); - MIB.addImm(0); - MIB.addImm(ARMCC::AL); - MIB.addReg(ARM::NoRegister); - - // Branch - MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); - MIB.add(MI->getOperand(1)); // branch target - MIB.addImm(ARMCC::EQ); // condition code - MIB.addReg(ARM::CPSR); - - MI->eraseFromParent(); -} - -inline void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII) { - MachineBasicBlock *MBB = MI->getParent(); - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr)) - .add(MI->getOperand(0)) - .add(MI->getOperand(1)) - .add(predOps(ARMCC::AL)); - - MI->eraseFromParent(); -} - -inline void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, - bool SetFlags = false) { - MachineBasicBlock *MBB = MI->getParent(); - - MachineInstrBuilder MIB = - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); - MIB.add(MI->getOperand(0)); - MIB.add(MI->getOperand(1)); - MIB.add(MI->getOperand(2)); - MIB.addImm(ARMCC::AL); - MIB.addReg(0); - - if (SetFlags) { - MIB.addReg(ARM::CPSR); - MIB->getOperand(5).setIsDef(true); - } else - MIB.addReg(0); - - MI->eraseFromParent(); -} - -// Generate a subs, or sub and cmp, and a branch instead of an LE. 
-inline void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, - unsigned BrOpc = ARM::t2Bcc, bool SkipCmp = false) { - MachineBasicBlock *MBB = MI->getParent(); - - // Create cmp - if (!SkipCmp) { - MachineInstrBuilder MIB = - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri)); - MIB.add(MI->getOperand(0)); - MIB.addImm(0); - MIB.addImm(ARMCC::AL); - MIB.addReg(ARM::NoRegister); - } - - // Create bne - MachineInstrBuilder MIB = - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); - MIB.add(MI->getOperand(1)); // branch target - MIB.addImm(ARMCC::NE); // condition code - MIB.addReg(ARM::CPSR); - MI->eraseFromParent(); -} - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H +//===-- MVETailPredUtils.h - Tail predication utility functions -*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains utility functions for low overhead and tail predicated +// loops, shared between the ARMLowOverheadLoops pass and anywhere else that +// needs them. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H +#define LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H + +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +namespace llvm { + +static inline unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) { + switch (Opcode) { + default: + llvm_unreachable("unhandled vctp opcode"); + break; + case ARM::MVE_VCTP8: + return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8; + case ARM::MVE_VCTP16: + return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16; + case ARM::MVE_VCTP32: + return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32; + case ARM::MVE_VCTP64: + return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64; + } + return 0; +} + +static inline unsigned getTailPredVectorWidth(unsigned Opcode) { + switch (Opcode) { + default: + llvm_unreachable("unhandled vctp opcode"); + case ARM::MVE_VCTP8: + return 16; + case ARM::MVE_VCTP16: + return 8; + case ARM::MVE_VCTP32: + return 4; + case ARM::MVE_VCTP64: + return 2; + } + return 0; +} + +static inline bool isVCTP(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: + break; + case ARM::MVE_VCTP8: + case ARM::MVE_VCTP16: + case ARM::MVE_VCTP32: + case ARM::MVE_VCTP64: + return true; + } + return false; +} + +static inline bool isLoopStart(MachineInstr &MI) { + return MI.getOpcode() == ARM::t2DoLoopStart || + MI.getOpcode() == ARM::t2DoLoopStartTP || + MI.getOpcode() == ARM::t2WhileLoopStart; +} + +// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a +// beq that branches to the exit branch. 
+inline void RevertWhileLoopStart(MachineInstr *MI, const TargetInstrInfo *TII, + unsigned BrOpc = ARM::t2Bcc) { + MachineBasicBlock *MBB = MI->getParent(); + + // Cmp + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri)); + MIB.add(MI->getOperand(0)); + MIB.addImm(0); + MIB.addImm(ARMCC::AL); + MIB.addReg(ARM::NoRegister); + + // Branch + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); + MIB.add(MI->getOperand(1)); // branch target + MIB.addImm(ARMCC::EQ); // condition code + MIB.addReg(ARM::CPSR); + + MI->eraseFromParent(); +} + +inline void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII) { + MachineBasicBlock *MBB = MI->getParent(); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr)) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .add(predOps(ARMCC::AL)); + + MI->eraseFromParent(); +} + +inline void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, + bool SetFlags = false) { + MachineBasicBlock *MBB = MI->getParent(); + + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); + MIB.add(MI->getOperand(0)); + MIB.add(MI->getOperand(1)); + MIB.add(MI->getOperand(2)); + MIB.addImm(ARMCC::AL); + MIB.addReg(0); + + if (SetFlags) { + MIB.addReg(ARM::CPSR); + MIB->getOperand(5).setIsDef(true); + } else + MIB.addReg(0); + + MI->eraseFromParent(); +} + +// Generate a subs, or sub and cmp, and a branch instead of an LE. +inline void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, + unsigned BrOpc = ARM::t2Bcc, bool SkipCmp = false) { + MachineBasicBlock *MBB = MI->getParent(); + + // Create cmp + if (!SkipCmp) { + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri)); + MIB.add(MI->getOperand(0)); + MIB.addImm(0); + MIB.addImm(ARMCC::AL); + MIB.addReg(ARM::NoRegister); + } + + // Create bne + MachineInstrBuilder MIB = + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); + MIB.add(MI->getOperand(1)); // branch target + MIB.addImm(ARMCC::NE); // condition code + MIB.addReg(ARM::CPSR); + MI->eraseFromParent(); +} + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H diff --git a/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp b/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp index cccac55952..94e71f1d60 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/MVETailPredication.cpp @@ -22,13 +22,13 @@ /// The HardwareLoops pass inserts intrinsics identifying loops that the /// backend will attempt to convert into a low-overhead loop. The vectorizer is /// responsible for generating a vectorized loop in which the lanes are -/// predicated upon an get.active.lane.mask intrinsic. This pass looks at these -/// get.active.lane.mask intrinsic and attempts to convert them to VCTP -/// instructions. This will be picked up by the ARM Low-overhead loop pass later -/// in the backend, which performs the final transformation to a DLSTP or WLSTP -/// tail-predicated loop. -// -//===----------------------------------------------------------------------===// +/// predicated upon an get.active.lane.mask intrinsic. This pass looks at these +/// get.active.lane.mask intrinsic and attempts to convert them to VCTP +/// instructions. This will be picked up by the ARM Low-overhead loop pass later +/// in the backend, which performs the final transformation to a DLSTP or WLSTP +/// tail-predicated loop. 
+// +//===----------------------------------------------------------------------===// #include "ARM.h" #include "ARMSubtarget.h" @@ -47,7 +47,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" @@ -57,8 +57,8 @@ using namespace llvm; #define DESC "Transform predicated vector loops to use MVE tail predication" cl::opt<TailPredication::Mode> EnableTailPredication( - "tail-predication", cl::desc("MVE tail-predication pass options"), - cl::init(TailPredication::Enabled), + "tail-predication", cl::desc("MVE tail-predication pass options"), + cl::init(TailPredication::Enabled), cl::values(clEnumValN(TailPredication::Disabled, "disabled", "Don't tail-predicate loops"), clEnumValN(TailPredication::EnabledNoReductions, @@ -103,18 +103,18 @@ public: bool runOnLoop(Loop *L, LPPassManager&) override; private: - /// Perform the relevant checks on the loop and convert active lane masks if - /// possible. - bool TryConvertActiveLaneMask(Value *TripCount); + /// Perform the relevant checks on the loop and convert active lane masks if + /// possible. + bool TryConvertActiveLaneMask(Value *TripCount); - /// Perform several checks on the arguments of @llvm.get.active.lane.mask - /// intrinsic. E.g., check that the loop induction variable and the element - /// count are of the form we expect, and also perform overflow checks for - /// the new expressions that are created. - bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount); + /// Perform several checks on the arguments of @llvm.get.active.lane.mask + /// intrinsic. E.g., check that the loop induction variable and the element + /// count are of the form we expect, and also perform overflow checks for + /// the new expressions that are created. + bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount); /// Insert the intrinsic to represent the effect of tail predication. - void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount); + void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount); /// Rematerialize the iteration count in exit blocks, which enables /// ARMLowOverheadLoops to better optimise away loop update statements inside @@ -155,7 +155,7 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { continue; Intrinsic::ID ID = Call->getIntrinsicID(); - if (ID == Intrinsic::start_loop_iterations || + if (ID == Intrinsic::start_loop_iterations || ID == Intrinsic::test_set_loop_iterations) return cast<IntrinsicInst>(&I); } @@ -174,23 +174,23 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { return false; } - LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n"); + LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n"); - bool Changed = TryConvertActiveLaneMask(Setup->getArgOperand(0)); + bool Changed = TryConvertActiveLaneMask(Setup->getArgOperand(0)); - return Changed; + return Changed; } // The active lane intrinsic has this form: // -// @llvm.get.active.lane.mask(IV, TC) +// @llvm.get.active.lane.mask(IV, TC) // // Here we perform checks that this intrinsic behaves as expected, // which means: // -// 1) Check that the TripCount (TC) belongs to this loop (originally). 
-// 2) The element count (TC) needs to be sufficiently large that the decrement -// of element counter doesn't overflow, which means that we need to prove: +// 1) Check that the TripCount (TC) belongs to this loop (originally). +// 2) The element count (TC) needs to be sufficiently large that the decrement +// of element counter doesn't overflow, which means that we need to prove: // ceil(ElementCount / VectorWidth) >= TripCount // by rounding up ElementCount up: // ((ElementCount + (VectorWidth - 1)) / VectorWidth @@ -199,122 +199,122 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { // 3) The IV must be an induction phi with an increment equal to the // vector width. bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, - Value *TripCount) { + Value *TripCount) { bool ForceTailPredication = EnableTailPredication == TailPredication::ForceEnabledNoReductions || EnableTailPredication == TailPredication::ForceEnabled; - Value *ElemCount = ActiveLaneMask->getOperand(1); - bool Changed = false; - if (!L->makeLoopInvariant(ElemCount, Changed)) - return false; - - auto *EC= SE->getSCEV(ElemCount); - auto *TC = SE->getSCEV(TripCount); - int VectorWidth = - cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements(); - if (VectorWidth != 4 && VectorWidth != 8 && VectorWidth != 16) - return false; - ConstantInt *ConstElemCount = nullptr; - - // 1) Smoke tests that the original scalar loop TripCount (TC) belongs to - // this loop. The scalar tripcount corresponds the number of elements - // processed by the loop, so we will refer to that from this point on. - if (!SE->isLoopInvariant(EC, L)) { - LLVM_DEBUG(dbgs() << "ARM TP: element count must be loop invariant.\n"); + Value *ElemCount = ActiveLaneMask->getOperand(1); + bool Changed = false; + if (!L->makeLoopInvariant(ElemCount, Changed)) + return false; + + auto *EC= SE->getSCEV(ElemCount); + auto *TC = SE->getSCEV(TripCount); + int VectorWidth = + cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements(); + if (VectorWidth != 4 && VectorWidth != 8 && VectorWidth != 16) + return false; + ConstantInt *ConstElemCount = nullptr; + + // 1) Smoke tests that the original scalar loop TripCount (TC) belongs to + // this loop. The scalar tripcount corresponds the number of elements + // processed by the loop, so we will refer to that from this point on. + if (!SE->isLoopInvariant(EC, L)) { + LLVM_DEBUG(dbgs() << "ARM TP: element count must be loop invariant.\n"); return false; } - if ((ConstElemCount = dyn_cast<ConstantInt>(ElemCount))) { - ConstantInt *TC = dyn_cast<ConstantInt>(TripCount); - if (!TC) { - LLVM_DEBUG(dbgs() << "ARM TP: Constant tripcount expected in " - "set.loop.iterations\n"); - return false; - } - - // Calculate 2 tripcount values and check that they are consistent with - // each other. The TripCount for a predicated vector loop body is - // ceil(ElementCount/Width), or floor((ElementCount+Width-1)/Width) as we - // work it out here. - uint64_t TC1 = TC->getZExtValue(); - uint64_t TC2 = - (ConstElemCount->getZExtValue() + VectorWidth - 1) / VectorWidth; - - // If the tripcount values are inconsistent, we can't insert the VCTP and - // trigger tail-predication; keep the intrinsic as a get.active.lane.mask - // and legalize this. 
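// [Editor's note: illustrative sketch only, not part of the diff above or of
// the upstream LLVM sources.] The constant-tripcount path compares TC1, the
// iteration count carried by set.loop.iterations, against TC2, the rounded-up
// division ceil(ElementCount / VectorWidth) computed as
// (ElementCount + VectorWidth - 1) / VectorWidth. A minimal standalone
// demonstration of that arithmetic, with made-up numbers:
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t ElementCount = 1002; // scalar elements the loop processes
  const uint64_t VectorWidth = 4;     // lanes handled per vector iteration
  const uint64_t TC1 = 251;           // value set.loop.iterations would carry
  // Same rounding-up division the pass uses to compute TC2.
  const uint64_t TC2 = (ElementCount + VectorWidth - 1) / VectorWidth;
  // If TC1 and TC2 disagree, the pass refuses to insert the VCTP and keeps
  // the get.active.lane.mask intrinsic as-is.
  assert(TC1 == TC2);
  return 0;
}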
- if (TC1 != TC2) { - LLVM_DEBUG(dbgs() << "ARM TP: inconsistent constant tripcount values: " - << TC1 << " from set.loop.iterations, and " - << TC2 << " from get.active.lane.mask\n"); - return false; - } - } else if (!ForceTailPredication) { - // 2) We need to prove that the sub expression that we create in the - // tail-predicated loop body, which calculates the remaining elements to be - // processed, is non-negative, i.e. it doesn't overflow: - // - // ((ElementCount + VectorWidth - 1) / VectorWidth) - TripCount >= 0 - // - // This is true if: - // - // TripCount == (ElementCount + VectorWidth - 1) / VectorWidth - // - // which what we will be using here. - // - auto *VW = SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth)); - // ElementCount + (VW-1): - auto *ECPlusVWMinus1 = SE->getAddExpr(EC, - SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1))); - - // Ceil = ElementCount + (VW-1) / VW - auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1, VW); - - // Prevent unused variable warnings with TC - (void)TC; - LLVM_DEBUG( - dbgs() << "ARM TP: Analysing overflow behaviour for:\n"; - dbgs() << "ARM TP: - TripCount = "; TC->dump(); - dbgs() << "ARM TP: - ElemCount = "; EC->dump(); - dbgs() << "ARM TP: - VecWidth = " << VectorWidth << "\n"; - dbgs() << "ARM TP: - (ElemCount+VW-1) / VW = "; Ceil->dump(); - ); - - // As an example, almost all the tripcount expressions (produced by the - // vectoriser) look like this: - // - // TC = ((-4 + (4 * ((3 + %N) /u 4))<nuw>) /u 4) - // - // and "ElementCount + (VW-1) / VW": - // - // Ceil = ((3 + %N) /u 4) - // - // Check for equality of TC and Ceil by calculating SCEV expression - // TC - Ceil and test it for zero. - // - bool Zero = SE->getMinusSCEV( - SE->getBackedgeTakenCount(L), - SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW), - SE->getNegativeSCEV(VW)), - VW)) - ->isZero(); - - if (!Zero) { - LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n"); - return false; - } + if ((ConstElemCount = dyn_cast<ConstantInt>(ElemCount))) { + ConstantInt *TC = dyn_cast<ConstantInt>(TripCount); + if (!TC) { + LLVM_DEBUG(dbgs() << "ARM TP: Constant tripcount expected in " + "set.loop.iterations\n"); + return false; + } + + // Calculate 2 tripcount values and check that they are consistent with + // each other. The TripCount for a predicated vector loop body is + // ceil(ElementCount/Width), or floor((ElementCount+Width-1)/Width) as we + // work it out here. + uint64_t TC1 = TC->getZExtValue(); + uint64_t TC2 = + (ConstElemCount->getZExtValue() + VectorWidth - 1) / VectorWidth; + + // If the tripcount values are inconsistent, we can't insert the VCTP and + // trigger tail-predication; keep the intrinsic as a get.active.lane.mask + // and legalize this. + if (TC1 != TC2) { + LLVM_DEBUG(dbgs() << "ARM TP: inconsistent constant tripcount values: " + << TC1 << " from set.loop.iterations, and " + << TC2 << " from get.active.lane.mask\n"); + return false; + } + } else if (!ForceTailPredication) { + // 2) We need to prove that the sub expression that we create in the + // tail-predicated loop body, which calculates the remaining elements to be + // processed, is non-negative, i.e. it doesn't overflow: + // + // ((ElementCount + VectorWidth - 1) / VectorWidth) - TripCount >= 0 + // + // This is true if: + // + // TripCount == (ElementCount + VectorWidth - 1) / VectorWidth + // + // which what we will be using here. 
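// [Editor's note: illustrative sketch only, not part of the diff or of the
// upstream LLVM sources.] For a non-constant element count the pass asks SCEV
// to prove that the loop's backedge-taken count equals
// ((Ceil * VW) - VW) / VW, where Ceil = (ElementCount + VW - 1) / VW, i.e.
// that the vector trip count is exactly Ceil. The integer identity behind
// that zero-check, spelled out in plain arithmetic (VW = 4 is an assumption
// of this sketch):
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t VW = 4; // vector width
  for (uint64_t N = 1; N <= 1000; ++N) {            // N stands in for ElementCount
    const uint64_t Ceil = (N + VW - 1) / VW;        // rounded-up trip count
    const uint64_t Reduced = (Ceil * VW - VW) / VW; // the SCEV expression above
    // The expression always reduces to Ceil - 1, which is what the
    // backedge-taken count must equal for the trip count to be Ceil.
    assert(Reduced == Ceil - 1);
  }
  return 0;
}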
+ // + auto *VW = SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth)); + // ElementCount + (VW-1): + auto *ECPlusVWMinus1 = SE->getAddExpr(EC, + SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1))); + + // Ceil = ElementCount + (VW-1) / VW + auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1, VW); + + // Prevent unused variable warnings with TC + (void)TC; + LLVM_DEBUG( + dbgs() << "ARM TP: Analysing overflow behaviour for:\n"; + dbgs() << "ARM TP: - TripCount = "; TC->dump(); + dbgs() << "ARM TP: - ElemCount = "; EC->dump(); + dbgs() << "ARM TP: - VecWidth = " << VectorWidth << "\n"; + dbgs() << "ARM TP: - (ElemCount+VW-1) / VW = "; Ceil->dump(); + ); + + // As an example, almost all the tripcount expressions (produced by the + // vectoriser) look like this: + // + // TC = ((-4 + (4 * ((3 + %N) /u 4))<nuw>) /u 4) + // + // and "ElementCount + (VW-1) / VW": + // + // Ceil = ((3 + %N) /u 4) + // + // Check for equality of TC and Ceil by calculating SCEV expression + // TC - Ceil and test it for zero. + // + bool Zero = SE->getMinusSCEV( + SE->getBackedgeTakenCount(L), + SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW), + SE->getNegativeSCEV(VW)), + VW)) + ->isZero(); + + if (!Zero) { + LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n"); + return false; + } } - // 3) Find out if IV is an induction phi. Note that we can't use Loop + // 3) Find out if IV is an induction phi. Note that we can't use Loop // helpers here to get the induction variable, because the hardware loop is - // no longer in loopsimplify form, and also the hwloop intrinsic uses a - // different counter. Using SCEV, we check that the induction is of the + // no longer in loopsimplify form, and also the hwloop intrinsic uses a + // different counter. Using SCEV, we check that the induction is of the // form i = i + 4, where the increment must be equal to the VectorWidth. 
auto *IV = ActiveLaneMask->getOperand(0); auto *IVExpr = SE->getSCEV(IV); auto *AddExpr = dyn_cast<SCEVAddRecExpr>(IVExpr); - + if (!AddExpr) { LLVM_DEBUG(dbgs() << "ARM TP: induction not an add expr: "; IVExpr->dump()); return false; @@ -324,11 +324,11 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, LLVM_DEBUG(dbgs() << "ARM TP: phi not part of this loop\n"); return false; } - auto *Base = dyn_cast<SCEVConstant>(AddExpr->getOperand(0)); - if (!Base || !Base->isZero()) { - LLVM_DEBUG(dbgs() << "ARM TP: induction base is not 0\n"); - return false; - } + auto *Base = dyn_cast<SCEVConstant>(AddExpr->getOperand(0)); + if (!Base || !Base->isZero()) { + LLVM_DEBUG(dbgs() << "ARM TP: induction base is not 0\n"); + return false; + } auto *Step = dyn_cast<SCEVConstant>(AddExpr->getOperand(1)); if (!Step) { LLVM_DEBUG(dbgs() << "ARM TP: induction step is not a constant: "; @@ -339,29 +339,29 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, if (VectorWidth == StepValue) return true; - LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue - << " doesn't match vector width " << VectorWidth << "\n"); + LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue + << " doesn't match vector width " << VectorWidth << "\n"); return false; } void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, - Value *TripCount) { + Value *TripCount) { IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); Module *M = L->getHeader()->getModule(); Type *Ty = IntegerType::get(M->getContext(), 32); - unsigned VectorWidth = - cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements(); + unsigned VectorWidth = + cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements(); // Insert a phi to count the number of elements processed by the loop. - Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI()); + Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI()); PHINode *Processed = Builder.CreatePHI(Ty, 2); - Processed->addIncoming(ActiveLaneMask->getOperand(1), L->getLoopPreheader()); + Processed->addIncoming(ActiveLaneMask->getOperand(1), L->getLoopPreheader()); - // Replace @llvm.get.active.mask() with the ARM specific VCTP intrinic, and - // thus represent the effect of tail predication. + // Replace @llvm.get.active.mask() with the ARM specific VCTP intrinic, and + // thus represent the effect of tail predication. 
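// [Editor's note: rough sketch of the IR shape this rewrite produces, not part
// of the diff; the value names are invented and unrelated details are
// abbreviated.] For a <4 x i1> mask the loop body goes from
//
//   %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %iv, i32 %ElemCount)
//
// to something like
//
//   vector.body:
//     %remaining = phi i32 [ %ElemCount, %preheader ], [ %next, %vector.body ]
//     %mask = call <4 x i1> @llvm.arm.mve.vctp32(i32 %remaining)
//     ...
//     %next = sub i32 %remaining, 4   ; VectorWidth elements consumed per iteration
//
// which ARMLowOverheadLoops can later recognise when forming a DLSTP/WLSTP
// tail-predicated loop.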
Builder.SetInsertPoint(ActiveLaneMask); - ConstantInt *Factor = ConstantInt::get(cast<IntegerType>(Ty), VectorWidth); + ConstantInt *Factor = ConstantInt::get(cast<IntegerType>(Ty), VectorWidth); Intrinsic::ID VCTPID; switch (VectorWidth) { @@ -390,36 +390,36 @@ void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, << "ARM TP: Inserted VCTP: " << *VCTPCall << "\n"); } -bool MVETailPredication::TryConvertActiveLaneMask(Value *TripCount) { - SmallVector<IntrinsicInst *, 4> ActiveLaneMasks; - for (auto *BB : L->getBlocks()) - for (auto &I : *BB) - if (auto *Int = dyn_cast<IntrinsicInst>(&I)) - if (Int->getIntrinsicID() == Intrinsic::get_active_lane_mask) - ActiveLaneMasks.push_back(Int); - - if (ActiveLaneMasks.empty()) +bool MVETailPredication::TryConvertActiveLaneMask(Value *TripCount) { + SmallVector<IntrinsicInst *, 4> ActiveLaneMasks; + for (auto *BB : L->getBlocks()) + for (auto &I : *BB) + if (auto *Int = dyn_cast<IntrinsicInst>(&I)) + if (Int->getIntrinsicID() == Intrinsic::get_active_lane_mask) + ActiveLaneMasks.push_back(Int); + + if (ActiveLaneMasks.empty()) return false; LLVM_DEBUG(dbgs() << "ARM TP: Found predicated vector loop.\n"); - for (auto *ActiveLaneMask : ActiveLaneMasks) { + for (auto *ActiveLaneMask : ActiveLaneMasks) { LLVM_DEBUG(dbgs() << "ARM TP: Found active lane mask: " << *ActiveLaneMask << "\n"); - if (!IsSafeActiveMask(ActiveLaneMask, TripCount)) { + if (!IsSafeActiveMask(ActiveLaneMask, TripCount)) { LLVM_DEBUG(dbgs() << "ARM TP: Not safe to insert VCTP.\n"); return false; } LLVM_DEBUG(dbgs() << "ARM TP: Safe to insert VCTP.\n"); - InsertVCTPIntrinsic(ActiveLaneMask, TripCount); + InsertVCTPIntrinsic(ActiveLaneMask, TripCount); } - // Remove dead instructions and now dead phis. - for (auto *II : ActiveLaneMasks) - RecursivelyDeleteTriviallyDeadInstructions(II); - for (auto I : L->blocks()) - DeleteDeadPHIs(I); + // Remove dead instructions and now dead phis. 
+ for (auto *II : ActiveLaneMasks) + RecursivelyDeleteTriviallyDeadInstructions(II); + for (auto I : L->blocks()) + DeleteDeadPHIs(I); return true; } diff --git a/contrib/libs/llvm12/lib/Target/ARM/MVEVPTBlockPass.cpp b/contrib/libs/llvm12/lib/Target/ARM/MVEVPTBlockPass.cpp index c7f451cba1..89183c16ac 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MVEVPTBlockPass.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -107,12 +107,12 @@ static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, NumInstrsSteppedOver = 0; while (Iter != EndIter) { - if (Iter->isDebugInstr()) { - // Skip debug instructions - ++Iter; - continue; - } - + if (Iter->isDebugInstr()) { + // Skip debug instructions + ++Iter; + continue; + } + NextPred = getVPTInstrPredicate(*Iter, PredReg); assert(NextPred != ARMVCC::Else && "VPT block pass does not expect Else preds"); @@ -176,8 +176,8 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter = std::next(BlockBeg); AddedInstIter != Iter; ++AddedInstIter) { - if (AddedInstIter->isDebugInstr()) - continue; + if (AddedInstIter->isDebugInstr()) + continue; dbgs() << " adding: "; AddedInstIter->dump(); }); @@ -205,7 +205,7 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter)) break; - LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump()); + LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump()); // Record the new size of the block BlockSize += ElseInstCnt; @@ -219,9 +219,9 @@ CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, // Note that we are using "Iter" to iterate over the block so we can update // it at the same time. for (; Iter != VPNOTBlockEndIter; ++Iter) { - if (Iter->isDebugInstr()) - continue; - + if (Iter->isDebugInstr()) + continue; + // Find the register in which the predicate is int OpIdx = findFirstVPTPredOperandIdx(*Iter); assert(OpIdx != -1); @@ -281,27 +281,27 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { MIBuilder.add(VCMP->getOperand(1)); MIBuilder.add(VCMP->getOperand(2)); MIBuilder.add(VCMP->getOperand(3)); - - // We need to remove any kill flags between the original VCMP and the new - // insertion point. - for (MachineInstr &MII : - make_range(VCMP->getIterator(), MI->getIterator())) { - MII.clearRegisterKills(VCMP->getOperand(1).getReg(), TRI); - MII.clearRegisterKills(VCMP->getOperand(2).getReg(), TRI); - } - + + // We need to remove any kill flags between the original VCMP and the new + // insertion point. + for (MachineInstr &MII : + make_range(VCMP->getIterator(), MI->getIterator())) { + MII.clearRegisterKills(VCMP->getOperand(1).getReg(), TRI); + MII.clearRegisterKills(VCMP->getOperand(2).getReg(), TRI); + } + VCMP->eraseFromParent(); } else { MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST)); MIBuilder.addImm((uint64_t)BlockMask); } - // Erase all dead instructions (VPNOT's). Do that now so that they do not - // mess with the bundle creation. - for (MachineInstr *DeadMI : DeadInstructions) - DeadMI->eraseFromParent(); - DeadInstructions.clear(); - + // Erase all dead instructions (VPNOT's). Do that now so that they do not + // mess with the bundle creation. 
+ for (MachineInstr *DeadMI : DeadInstructions) + DeadMI->eraseFromParent(); + DeadInstructions.clear(); + finalizeBundle( Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter); diff --git a/contrib/libs/llvm12/lib/Target/ARM/MVEVPTOptimisationsPass.cpp b/contrib/libs/llvm12/lib/Target/ARM/MVEVPTOptimisationsPass.cpp index 00e4449769..70fb8c5383 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/MVEVPTOptimisationsPass.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/MVEVPTOptimisationsPass.cpp @@ -6,28 +6,28 @@ // //===----------------------------------------------------------------------===// // -/// \file This pass does a few optimisations related to Tail predicated loops -/// and MVE VPT blocks before register allocation is performed. For VPT blocks -/// the goal is to maximize the sizes of the blocks that will be created by the -/// MVE VPT Block Insertion pass (which runs after register allocation). For -/// tail predicated loops we transform the loop into something that will -/// hopefully make the backend ARMLowOverheadLoops pass's job easier. -/// +/// \file This pass does a few optimisations related to Tail predicated loops +/// and MVE VPT blocks before register allocation is performed. For VPT blocks +/// the goal is to maximize the sizes of the blocks that will be created by the +/// MVE VPT Block Insertion pass (which runs after register allocation). For +/// tail predicated loops we transform the loop into something that will +/// hopefully make the backend ARMLowOverheadLoops pass's job easier. +/// //===----------------------------------------------------------------------===// #include "ARM.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMBaseInfo.h" -#include "MVETailPredUtils.h" +#include "MVETailPredUtils.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/InitializePasses.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include <cassert> @@ -35,11 +35,11 @@ using namespace llvm; #define DEBUG_TYPE "arm-mve-vpt-opts" -static cl::opt<bool> -MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, - cl::desc("Enable merging Loop End and Dec instructions."), - cl::init(true)); - +static cl::opt<bool> +MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, + cl::desc("Enable merging Loop End and Dec instructions."), + cl::init(true)); + namespace { class MVEVPTOptimisations : public MachineFunctionPass { public: @@ -51,315 +51,315 @@ public: bool runOnMachineFunction(MachineFunction &Fn) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + StringRef getPassName() const override { - return "ARM MVE TailPred and VPT Optimisation Pass"; + return "ARM MVE TailPred and VPT 
Optimisation Pass"; } private: - bool MergeLoopEnd(MachineLoop *ML); - bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT); + bool MergeLoopEnd(MachineLoop *ML); + bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT); MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User, Register Target); bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB); bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB); - bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT); - bool ConvertVPSEL(MachineBasicBlock &MBB); + bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT); + bool ConvertVPSEL(MachineBasicBlock &MBB); }; char MVEVPTOptimisations::ID = 0; } // end anonymous namespace -INITIALIZE_PASS_BEGIN(MVEVPTOptimisations, DEBUG_TYPE, - "ARM MVE TailPred and VPT Optimisations pass", false, - false) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_END(MVEVPTOptimisations, DEBUG_TYPE, - "ARM MVE TailPred and VPT Optimisations pass", false, false) - -static MachineInstr *LookThroughCOPY(MachineInstr *MI, - MachineRegisterInfo *MRI) { - while (MI && MI->getOpcode() == TargetOpcode::COPY && - MI->getOperand(1).getReg().isVirtual()) - MI = MRI->getVRegDef(MI->getOperand(1).getReg()); - return MI; -} - -// Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and -// corresponding PHI that make up a low overhead loop. Only handles 'do' loops -// at the moment, returning a t2DoLoopStart in LoopStart. -static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, - MachineInstr *&LoopStart, MachineInstr *&LoopPhi, - MachineInstr *&LoopDec, MachineInstr *&LoopEnd) { - MachineBasicBlock *Header = ML->getHeader(); - MachineBasicBlock *Latch = ML->getLoopLatch(); - if (!Header || !Latch) { - LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n"); - return false; - } - - // Find the loop end from the terminators. - LoopEnd = nullptr; - for (auto &T : Latch->terminators()) { - if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) { - LoopEnd = &T; - break; - } - if (T.getOpcode() == ARM::t2LoopEndDec && - T.getOperand(2).getMBB() == Header) { - LoopEnd = &T; - break; - } - } - if (!LoopEnd) { - LLVM_DEBUG(dbgs() << " no LoopEnd\n"); - return false; - } - LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd); - - // Find the dec from the use of the end. There may be copies between - // instructions. We expect the loop to loop like: - // $vs = t2DoLoopStart ... - // loop: - // $vp = phi [ $vs ], [ $vd ] - // ... - // $vd = t2LoopDec $vp - // ... 
- // t2LoopEnd $vd, loop - if (LoopEnd->getOpcode() == ARM::t2LoopEndDec) - LoopDec = LoopEnd; - else { - LoopDec = - LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI); - if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) { - LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n"); - return false; - } - } - LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec); - - LoopPhi = - LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI); - if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI || - LoopPhi->getNumOperands() != 5 || - (LoopPhi->getOperand(2).getMBB() != Latch && - LoopPhi->getOperand(4).getMBB() != Latch)) { - LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n"); - return false; - } - LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi); - - Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch - ? LoopPhi->getOperand(3).getReg() - : LoopPhi->getOperand(1).getReg(); - LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI); - if (!LoopStart || LoopStart->getOpcode() != ARM::t2DoLoopStart) { - LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n"); - return false; - } - LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart); - - return true; -} - -// This function converts loops with t2LoopEnd and t2LoopEnd instructions into -// a single t2LoopEndDec instruction. To do that it needs to make sure that LR -// will be valid to be used for the low overhead loop, which means nothing else -// is using LR (especially calls) and there are no superfluous copies in the -// loop. The t2LoopEndDec is a branching terminator that produces a value (the -// decrement) around the loop edge, which means we need to be careful that they -// will be valid to allocate without any spilling. -bool MVEVPTOptimisations::MergeLoopEnd(MachineLoop *ML) { - if (!MergeEndDec) - return false; - - LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName() - << "\n"); - - MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; - if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) - return false; - - // Check if there is an illegal instruction (a call) in the low overhead loop - // and if so revert it now before we get any further. - for (MachineBasicBlock *MBB : ML->blocks()) { - for (MachineInstr &MI : *MBB) { - if (MI.isCall()) { - LLVM_DEBUG(dbgs() << "Found call in loop, reverting: " << MI); - RevertDoLoopStart(LoopStart, TII); - RevertLoopDec(LoopDec, TII); - RevertLoopEnd(LoopEnd, TII); - return true; - } - } - } - - // Remove any copies from the loop, to ensure the phi that remains is both - // simpler and contains no extra uses. Because t2LoopEndDec is a terminator - // that cannot spill, we need to be careful what remains in the loop. - Register PhiReg = LoopPhi->getOperand(0).getReg(); - Register DecReg = LoopDec->getOperand(0).getReg(); - Register StartReg = LoopStart->getOperand(0).getReg(); - // Ensure the uses are expected, and collect any copies we want to remove. 
- SmallVector<MachineInstr *, 4> Copies; - auto CheckUsers = [&Copies](Register BaseReg, - ArrayRef<MachineInstr *> ExpectedUsers, - MachineRegisterInfo *MRI) { - SmallVector<Register, 4> Worklist; - Worklist.push_back(BaseReg); - while (!Worklist.empty()) { - Register Reg = Worklist.pop_back_val(); - for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { - if (count(ExpectedUsers, &MI)) - continue; - if (MI.getOpcode() != TargetOpcode::COPY || - !MI.getOperand(0).getReg().isVirtual()) { - LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI); - return false; - } - Worklist.push_back(MI.getOperand(0).getReg()); - Copies.push_back(&MI); - } - } - return true; - }; - if (!CheckUsers(PhiReg, {LoopDec}, MRI) || - !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) || - !CheckUsers(StartReg, {LoopPhi}, MRI)) - return false; - - MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass); - MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass); - MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass); - - if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) { - LoopPhi->getOperand(3).setReg(StartReg); - LoopPhi->getOperand(1).setReg(DecReg); - } else { - LoopPhi->getOperand(1).setReg(StartReg); - LoopPhi->getOperand(3).setReg(DecReg); - } - - // Replace the loop dec and loop end as a single instruction. - MachineInstrBuilder MI = - BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(), - TII->get(ARM::t2LoopEndDec), DecReg) - .addReg(PhiReg) - .add(LoopEnd->getOperand(1)); - (void)MI; - LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr()); - - LoopDec->eraseFromParent(); - LoopEnd->eraseFromParent(); - for (auto *MI : Copies) - MI->eraseFromParent(); - return true; -} - -// Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP -// instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP -// instruction, making the backend ARMLowOverheadLoops passes job of finding the -// VCTP operand much simpler. -bool MVEVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML, - MachineDominatorTree *DT) { - LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop " - << ML->getHeader()->getName() << "\n"); - - // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's - // in the loop. - MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; - if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) - return false; - if (LoopDec != LoopEnd) - return false; - - SmallVector<MachineInstr *, 4> VCTPs; - for (MachineBasicBlock *BB : ML->blocks()) - for (MachineInstr &MI : *BB) - if (isVCTP(&MI)) - VCTPs.push_back(&MI); - - if (VCTPs.empty()) { - LLVM_DEBUG(dbgs() << " no VCTPs\n"); - return false; - } - - // Check all VCTPs are the same. - MachineInstr *FirstVCTP = *VCTPs.begin(); - for (MachineInstr *VCTP : VCTPs) { - LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP); - if (VCTP->getOpcode() != FirstVCTP->getOpcode() || - VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) { - LLVM_DEBUG(dbgs() << " VCTP's are not identical\n"); - return false; - } - } - - // Check for the register being used can be setup before the loop. We expect - // this to be: - // $vx = ... - // loop: - // $vp = PHI [ $vx ], [ $vd ] - // .. - // $vpr = VCTP $vp - // .. - // $vd = t2SUBri $vp, #n - // .. 
- Register CountReg = FirstVCTP->getOperand(1).getReg(); - if (!CountReg.isVirtual()) { - LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n"); - return false; - } - MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI); - if (!Phi || Phi->getOpcode() != TargetOpcode::PHI || - Phi->getNumOperands() != 5 || - (Phi->getOperand(2).getMBB() != ML->getLoopLatch() && - Phi->getOperand(4).getMBB() != ML->getLoopLatch())) { - LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n"); - return false; - } - CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch() - ? Phi->getOperand(3).getReg() - : Phi->getOperand(1).getReg(); - - // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of - // the preheader and add the new CountReg to it. We attempt to place it late - // in the preheader, but may need to move that earlier based on uses. - MachineBasicBlock *MBB = LoopStart->getParent(); - MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator(); - for (MachineInstr &Use : - MRI->use_instructions(LoopStart->getOperand(0).getReg())) - if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) || - !DT->dominates(ML->getHeader(), Use.getParent())) { - LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n"); - return false; - } - - MachineInstrBuilder MI = BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), - TII->get(ARM::t2DoLoopStartTP)) - .add(LoopStart->getOperand(0)) - .add(LoopStart->getOperand(1)) - .addReg(CountReg); - (void)MI; - LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with " - << *MI.getInstr()); - MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass); - LoopStart->eraseFromParent(); - - return true; -} - +INITIALIZE_PASS_BEGIN(MVEVPTOptimisations, DEBUG_TYPE, + "ARM MVE TailPred and VPT Optimisations pass", false, + false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(MVEVPTOptimisations, DEBUG_TYPE, + "ARM MVE TailPred and VPT Optimisations pass", false, false) + +static MachineInstr *LookThroughCOPY(MachineInstr *MI, + MachineRegisterInfo *MRI) { + while (MI && MI->getOpcode() == TargetOpcode::COPY && + MI->getOperand(1).getReg().isVirtual()) + MI = MRI->getVRegDef(MI->getOperand(1).getReg()); + return MI; +} + +// Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and +// corresponding PHI that make up a low overhead loop. Only handles 'do' loops +// at the moment, returning a t2DoLoopStart in LoopStart. +static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, + MachineInstr *&LoopStart, MachineInstr *&LoopPhi, + MachineInstr *&LoopDec, MachineInstr *&LoopEnd) { + MachineBasicBlock *Header = ML->getHeader(); + MachineBasicBlock *Latch = ML->getLoopLatch(); + if (!Header || !Latch) { + LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n"); + return false; + } + + // Find the loop end from the terminators. + LoopEnd = nullptr; + for (auto &T : Latch->terminators()) { + if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) { + LoopEnd = &T; + break; + } + if (T.getOpcode() == ARM::t2LoopEndDec && + T.getOperand(2).getMBB() == Header) { + LoopEnd = &T; + break; + } + } + if (!LoopEnd) { + LLVM_DEBUG(dbgs() << " no LoopEnd\n"); + return false; + } + LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd); + + // Find the dec from the use of the end. There may be copies between + // instructions. We expect the loop to loop like: + // $vs = t2DoLoopStart ... + // loop: + // $vp = phi [ $vs ], [ $vd ] + // ... 
+ // $vd = t2LoopDec $vp + // ... + // t2LoopEnd $vd, loop + if (LoopEnd->getOpcode() == ARM::t2LoopEndDec) + LoopDec = LoopEnd; + else { + LoopDec = + LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI); + if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) { + LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n"); + return false; + } + } + LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec); + + LoopPhi = + LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI); + if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI || + LoopPhi->getNumOperands() != 5 || + (LoopPhi->getOperand(2).getMBB() != Latch && + LoopPhi->getOperand(4).getMBB() != Latch)) { + LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n"); + return false; + } + LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi); + + Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch + ? LoopPhi->getOperand(3).getReg() + : LoopPhi->getOperand(1).getReg(); + LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI); + if (!LoopStart || LoopStart->getOpcode() != ARM::t2DoLoopStart) { + LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n"); + return false; + } + LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart); + + return true; +} + +// This function converts loops with t2LoopEnd and t2LoopEnd instructions into +// a single t2LoopEndDec instruction. To do that it needs to make sure that LR +// will be valid to be used for the low overhead loop, which means nothing else +// is using LR (especially calls) and there are no superfluous copies in the +// loop. The t2LoopEndDec is a branching terminator that produces a value (the +// decrement) around the loop edge, which means we need to be careful that they +// will be valid to allocate without any spilling. +bool MVEVPTOptimisations::MergeLoopEnd(MachineLoop *ML) { + if (!MergeEndDec) + return false; + + LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName() + << "\n"); + + MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; + if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) + return false; + + // Check if there is an illegal instruction (a call) in the low overhead loop + // and if so revert it now before we get any further. + for (MachineBasicBlock *MBB : ML->blocks()) { + for (MachineInstr &MI : *MBB) { + if (MI.isCall()) { + LLVM_DEBUG(dbgs() << "Found call in loop, reverting: " << MI); + RevertDoLoopStart(LoopStart, TII); + RevertLoopDec(LoopDec, TII); + RevertLoopEnd(LoopEnd, TII); + return true; + } + } + } + + // Remove any copies from the loop, to ensure the phi that remains is both + // simpler and contains no extra uses. Because t2LoopEndDec is a terminator + // that cannot spill, we need to be careful what remains in the loop. + Register PhiReg = LoopPhi->getOperand(0).getReg(); + Register DecReg = LoopDec->getOperand(0).getReg(); + Register StartReg = LoopStart->getOperand(0).getReg(); + // Ensure the uses are expected, and collect any copies we want to remove. 
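// [Editor's note: sketch of the intended rewrite, not part of the diff; the
// operand shapes are paraphrased from the comments and BuildMI calls nearby.]
// Before MergeLoopEnd the loop edge carries two instructions:
//
//   %vd:gprlr = t2LoopDec %vp:gprlr, 1
//   t2LoopEnd %vd, %loop.header
//
// Afterwards they are folded into the single branching terminator
//
//   %vd:gprlr = t2LoopEndDec %vp:gprlr, %loop.header
//
// which the later ARMLowOverheadLoops pass is expected to lower to an LE-style
// low-overhead loop.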
+ SmallVector<MachineInstr *, 4> Copies; + auto CheckUsers = [&Copies](Register BaseReg, + ArrayRef<MachineInstr *> ExpectedUsers, + MachineRegisterInfo *MRI) { + SmallVector<Register, 4> Worklist; + Worklist.push_back(BaseReg); + while (!Worklist.empty()) { + Register Reg = Worklist.pop_back_val(); + for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { + if (count(ExpectedUsers, &MI)) + continue; + if (MI.getOpcode() != TargetOpcode::COPY || + !MI.getOperand(0).getReg().isVirtual()) { + LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI); + return false; + } + Worklist.push_back(MI.getOperand(0).getReg()); + Copies.push_back(&MI); + } + } + return true; + }; + if (!CheckUsers(PhiReg, {LoopDec}, MRI) || + !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) || + !CheckUsers(StartReg, {LoopPhi}, MRI)) + return false; + + MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass); + MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass); + MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass); + + if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) { + LoopPhi->getOperand(3).setReg(StartReg); + LoopPhi->getOperand(1).setReg(DecReg); + } else { + LoopPhi->getOperand(1).setReg(StartReg); + LoopPhi->getOperand(3).setReg(DecReg); + } + + // Replace the loop dec and loop end as a single instruction. + MachineInstrBuilder MI = + BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(), + TII->get(ARM::t2LoopEndDec), DecReg) + .addReg(PhiReg) + .add(LoopEnd->getOperand(1)); + (void)MI; + LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr()); + + LoopDec->eraseFromParent(); + LoopEnd->eraseFromParent(); + for (auto *MI : Copies) + MI->eraseFromParent(); + return true; +} + +// Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP +// instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP +// instruction, making the backend ARMLowOverheadLoops passes job of finding the +// VCTP operand much simpler. +bool MVEVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML, + MachineDominatorTree *DT) { + LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop " + << ML->getHeader()->getName() << "\n"); + + // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's + // in the loop. + MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; + if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) + return false; + if (LoopDec != LoopEnd) + return false; + + SmallVector<MachineInstr *, 4> VCTPs; + for (MachineBasicBlock *BB : ML->blocks()) + for (MachineInstr &MI : *BB) + if (isVCTP(&MI)) + VCTPs.push_back(&MI); + + if (VCTPs.empty()) { + LLVM_DEBUG(dbgs() << " no VCTPs\n"); + return false; + } + + // Check all VCTPs are the same. + MachineInstr *FirstVCTP = *VCTPs.begin(); + for (MachineInstr *VCTP : VCTPs) { + LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP); + if (VCTP->getOpcode() != FirstVCTP->getOpcode() || + VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) { + LLVM_DEBUG(dbgs() << " VCTP's are not identical\n"); + return false; + } + } + + // Check for the register being used can be setup before the loop. We expect + // this to be: + // $vx = ... + // loop: + // $vp = PHI [ $vx ], [ $vd ] + // .. + // $vpr = VCTP $vp + // .. + // $vd = t2SUBri $vp, #n + // .. 
+ Register CountReg = FirstVCTP->getOperand(1).getReg(); + if (!CountReg.isVirtual()) { + LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n"); + return false; + } + MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI); + if (!Phi || Phi->getOpcode() != TargetOpcode::PHI || + Phi->getNumOperands() != 5 || + (Phi->getOperand(2).getMBB() != ML->getLoopLatch() && + Phi->getOperand(4).getMBB() != ML->getLoopLatch())) { + LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n"); + return false; + } + CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch() + ? Phi->getOperand(3).getReg() + : Phi->getOperand(1).getReg(); + + // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of + // the preheader and add the new CountReg to it. We attempt to place it late + // in the preheader, but may need to move that earlier based on uses. + MachineBasicBlock *MBB = LoopStart->getParent(); + MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator(); + for (MachineInstr &Use : + MRI->use_instructions(LoopStart->getOperand(0).getReg())) + if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) || + !DT->dominates(ML->getHeader(), Use.getParent())) { + LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n"); + return false; + } + + MachineInstrBuilder MI = BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), + TII->get(ARM::t2DoLoopStartTP)) + .add(LoopStart->getOperand(0)) + .add(LoopStart->getOperand(1)) + .addReg(CountReg); + (void)MI; + LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with " + << *MI.getInstr()); + MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass); + LoopStart->eraseFromParent(); + + return true; +} + // Returns true if Opcode is any VCMP Opcode. static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; } @@ -650,7 +650,7 @@ bool MVEVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) { } for (MachineInstr *DeadInstruction : DeadInstructions) - DeadInstruction->eraseFromParent(); + DeadInstruction->eraseFromParent(); return Modified; } @@ -724,160 +724,160 @@ bool MVEVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) { } for (MachineInstr *DeadInstruction : DeadInstructions) - DeadInstruction->eraseFromParent(); - - return !DeadInstructions.empty(); -} - -bool MVEVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB, - MachineDominatorTree *DT) { - // Scan through the block, looking for instructions that use constants moves - // into VPR that are the negative of one another. These are expected to be - // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant - // mask is kept it or and VPNOT's of it are added or reused as we scan through - // the function. - unsigned LastVPTImm = 0; - Register LastVPTReg = 0; - SmallSet<MachineInstr *, 4> DeadInstructions; - - for (MachineInstr &Instr : MBB.instrs()) { - // Look for predicated MVE instructions. - int PIdx = llvm::findFirstVPTPredOperandIdx(Instr); - if (PIdx == -1) - continue; - Register VPR = Instr.getOperand(PIdx + 1).getReg(); - if (!VPR.isVirtual()) - continue; - - // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr. - MachineInstr *Copy = MRI->getVRegDef(VPR); - if (!Copy || Copy->getOpcode() != TargetOpcode::COPY || - !Copy->getOperand(1).getReg().isVirtual() || - MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) { - LastVPTReg = 0; - continue; - } - Register GPR = Copy->getOperand(1).getReg(); - - // Find the Immediate used by the copy. 
- auto getImm = [&](Register GPR) -> unsigned { - MachineInstr *Def = MRI->getVRegDef(GPR); - if (Def && (Def->getOpcode() == ARM::t2MOVi || - Def->getOpcode() == ARM::t2MOVi16)) - return Def->getOperand(1).getImm(); - return -1U; - }; - unsigned Imm = getImm(GPR); - if (Imm == -1U) { - LastVPTReg = 0; - continue; - } - - unsigned NotImm = ~Imm & 0xffff; - if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) { - Instr.getOperand(PIdx + 1).setReg(LastVPTReg); - if (MRI->use_empty(VPR)) { - DeadInstructions.insert(Copy); - if (MRI->hasOneUse(GPR)) - DeadInstructions.insert(MRI->getVRegDef(GPR)); - } - LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr); - } else if (LastVPTReg != 0 && LastVPTImm == NotImm) { - // We have found the not of a previous constant. Create a VPNot of the - // earlier predicate reg and use it instead of the copy. - Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass); - auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(), - TII->get(ARM::MVE_VPNOT), NewVPR) - .addReg(LastVPTReg); - addUnpredicatedMveVpredNOp(VPNot); - - // Use the new register and check if the def is now dead. - Instr.getOperand(PIdx + 1).setReg(NewVPR); - if (MRI->use_empty(VPR)) { - DeadInstructions.insert(Copy); - if (MRI->hasOneUse(GPR)) - DeadInstructions.insert(MRI->getVRegDef(GPR)); - } - LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at " - << Instr); - VPR = NewVPR; - } - - LastVPTImm = Imm; - LastVPTReg = VPR; - } - - for (MachineInstr *DI : DeadInstructions) - DI->eraseFromParent(); - - return !DeadInstructions.empty(); -} - -// Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a -// somewhat blunt approximation to allow tail predicated with vpsel -// instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly -// different semantics under tail predication. Until that is modelled we just -// convert to a VMOVT (via a predicated VORR) instead. -bool MVEVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) { - bool HasVCTP = false; - SmallVector<MachineInstr *, 4> DeadInstructions; - - for (MachineInstr &MI : MBB.instrs()) { - if (isVCTP(&MI)) { - HasVCTP = true; - continue; - } - - if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL) - continue; - - MachineInstrBuilder MIBuilder = - BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR)) - .add(MI.getOperand(0)) - .add(MI.getOperand(1)) - .add(MI.getOperand(1)) - .addImm(ARMVCC::Then) - .add(MI.getOperand(4)) - .add(MI.getOperand(2)); - // Silence unused variable warning in release builds. - (void)MIBuilder; - LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump(); - dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump()); - DeadInstructions.push_back(&MI); - } - - for (MachineInstr *DeadInstruction : DeadInstructions) - DeadInstruction->eraseFromParent(); + DeadInstruction->eraseFromParent(); return !DeadInstructions.empty(); } +bool MVEVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB, + MachineDominatorTree *DT) { + // Scan through the block, looking for instructions that use constants moves + // into VPR that are the negative of one another. These are expected to be + // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant + // mask is kept it or and VPNOT's of it are added or reused as we scan through + // the function. + unsigned LastVPTImm = 0; + Register LastVPTReg = 0; + SmallSet<MachineInstr *, 4> DeadInstructions; + + for (MachineInstr &Instr : MBB.instrs()) { + // Look for predicated MVE instructions. 
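// [Editor's note: illustrative sketch only, not part of the diff or of the
// upstream LLVM sources.] ReplaceConstByVPNOTs keys on 16-bit VPR masks that
// are bitwise complements of one another: if the previously seen predicate
// constant was Imm, a later constant equal to ~Imm & 0xffff can reuse the old
// register through a single MVE_VPNOT instead of another constant move. The
// mask arithmetic, with a made-up value:
#include <cassert>

int main() {
  const unsigned Imm = 0x5555;           // hypothetical earlier VPR mask
  const unsigned NotImm = ~Imm & 0xffff; // the same computation the pass does
  assert(NotImm == 0xAAAA);              // every predicate lane flipped
  assert((Imm ^ NotImm) == 0xffff);      // the two masks cover all 16 bits
  return 0;
}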
+ int PIdx = llvm::findFirstVPTPredOperandIdx(Instr); + if (PIdx == -1) + continue; + Register VPR = Instr.getOperand(PIdx + 1).getReg(); + if (!VPR.isVirtual()) + continue; + + // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr. + MachineInstr *Copy = MRI->getVRegDef(VPR); + if (!Copy || Copy->getOpcode() != TargetOpcode::COPY || + !Copy->getOperand(1).getReg().isVirtual() || + MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) { + LastVPTReg = 0; + continue; + } + Register GPR = Copy->getOperand(1).getReg(); + + // Find the Immediate used by the copy. + auto getImm = [&](Register GPR) -> unsigned { + MachineInstr *Def = MRI->getVRegDef(GPR); + if (Def && (Def->getOpcode() == ARM::t2MOVi || + Def->getOpcode() == ARM::t2MOVi16)) + return Def->getOperand(1).getImm(); + return -1U; + }; + unsigned Imm = getImm(GPR); + if (Imm == -1U) { + LastVPTReg = 0; + continue; + } + + unsigned NotImm = ~Imm & 0xffff; + if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) { + Instr.getOperand(PIdx + 1).setReg(LastVPTReg); + if (MRI->use_empty(VPR)) { + DeadInstructions.insert(Copy); + if (MRI->hasOneUse(GPR)) + DeadInstructions.insert(MRI->getVRegDef(GPR)); + } + LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr); + } else if (LastVPTReg != 0 && LastVPTImm == NotImm) { + // We have found the not of a previous constant. Create a VPNot of the + // earlier predicate reg and use it instead of the copy. + Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass); + auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(), + TII->get(ARM::MVE_VPNOT), NewVPR) + .addReg(LastVPTReg); + addUnpredicatedMveVpredNOp(VPNot); + + // Use the new register and check if the def is now dead. + Instr.getOperand(PIdx + 1).setReg(NewVPR); + if (MRI->use_empty(VPR)) { + DeadInstructions.insert(Copy); + if (MRI->hasOneUse(GPR)) + DeadInstructions.insert(MRI->getVRegDef(GPR)); + } + LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at " + << Instr); + VPR = NewVPR; + } + + LastVPTImm = Imm; + LastVPTReg = VPR; + } + + for (MachineInstr *DI : DeadInstructions) + DI->eraseFromParent(); + + return !DeadInstructions.empty(); +} + +// Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a +// somewhat blunt approximation to allow tail predicated with vpsel +// instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly +// different semantics under tail predication. Until that is modelled we just +// convert to a VMOVT (via a predicated VORR) instead. +bool MVEVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) { + bool HasVCTP = false; + SmallVector<MachineInstr *, 4> DeadInstructions; + + for (MachineInstr &MI : MBB.instrs()) { + if (isVCTP(&MI)) { + HasVCTP = true; + continue; + } + + if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL) + continue; + + MachineInstrBuilder MIBuilder = + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MI.getOperand(1)) + .addImm(ARMVCC::Then) + .add(MI.getOperand(4)) + .add(MI.getOperand(2)); + // Silence unused variable warning in release builds. 
+ (void)MIBuilder; + LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump(); + dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump()); + DeadInstructions.push_back(&MI); + } + + for (MachineInstr *DeadInstruction : DeadInstructions) + DeadInstruction->eraseFromParent(); + + return !DeadInstructions.empty(); +} + bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) { const ARMSubtarget &STI = static_cast<const ARMSubtarget &>(Fn.getSubtarget()); - if (!STI.isThumb2() || !STI.hasLOB()) + if (!STI.isThumb2() || !STI.hasLOB()) return false; TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); MRI = &Fn.getRegInfo(); - MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>(); - MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>(); + MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>(); + MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>(); LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n" << "********** Function: " << Fn.getName() << '\n'); bool Modified = false; - for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) { - Modified |= MergeLoopEnd(ML); - Modified |= ConvertTailPredLoop(ML, DT); - } - + for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) { + Modified |= MergeLoopEnd(ML); + Modified |= ConvertTailPredLoop(ML, DT); + } + for (MachineBasicBlock &MBB : Fn) { - Modified |= ReplaceConstByVPNOTs(MBB, DT); + Modified |= ReplaceConstByVPNOTs(MBB, DT); Modified |= ReplaceVCMPsByVPNOTs(MBB); Modified |= ReduceOldVCCRValueUses(MBB); - Modified |= ConvertVPSEL(MBB); + Modified |= ConvertVPSEL(MBB); } LLVM_DEBUG(dbgs() << "**************************************\n"); diff --git a/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make index 089e7bf206..3f7fdcb6de 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make +++ b/contrib/libs/llvm12/lib/Target/ARM/TargetInfo/ya.make @@ -12,13 +12,13 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/lib/Support + contrib/libs/llvm12 + contrib/libs/llvm12/lib/Support ) ADDINCL( - contrib/libs/llvm12/lib/Target/ARM - contrib/libs/llvm12/lib/Target/ARM/TargetInfo + contrib/libs/llvm12/lib/Target/ARM + contrib/libs/llvm12/lib/Target/ARM/TargetInfo ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.cpp index d728572e28..9dd389f440 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -12,7 +12,7 @@ #include "Thumb2InstrInfo.h" #include "ARMMachineFunctionInfo.h" -#include "ARMSubtarget.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -39,11 +39,11 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, cl::desc("Use old-style Thumb2 if-conversion heuristics"), cl::init(false)); -static cl::opt<bool> -PreferNoCSEL("prefer-no-csel", cl::Hidden, - cl::desc("Prefer predicated Move to CSEL"), - cl::init(false)); - +static cl::opt<bool> +PreferNoCSEL("prefer-no-csel", cl::Hidden, + cl::desc("Prefer predicated Move to CSEL"), + cl::init(false)); + Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI) {} @@ -124,31 +124,31 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, return getITInstrPredicate(*MBBI, 
PredReg) == ARMCC::AL; } -MachineInstr * -Thumb2InstrInfo::optimizeSelect(MachineInstr &MI, - SmallPtrSetImpl<MachineInstr *> &SeenMIs, - bool PreferFalse) const { - // Try to use the base optimizeSelect, which uses canFoldIntoMOVCC to fold the - // MOVCC into another instruction. If that fails on 8.1-M fall back to using a - // CSEL. - MachineInstr *RV = ARMBaseInstrInfo::optimizeSelect(MI, SeenMIs, PreferFalse); - if (!RV && getSubtarget().hasV8_1MMainlineOps() && !PreferNoCSEL) { - Register DestReg = MI.getOperand(0).getReg(); - - if (!DestReg.isVirtual()) - return nullptr; - - MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), - get(ARM::t2CSEL), DestReg) - .add(MI.getOperand(2)) - .add(MI.getOperand(1)) - .add(MI.getOperand(3)); - SeenMIs.insert(NewMI); - return NewMI; - } - return RV; -} - +MachineInstr * +Thumb2InstrInfo::optimizeSelect(MachineInstr &MI, + SmallPtrSetImpl<MachineInstr *> &SeenMIs, + bool PreferFalse) const { + // Try to use the base optimizeSelect, which uses canFoldIntoMOVCC to fold the + // MOVCC into another instruction. If that fails on 8.1-M fall back to using a + // CSEL. + MachineInstr *RV = ARMBaseInstrInfo::optimizeSelect(MI, SeenMIs, PreferFalse); + if (!RV && getSubtarget().hasV8_1MMainlineOps() && !PreferNoCSEL) { + Register DestReg = MI.getOperand(0).getReg(); + + if (!DestReg.isVirtual()) + return nullptr; + + MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), + get(ARM::t2CSEL), DestReg) + .add(MI.getOperand(2)) + .add(MI.getOperand(1)) + .add(MI.getOperand(3)); + SeenMIs.insert(NewMI); + return NewMI; + } + return RV; +} + void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, @@ -258,22 +258,22 @@ void Thumb2InstrInfo::expandLoadStackGuard( expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12); } -MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI, - bool NewMI, - unsigned OpIdx1, - unsigned OpIdx2) const { - switch (MI.getOpcode()) { - case ARM::MVE_VMAXNMAf16: - case ARM::MVE_VMAXNMAf32: - case ARM::MVE_VMINNMAf16: - case ARM::MVE_VMINNMAf32: - // Don't allow predicated instructions to be commuted. - if (getVPTInstrPredicate(MI) != ARMVCC::None) - return nullptr; - } - return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); -} - +MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { + switch (MI.getOpcode()) { + case ARM::MVE_VMAXNMAf16: + case ARM::MVE_VMAXNMAf32: + case ARM::MVE_VMINNMAf16: + case ARM::MVE_VMINNMAf32: + // Don't allow predicated instructions to be commuted. 
+ if (getVPTInstrPredicate(MI) != ARMVCC::None) + return nullptr; + } + return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); +} + void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, diff --git a/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.h index 808167bfdc..6fda236159 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.h +++ b/contrib/libs/llvm12/lib/Target/ARM/Thumb2InstrInfo.h @@ -60,14 +60,14 @@ public: /// const ThumbRegisterInfo &getRegisterInfo() const override { return RI; } - MachineInstr *optimizeSelect(MachineInstr &MI, - SmallPtrSetImpl<MachineInstr *> &SeenMIs, - bool) const override; - - MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, - unsigned OpIdx1, - unsigned OpIdx2) const override; - + MachineInstr *optimizeSelect(MachineInstr &MI, + SmallPtrSetImpl<MachineInstr *> &SeenMIs, + bool) const override; + + MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const override; + private: void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override; }; diff --git a/contrib/libs/llvm12/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/libs/llvm12/lib/Target/ARM/Thumb2SizeReduction.cpp index 0f7e190386..a200a5cf35 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/contrib/libs/llvm12/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -43,7 +43,7 @@ using namespace llvm; -#define DEBUG_TYPE "thumb2-reduce-size" +#define DEBUG_TYPE "thumb2-reduce-size" #define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass" STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); diff --git a/contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make b/contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make index 7a980b708c..fed79316b8 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make +++ b/contrib/libs/llvm12/lib/Target/ARM/Utils/ya.make @@ -12,15 +12,15 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/Support + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/Support ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM - contrib/libs/llvm12/lib/Target/ARM - contrib/libs/llvm12/lib/Target/ARM/Utils + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM + contrib/libs/llvm12/lib/Target/ARM + contrib/libs/llvm12/lib/Target/ARM/Utils ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/ARM/ya.make b/contrib/libs/llvm12/lib/Target/ARM/ya.make index 9551f9f11b..7387bc4532 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ya.make +++ b/contrib/libs/llvm12/lib/Target/ARM/ya.make @@ -12,28 +12,28 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/Analysis - contrib/libs/llvm12/lib/CodeGen - contrib/libs/llvm12/lib/CodeGen/AsmPrinter - contrib/libs/llvm12/lib/CodeGen/GlobalISel - contrib/libs/llvm12/lib/CodeGen/SelectionDAG - contrib/libs/llvm12/lib/IR - contrib/libs/llvm12/lib/MC - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target - contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc - contrib/libs/llvm12/lib/Target/ARM/TargetInfo - contrib/libs/llvm12/lib/Target/ARM/Utils - 
contrib/libs/llvm12/lib/Transforms/CFGuard - contrib/libs/llvm12/lib/Transforms/Scalar - contrib/libs/llvm12/lib/Transforms/Utils + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/Analysis + contrib/libs/llvm12/lib/CodeGen + contrib/libs/llvm12/lib/CodeGen/AsmPrinter + contrib/libs/llvm12/lib/CodeGen/GlobalISel + contrib/libs/llvm12/lib/CodeGen/SelectionDAG + contrib/libs/llvm12/lib/IR + contrib/libs/llvm12/lib/MC + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target + contrib/libs/llvm12/lib/Target/ARM/MCTargetDesc + contrib/libs/llvm12/lib/Target/ARM/TargetInfo + contrib/libs/llvm12/lib/Target/ARM/Utils + contrib/libs/llvm12/lib/Transforms/CFGuard + contrib/libs/llvm12/lib/Transforms/Scalar + contrib/libs/llvm12/lib/Transforms/Utils ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM - contrib/libs/llvm12/lib/Target/ARM + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/ARM + contrib/libs/llvm12/lib/Target/ARM ) NO_COMPILER_WARNINGS() @@ -46,7 +46,7 @@ SRCS( ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp ARMBasicBlockInfo.cpp - ARMBlockPlacement.cpp + ARMBlockPlacement.cpp ARMCallLowering.cpp ARMCallingConv.cpp ARMConstantIslandPass.cpp @@ -69,7 +69,7 @@ SRCS( ARMParallelDSP.cpp ARMRegisterBankInfo.cpp ARMRegisterInfo.cpp - ARMSLSHardening.cpp + ARMSLSHardening.cpp ARMSelectionDAGInfo.cpp ARMSubtarget.cpp ARMTargetMachine.cpp |