author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/AArch64 | |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz | |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/AArch64')
85 files changed, 14997 insertions, 14997 deletions
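
The insertion and deletion counts above match exactly, which is consistent with the commit message: the authorship restoration removes each affected line and re-adds it unchanged, so the diff below is effectively whitespace/annotation-only. A minimal sketch of how one might double-check that per file, assuming a local clone of the ydb repository with `git` on PATH (the commit hash and path filter are taken from the header above):

```python
# Sketch: verify that every file touched by this commit has matching
# insertion/deletion counts, i.e. each changed line was removed and re-added.
import subprocess

COMMIT = "2598ef1d0aee359b4b6d5fdd1758916d5907d04f"
PATH_FILTER = "contrib/libs/llvm12/lib/Target/AArch64"

# --numstat prints "added<TAB>deleted<TAB>path" per file; --format= suppresses
# the commit header so only the numstat lines remain.
out = subprocess.run(
    ["git", "show", "--numstat", "--format=", COMMIT, "--", PATH_FILTER],
    capture_output=True, text=True, check=True,
).stdout

for line in out.splitlines():
    if not line.strip():
        continue
    added, deleted, path = line.split("\t", 2)
    if added != deleted:
        print(f"asymmetric change: +{added} -{deleted} {path}")
```

If no line is printed, every file in the filtered diff was changed symmetrically, matching the "14997 insertions, 14997 deletions" summary.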
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64.h index d2170a99e0..88d25e474e 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64.h @@ -58,10 +58,10 @@ ModulePass *createSVEIntrinsicOptsPass(); InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &); -FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone); -FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone); -FunctionPass *createAArch64PostLegalizerLowering(); -FunctionPass *createAArch64PostSelectOptimize(); +FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone); +FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone); +FunctionPass *createAArch64PostLegalizerLowering(); +FunctionPass *createAArch64PostSelectOptimize(); FunctionPass *createAArch64StackTaggingPass(bool IsOptNone); FunctionPass *createAArch64StackTaggingPreRAPass(); @@ -82,8 +82,8 @@ void initializeAArch64LoadStoreOptPass(PassRegistry&); void initializeAArch64SIMDInstrOptPass(PassRegistry&); void initializeAArch64PreLegalizerCombinerPass(PassRegistry&); void initializeAArch64PostLegalizerCombinerPass(PassRegistry &); -void initializeAArch64PostLegalizerLoweringPass(PassRegistry &); -void initializeAArch64PostSelectOptimizePass(PassRegistry &); +void initializeAArch64PostLegalizerLoweringPass(PassRegistry &); +void initializeAArch64PostSelectOptimizePass(PassRegistry &); void initializeAArch64PromoteConstantPass(PassRegistry&); void initializeAArch64RedundantCopyEliminationPass(PassRegistry&); void initializeAArch64StorePairSuppressPass(PassRegistry&); diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64.td index 762855207d..385216a208 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64.td @@ -61,9 +61,9 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true", "Enable ARMv8.1 Large System Extension (LSE) atomic instructions">; -def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true", - "Enable out of line atomics to support LSE instructions">; - +def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true", + "Enable out of line atomics to support LSE instructions">; + def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true", "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">; @@ -75,12 +75,12 @@ def FeatureLOR : SubtargetFeature< "lor", "HasLOR", "true", "Enables ARM v8.1 Limited Ordering Regions extension">; -def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2", - "true", "Enable RW operand CONTEXTIDR_EL2" >; - -def FeatureVH : SubtargetFeature<"vh", "HasVH", "true", - "Enables ARM v8.1 Virtual Host extension", [FeatureCONTEXTIDREL2] >; +def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2", + "true", "Enable RW operand CONTEXTIDR_EL2" >; +def FeatureVH : SubtargetFeature<"vh", "HasVH", "true", + "Enables ARM v8.1 Virtual Host extension", [FeatureCONTEXTIDREL2] >; + def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", "Enable ARMv8 PMUv3 Performance Monitors extension">; @@ -218,10 +218,10 @@ def FeatureArithmeticCbzFusion : SubtargetFeature< "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", "CPU 
fuses arithmetic + cbz/cbnz operations">; -def FeatureCmpBccFusion : SubtargetFeature< - "cmp-bcc-fusion", "HasCmpBccFusion", "true", - "CPU fuses cmp+bcc operations">; - +def FeatureCmpBccFusion : SubtargetFeature< + "cmp-bcc-fusion", "HasCmpBccFusion", "true", + "CPU fuses cmp+bcc operations">; + def FeatureFuseAddress : SubtargetFeature< "fuse-address", "HasFuseAddress", "true", "CPU fuses address generation and memory operations">; @@ -265,8 +265,8 @@ def FeatureDotProd : SubtargetFeature< "dotprod", "HasDotProd", "true", "Enable dot product support">; -def FeaturePAuth : SubtargetFeature< - "pauth", "HasPAuth", "true", +def FeaturePAuth : SubtargetFeature< + "pauth", "HasPAuth", "true", "Enable v8.3-A Pointer Authentication extension">; def FeatureJS : SubtargetFeature< @@ -320,8 +320,8 @@ def FeatureTLB_RMI : SubtargetFeature< "tlb-rmi", "HasTLB_RMI", "true", "Enable v8.4-A TLB Range and Maintenance Instructions">; -def FeatureFlagM : SubtargetFeature< - "flagm", "HasFlagM", "true", +def FeatureFlagM : SubtargetFeature< + "flagm", "HasFlagM", "true", "Enable v8.4-A Flag Manipulation Instructions">; // 8.4 RCPC enchancements: LDAPR & STLR instructions with Immediate Offset @@ -404,24 +404,24 @@ def FeatureMatMulFP32 : SubtargetFeature<"f32mm", "HasMatMulFP32", def FeatureMatMulFP64 : SubtargetFeature<"f64mm", "HasMatMulFP64", "true", "Enable Matrix Multiply FP64 Extension", [FeatureSVE]>; -def FeatureXS : SubtargetFeature<"xs", "HasXS", - "true", "Enable Armv8.7-A limited-TLB-maintenance instruction">; - -def FeatureWFxT : SubtargetFeature<"wfxt", "HasWFxT", - "true", "Enable Armv8.7-A WFET and WFIT instruction">; - -def FeatureHCX : SubtargetFeature< - "hcx", "HasHCX", "true", "Enable Armv8.7-A HCRX_EL2 system register">; - -def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64", - "true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension">; - -def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE", - "true", "Enable Branch Record Buffer Extension">; - -def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF", - "true", "Enable extra register in the Statistical Profiling Extension">; - +def FeatureXS : SubtargetFeature<"xs", "HasXS", + "true", "Enable Armv8.7-A limited-TLB-maintenance instruction">; + +def FeatureWFxT : SubtargetFeature<"wfxt", "HasWFxT", + "true", "Enable Armv8.7-A WFET and WFIT instruction">; + +def FeatureHCX : SubtargetFeature< + "hcx", "HasHCX", "true", "Enable Armv8.7-A HCRX_EL2 system register">; + +def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64", + "true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension">; + +def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE", + "true", "Enable Branch Record Buffer Extension">; + +def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF", + "true", "Enable extra register in the Statistical Profiling Extension">; + def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps", "true", "Enable fine grained virtualization traps extension">; @@ -442,14 +442,14 @@ def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>; def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", - "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePAuth, + "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePAuth, FeatureJS, FeatureCCIDX, FeatureComplxNum]>; def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true", "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd, - FeatureNV, FeatureMPAM, FeatureDIT, 
+ FeatureNV, FeatureMPAM, FeatureDIT, FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeaturePMU, FeatureTLB_RMI, - FeatureFlagM, FeatureRCPC_IMMO]>; + FeatureFlagM, FeatureRCPC_IMMO]>; def HasV8_5aOps : SubtargetFeature< "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions", @@ -462,26 +462,26 @@ def HasV8_6aOps : SubtargetFeature< [HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps, FeatureEnhancedCounterVirtualization, FeatureMatMulInt8]>; -def HasV8_7aOps : SubtargetFeature< - "v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions", - [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>; - -def HasV8_0rOps : SubtargetFeature< - "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions", - [//v8.1 - FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2, - //v8.2 - FeaturePerfMon, FeatureRAS, FeaturePsUAO, FeatureSM4, - FeatureSHA3, FeatureCCPP, FeatureFullFP16, FeaturePAN_RWV, - //v8.3 - FeatureComplxNum, FeatureCCIDX, FeatureJS, - FeaturePAuth, FeatureRCPC, - //v8.4 - FeatureDotProd, FeatureFP16FML, FeatureTRACEV8_4, - FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO, - //v8.5 - FeatureSSBS, FeaturePredRes, FeatureSB, FeatureSpecRestrict]>; - +def HasV8_7aOps : SubtargetFeature< + "v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions", + [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>; + +def HasV8_0rOps : SubtargetFeature< + "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions", + [//v8.1 + FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2, + //v8.2 + FeaturePerfMon, FeatureRAS, FeaturePsUAO, FeatureSM4, + FeatureSHA3, FeatureCCPP, FeatureFullFP16, FeaturePAN_RWV, + //v8.3 + FeatureComplxNum, FeatureCCIDX, FeatureJS, + FeaturePAuth, FeatureRCPC, + //v8.4 + FeatureDotProd, FeatureFP16FML, FeatureTRACEV8_4, + FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO, + //v8.5 + FeatureSSBS, FeaturePredRes, FeatureSB, FeatureSpecRestrict]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -543,11 +543,11 @@ def SVEUnsupported : AArch64Unsupported { } def PAUnsupported : AArch64Unsupported { - let F = [HasPAuth]; + let F = [HasPAuth]; } include "AArch64SchedA53.td" -include "AArch64SchedA55.td" +include "AArch64SchedA55.td" include "AArch64SchedA57.td" include "AArch64SchedCyclone.td" include "AArch64SchedFalkor.td" @@ -557,9 +557,9 @@ include "AArch64SchedExynosM4.td" include "AArch64SchedExynosM5.td" include "AArch64SchedThunderX.td" include "AArch64SchedThunderX2T99.td" -include "AArch64SchedA64FX.td" +include "AArch64SchedA64FX.td" include "AArch64SchedThunderX3T110.td" -include "AArch64SchedTSV110.td" +include "AArch64SchedTSV110.td" def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", "Cortex-A35 ARM processors", [ @@ -619,9 +619,9 @@ def ProcA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", FeatureDotProd, FeatureFPARMv8, FeatureFullFP16, - FeatureFuseAddress, - FeatureFuseAES, - FeatureFuseLiterals, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseLiterals, FeatureNEON, FeatureRAS, FeatureRCPC, @@ -634,7 +634,7 @@ def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", FeatureCrypto, FeatureFPARMv8, FeatureFuseAES, - FeatureFuseLiterals, + FeatureFuseLiterals, FeatureNEON, FeaturePerfMon ]>; @@ -666,7 +666,7 @@ def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", 
"Cortex-A76 ARM processors", [ HasV8_2aOps, FeatureFPARMv8, - FeatureFuseAES, + FeatureFuseAES, FeatureNEON, FeatureRCPC, FeatureCrypto, @@ -678,9 +678,9 @@ def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", "Cortex-A77 ARM processors", [ HasV8_2aOps, - FeatureCmpBccFusion, + FeatureCmpBccFusion, FeatureFPARMv8, - FeatureFuseAES, + FeatureFuseAES, FeatureNEON, FeatureRCPC, FeatureCrypto, FeatureFullFP16, @@ -691,7 +691,7 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", "CortexA78", "Cortex-A78 ARM processors", [ HasV8_2aOps, - FeatureCmpBccFusion, + FeatureCmpBccFusion, FeatureCrypto, FeatureFPARMv8, FeatureFuseAES, @@ -704,39 +704,39 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", FeatureSSBS, FeatureDotProd]>; -def ProcA78C : SubtargetFeature<"cortex-a78c", "ARMProcFamily", - "CortexA78C", - "Cortex-A78C ARM processors", [ - HasV8_2aOps, - FeatureCmpBccFusion, - FeatureCrypto, - FeatureDotProd, - FeatureFlagM, - FeatureFP16FML, - FeatureFPARMv8, - FeatureFullFP16, - FeatureFuseAES, - FeatureNEON, - FeaturePAuth, - FeaturePerfMon, - FeaturePostRAScheduler, - FeatureRCPC, - FeatureSPE, - FeatureSSBS]>; - -def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", - "CortexR82", - "Cortex-R82 ARM Processors", [ - FeaturePostRAScheduler, - // TODO: crypto and FuseAES - // All other features are implied by v8_0r ops: - HasV8_0rOps, - ]>; - +def ProcA78C : SubtargetFeature<"cortex-a78c", "ARMProcFamily", + "CortexA78C", + "Cortex-A78C ARM processors", [ + HasV8_2aOps, + FeatureCmpBccFusion, + FeatureCrypto, + FeatureDotProd, + FeatureFlagM, + FeatureFP16FML, + FeatureFPARMv8, + FeatureFullFP16, + FeatureFuseAES, + FeatureNEON, + FeaturePAuth, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureRCPC, + FeatureSPE, + FeatureSSBS]>; + +def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", + "CortexR82", + "Cortex-R82 ARM Processors", [ + FeaturePostRAScheduler, + // TODO: crypto and FuseAES + // All other features are implied by v8_0r ops: + HasV8_0rOps, + ]>; + def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", "Cortex-X1 ARM processors", [ HasV8_2aOps, - FeatureCmpBccFusion, + FeatureCmpBccFusion, FeatureCrypto, FeatureFPARMv8, FeatureFuseAES, @@ -758,10 +758,10 @@ def ProcA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", FeatureFullFP16, FeatureSVE, FeaturePostRAScheduler, - FeatureComplxNum, - FeatureAggressiveFMA, - FeatureArithmeticBccFusion, - FeaturePredictableSelectIsExpensive + FeatureComplxNum, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeaturePredictableSelectIsExpensive ]>; def ProcCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel", @@ -868,38 +868,38 @@ def ProcAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", HasV8_4aOps ]>; -def ProcAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", - "Apple A14", [ - FeatureAggressiveFMA, - FeatureAlternateSExtLoadCVTF32Pattern, - FeatureAltFPCmp, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, - FeatureCrypto, - FeatureDisableLatencySchedHeuristic, - FeatureFPARMv8, - FeatureFRInt3264, - FeatureFuseAddress, - FeatureFuseAES, - FeatureFuseArithmeticLogic, - FeatureFuseCCSelect, - FeatureFuseCryptoEOR, - FeatureFuseLiterals, - FeatureNEON, - FeaturePerfMon, - FeatureSpecRestrict, - FeatureSSBS, - FeatureSB, - FeaturePredRes, - FeatureCacheDeepPersist, - FeatureZCRegMove, - FeatureZCZeroing, - FeatureFullFP16, - 
FeatureFP16FML, - FeatureSHA3, - HasV8_4aOps - ]>; - +def ProcAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", + "Apple A14", [ + FeatureAggressiveFMA, + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureAltFPCmp, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureCrypto, + FeatureDisableLatencySchedHeuristic, + FeatureFPARMv8, + FeatureFRInt3264, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseCryptoEOR, + FeatureFuseLiterals, + FeatureNEON, + FeaturePerfMon, + FeatureSpecRestrict, + FeatureSSBS, + FeatureSB, + FeaturePredRes, + FeatureCacheDeepPersist, + FeatureZCRegMove, + FeatureZCZeroing, + FeatureFullFP16, + FeatureFP16FML, + FeatureSHA3, + HasV8_4aOps + ]>; + def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", "Samsung Exynos-M3 processors", [FeatureCRC, @@ -993,38 +993,38 @@ def ProcNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", FeatureSSBS, ]>; -def ProcNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", - "NeoverseN2", - "Neoverse N2 ARM processors", [ - HasV8_5aOps, - FeatureBF16, - FeatureETE, - FeatureMatMulInt8, - FeatureMTE, - FeatureSVE2, - FeatureSVE2BitPerm, - FeatureTRBE]>; - -def ProcNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", - "NeoverseV1", - "Neoverse V1 ARM processors", [ - HasV8_4aOps, - FeatureBF16, - FeatureCacheDeepPersist, - FeatureCrypto, - FeatureFPARMv8, - FeatureFP16FML, - FeatureFullFP16, - FeatureFuseAES, - FeatureMatMulInt8, - FeatureNEON, - FeaturePerfMon, - FeaturePostRAScheduler, - FeatureRandGen, - FeatureSPE, - FeatureSSBS, - FeatureSVE]>; - +def ProcNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", + "NeoverseN2", + "Neoverse N2 ARM processors", [ + HasV8_5aOps, + FeatureBF16, + FeatureETE, + FeatureMatMulInt8, + FeatureMTE, + FeatureSVE2, + FeatureSVE2BitPerm, + FeatureTRBE]>; + +def ProcNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", + "NeoverseV1", + "Neoverse V1 ARM processors", [ + HasV8_4aOps, + FeatureBF16, + FeatureCacheDeepPersist, + FeatureCrypto, + FeatureFPARMv8, + FeatureFP16FML, + FeatureFullFP16, + FeatureFuseAES, + FeatureMatMulInt8, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureRandGen, + FeatureSPE, + FeatureSSBS, + FeatureSVE]>; + def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", "Qualcomm Saphira processors", [ FeatureCrypto, @@ -1065,7 +1065,7 @@ def ProcThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily", FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureLSE, - FeaturePAuth, + FeaturePAuth, FeatureUseAA, FeatureBalanceFPOps, FeaturePerfMon, @@ -1147,7 +1147,7 @@ def : ProcessorModel<"generic", NoSchedModel, [ def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>; def : ProcessorModel<"cortex-a34", CortexA53Model, [ProcA35]>; def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; -def : ProcessorModel<"cortex-a55", CortexA55Model, [ProcA55]>; +def : ProcessorModel<"cortex-a55", CortexA55Model, [ProcA55]>; def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; def : ProcessorModel<"cortex-a65", CortexA53Model, [ProcA65]>; def : ProcessorModel<"cortex-a65ae", CortexA53Model, [ProcA65]>; @@ -1158,13 +1158,13 @@ def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>; def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>; def : ProcessorModel<"cortex-a77", CortexA57Model, [ProcA77]>; def : ProcessorModel<"cortex-a78", CortexA57Model, 
[ProcA78]>; -def : ProcessorModel<"cortex-a78c", CortexA57Model, [ProcA78C]>; -def : ProcessorModel<"cortex-r82", CortexA55Model, [ProcR82]>; +def : ProcessorModel<"cortex-a78c", CortexA57Model, [ProcA78C]>; +def : ProcessorModel<"cortex-r82", CortexA55Model, [ProcR82]>; def : ProcessorModel<"cortex-x1", CortexA57Model, [ProcX1]>; def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>; def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>; -def : ProcessorModel<"neoverse-n2", CortexA57Model, [ProcNeoverseN2]>; -def : ProcessorModel<"neoverse-v1", CortexA57Model, [ProcNeoverseV1]>; +def : ProcessorModel<"neoverse-n2", CortexA57Model, [ProcNeoverseN2]>; +def : ProcessorModel<"neoverse-v1", CortexA57Model, [ProcNeoverseV1]>; def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>; def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>; def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>; @@ -1180,7 +1180,7 @@ def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>; def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>; // Marvell ThunderX3T110 Processors. def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, [ProcThunderX3T110]>; -def : ProcessorModel<"tsv110", TSV110Model, [ProcTSV110]>; +def : ProcessorModel<"tsv110", TSV110Model, [ProcTSV110]>; // Support cyclone as an alias for apple-a7 so we can still LTO old bitcode. def : ProcessorModel<"cyclone", CycloneModel, [ProcAppleA7]>; @@ -1193,7 +1193,7 @@ def : ProcessorModel<"apple-a10", CycloneModel, [ProcAppleA10]>; def : ProcessorModel<"apple-a11", CycloneModel, [ProcAppleA11]>; def : ProcessorModel<"apple-a12", CycloneModel, [ProcAppleA12]>; def : ProcessorModel<"apple-a13", CycloneModel, [ProcAppleA13]>; -def : ProcessorModel<"apple-a14", CycloneModel, [ProcAppleA14]>; +def : ProcessorModel<"apple-a14", CycloneModel, [ProcAppleA14]>; // watch CPUs. def : ProcessorModel<"apple-s4", CycloneModel, [ProcAppleA12]>; @@ -1203,7 +1203,7 @@ def : ProcessorModel<"apple-s5", CycloneModel, [ProcAppleA12]>; def : ProcessorModel<"apple-latest", CycloneModel, [ProcAppleA13]>; // Fujitsu A64FX -def : ProcessorModel<"a64fx", A64FXModel, [ProcA64FX]>; +def : ProcessorModel<"a64fx", A64FXModel, [ProcA64FX]>; // Nvidia Carmel def : ProcessorModel<"carmel", NoSchedModel, [ProcCarmel]>; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp index c996d2df8c..74fd2411f4 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp @@ -123,7 +123,7 @@ static bool isFPR64(unsigned Reg, unsigned SubReg, } // getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64 -// copy instruction. Return nullptr if the instruction is not a copy. +// copy instruction. Return nullptr if the instruction is not a copy. 
static MachineOperand *getSrcFromCopy(MachineInstr *MI, const MachineRegisterInfo *MRI, unsigned &SubReg) { diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64AsmPrinter.cpp index a0c5498ee6..419af6785c 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -32,7 +32,7 @@ #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/FaultMaps.h" +#include "llvm/CodeGen/FaultMaps.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -55,7 +55,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" +#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -71,13 +71,13 @@ namespace { class AArch64AsmPrinter : public AsmPrinter { AArch64MCInstLower MCInstLowering; StackMaps SM; - FaultMaps FM; + FaultMaps FM; const AArch64Subtarget *STI; public: AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this), - SM(*this), FM(*this) {} + SM(*this), FM(*this) {} StringRef getPassName() const override { return "AArch64 Assembly Printer"; } @@ -92,15 +92,15 @@ public: void emitFunctionEntryLabel() override; - void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI); + void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI); void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI); void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI); - void LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI); - void LowerFAULTING_OP(const MachineInstr &MI); + void LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI); + void LowerFAULTING_OP(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); @@ -195,24 +195,24 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) { return; // Assemble feature flags that may require creation of a note section. - unsigned Flags = 0; - if (const auto *BTE = mdconst::extract_or_null<ConstantInt>( - M.getModuleFlag("branch-target-enforcement"))) - if (BTE->getZExtValue()) - Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI; + unsigned Flags = 0; + if (const auto *BTE = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("branch-target-enforcement"))) + if (BTE->getZExtValue()) + Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI; - if (const auto *Sign = mdconst::extract_or_null<ConstantInt>( - M.getModuleFlag("sign-return-address"))) - if (Sign->getZExtValue()) - Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_PAC; + if (const auto *Sign = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("sign-return-address"))) + if (Sign->getZExtValue()) + Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_PAC; if (Flags == 0) return; // Emit a .note.gnu.property section with the flags. 
- if (auto *TS = static_cast<AArch64TargetStreamer *>( - OutStreamer->getTargetStreamer())) - TS->emitNoteSection(Flags); + if (auto *TS = static_cast<AArch64TargetStreamer *>( + OutStreamer->getTargetStreamer())) + TS->emitNoteSection(Flags); } void AArch64AsmPrinter::emitFunctionHeaderComment() { @@ -303,7 +303,7 @@ void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) { std::string SymName = "__hwasan_check_x" + utostr(Reg - AArch64::X0) + "_" + utostr(AccessInfo); if (IsShort) - SymName += "_short_v2"; + SymName += "_short_v2"; Sym = OutContext.getOrCreateSymbol(SymName); } @@ -320,7 +320,7 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { assert(TT.isOSBinFormatELF()); std::unique_ptr<MCSubtargetInfo> STI( TM.getTarget().createMCSubtargetInfo(TT.str(), "", "")); - assert(STI && "Unable to create subtarget info"); + assert(STI && "Unable to create subtarget info"); MCSymbol *HwasanTagMismatchV1Sym = OutContext.getOrCreateSymbol("__hwasan_tag_mismatch"); @@ -340,15 +340,15 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { IsShort ? HwasanTagMismatchV2Ref : HwasanTagMismatchV1Ref; MCSymbol *Sym = P.second; - bool HasMatchAllTag = - (AccessInfo >> HWASanAccessInfo::HasMatchAllShift) & 1; - uint8_t MatchAllTag = - (AccessInfo >> HWASanAccessInfo::MatchAllShift) & 0xff; - unsigned Size = - 1 << ((AccessInfo >> HWASanAccessInfo::AccessSizeShift) & 0xf); - bool CompileKernel = - (AccessInfo >> HWASanAccessInfo::CompileKernelShift) & 1; - + bool HasMatchAllTag = + (AccessInfo >> HWASanAccessInfo::HasMatchAllShift) & 1; + uint8_t MatchAllTag = + (AccessInfo >> HWASanAccessInfo::MatchAllShift) & 0xff; + unsigned Size = + 1 << ((AccessInfo >> HWASanAccessInfo::AccessSizeShift) & 0xf); + bool CompileKernel = + (AccessInfo >> HWASanAccessInfo::CompileKernelShift) & 1; + OutStreamer->SwitchSection(OutContext.getELFSection( ".text.hot", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, @@ -359,21 +359,21 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { OutStreamer->emitSymbolAttribute(Sym, MCSA_Hidden); OutStreamer->emitLabel(Sym); - OutStreamer->emitInstruction(MCInstBuilder(AArch64::SBFMXri) + OutStreamer->emitInstruction(MCInstBuilder(AArch64::SBFMXri) .addReg(AArch64::X16) .addReg(Reg) .addImm(4) .addImm(55), *STI); OutStreamer->emitInstruction( - MCInstBuilder(AArch64::LDRBBroX) - .addReg(AArch64::W16) - .addReg(IsShort ? AArch64::X20 : AArch64::X9) - .addReg(AArch64::X16) - .addImm(0) - .addImm(0), - *STI); - OutStreamer->emitInstruction( + MCInstBuilder(AArch64::LDRBBroX) + .addReg(AArch64::W16) + .addReg(IsShort ? 
AArch64::X20 : AArch64::X9) + .addReg(AArch64::X16) + .addImm(0) + .addImm(0), + *STI); + OutStreamer->emitInstruction( MCInstBuilder(AArch64::SUBSXrs) .addReg(AArch64::XZR) .addReg(AArch64::X16) @@ -393,26 +393,26 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { MCInstBuilder(AArch64::RET).addReg(AArch64::LR), *STI); OutStreamer->emitLabel(HandleMismatchOrPartialSym); - if (HasMatchAllTag) { - OutStreamer->emitInstruction(MCInstBuilder(AArch64::UBFMXri) - .addReg(AArch64::X16) - .addReg(Reg) - .addImm(56) - .addImm(63), - *STI); - OutStreamer->emitInstruction(MCInstBuilder(AArch64::SUBSXri) - .addReg(AArch64::XZR) - .addReg(AArch64::X16) - .addImm(MatchAllTag) - .addImm(0), - *STI); - OutStreamer->emitInstruction( - MCInstBuilder(AArch64::Bcc) - .addImm(AArch64CC::EQ) - .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)), - *STI); - } - + if (HasMatchAllTag) { + OutStreamer->emitInstruction(MCInstBuilder(AArch64::UBFMXri) + .addReg(AArch64::X16) + .addReg(Reg) + .addImm(56) + .addImm(63), + *STI); + OutStreamer->emitInstruction(MCInstBuilder(AArch64::SUBSXri) + .addReg(AArch64::XZR) + .addReg(AArch64::X16) + .addImm(MatchAllTag) + .addImm(0), + *STI); + OutStreamer->emitInstruction( + MCInstBuilder(AArch64::Bcc) + .addImm(AArch64CC::EQ) + .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)), + *STI); + } + if (IsShort) { OutStreamer->emitInstruction(MCInstBuilder(AArch64::SUBSWri) .addReg(AArch64::WZR) @@ -501,40 +501,40 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { .addImm(0), *STI); OutStreamer->emitInstruction( - MCInstBuilder(AArch64::MOVZXi) - .addReg(AArch64::X1) - .addImm(AccessInfo & HWASanAccessInfo::RuntimeMask) - .addImm(0), + MCInstBuilder(AArch64::MOVZXi) + .addReg(AArch64::X1) + .addImm(AccessInfo & HWASanAccessInfo::RuntimeMask) + .addImm(0), *STI); - - if (CompileKernel) { - // The Linux kernel's dynamic loader doesn't support GOT relative - // relocations, but it doesn't support late binding either, so just call - // the function directly. - OutStreamer->emitInstruction( - MCInstBuilder(AArch64::B).addExpr(HwasanTagMismatchRef), *STI); - } else { - // Intentionally load the GOT entry and branch to it, rather than possibly - // late binding the function, which may clobber the registers before we - // have a chance to save them. - OutStreamer->emitInstruction( - MCInstBuilder(AArch64::ADRP) - .addReg(AArch64::X16) - .addExpr(AArch64MCExpr::create( - HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_PAGE, - OutContext)), - *STI); - OutStreamer->emitInstruction( - MCInstBuilder(AArch64::LDRXui) - .addReg(AArch64::X16) - .addReg(AArch64::X16) - .addExpr(AArch64MCExpr::create( - HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_LO12, - OutContext)), - *STI); - OutStreamer->emitInstruction( - MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI); - } + + if (CompileKernel) { + // The Linux kernel's dynamic loader doesn't support GOT relative + // relocations, but it doesn't support late binding either, so just call + // the function directly. + OutStreamer->emitInstruction( + MCInstBuilder(AArch64::B).addExpr(HwasanTagMismatchRef), *STI); + } else { + // Intentionally load the GOT entry and branch to it, rather than possibly + // late binding the function, which may clobber the registers before we + // have a chance to save them. 
+ OutStreamer->emitInstruction( + MCInstBuilder(AArch64::ADRP) + .addReg(AArch64::X16) + .addExpr(AArch64MCExpr::create( + HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_PAGE, + OutContext)), + *STI); + OutStreamer->emitInstruction( + MCInstBuilder(AArch64::LDRXui) + .addReg(AArch64::X16) + .addReg(AArch64::X16) + .addExpr(AArch64MCExpr::create( + HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_LO12, + OutContext)), + *STI); + OutStreamer->emitInstruction( + MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI); + } } } @@ -550,11 +550,11 @@ void AArch64AsmPrinter::emitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. OutStreamer->emitAssemblerFlag(MCAF_SubsectionsViaSymbols); } - - // Emit stack and fault map information. + + // Emit stack and fault map information. emitStackMaps(SM); - FM.serializeToFaultMapSection(); - + FM.serializeToFaultMapSection(); + } void AArch64AsmPrinter::EmitLOHs() { @@ -647,8 +647,8 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO, const TargetRegisterInfo *RI = STI->getRegisterInfo(); Register Reg = MO.getReg(); unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg)); - if (!RI->regsOverlap(RegToPrint, Reg)) - return true; + if (!RI->regsOverlap(RegToPrint, Reg)) + return true; O << AArch64InstPrinter::getRegisterName(RegToPrint, AltName); return false; } @@ -809,24 +809,24 @@ void AArch64AsmPrinter::emitJumpTableInfo() { emitAlignment(Align(Size)); OutStreamer->emitLabel(GetJTISymbol(JTI)); - const MCSymbol *BaseSym = AArch64FI->getJumpTableEntryPCRelSymbol(JTI); - const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext); - - for (auto *JTBB : JTBBs) { - const MCExpr *Value = - MCSymbolRefExpr::create(JTBB->getSymbol(), OutContext); - - // Each entry is: - // .byte/.hword (LBB - Lbase)>>2 - // or plain: - // .word LBB - Lbase - Value = MCBinaryExpr::createSub(Value, Base, OutContext); - if (Size != 4) - Value = MCBinaryExpr::createLShr( - Value, MCConstantExpr::create(2, OutContext), OutContext); - - OutStreamer->emitValue(Value, Size); - } + const MCSymbol *BaseSym = AArch64FI->getJumpTableEntryPCRelSymbol(JTI); + const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext); + + for (auto *JTBB : JTBBs) { + const MCExpr *Value = + MCSymbolRefExpr::create(JTBB->getSymbol(), OutContext); + + // Each entry is: + // .byte/.hword (LBB - Lbase)>>2 + // or plain: + // .word LBB - Lbase + Value = MCBinaryExpr::createSub(Value, Base, OutContext); + if (Size != 4) + Value = MCBinaryExpr::createLShr( + Value, MCConstantExpr::create(2, OutContext), OutContext); + + OutStreamer->emitValue(Value, Size); + } } } @@ -851,9 +851,9 @@ void AArch64AsmPrinter::emitFunctionEntryLabel() { /// /// adr xDest, .LBB0_0 /// ldrb wScratch, [xTable, xEntry] (with "lsl #1" for ldrh). 
-/// add xDest, xDest, xScratch (with "lsl #2" for smaller entries) -void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer, - const llvm::MachineInstr &MI) { +/// add xDest, xDest, xScratch (with "lsl #2" for smaller entries) +void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer, + const llvm::MachineInstr &MI) { Register DestReg = MI.getOperand(0).getReg(); Register ScratchReg = MI.getOperand(1).getReg(); Register ScratchRegW = @@ -861,50 +861,50 @@ void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer, Register TableReg = MI.getOperand(2).getReg(); Register EntryReg = MI.getOperand(3).getReg(); int JTIdx = MI.getOperand(4).getIndex(); - int Size = AArch64FI->getJumpTableEntrySize(JTIdx); + int Size = AArch64FI->getJumpTableEntrySize(JTIdx); // This has to be first because the compression pass based its reachability // calculations on the start of the JumpTableDest instruction. auto Label = MF->getInfo<AArch64FunctionInfo>()->getJumpTableEntryPCRelSymbol(JTIdx); - - // If we don't already have a symbol to use as the base, use the ADR - // instruction itself. - if (!Label) { - Label = MF->getContext().createTempSymbol(); - AArch64FI->setJumpTableEntryInfo(JTIdx, Size, Label); - OutStreamer.emitLabel(Label); - } - - auto LabelExpr = MCSymbolRefExpr::create(Label, MF->getContext()); + + // If we don't already have a symbol to use as the base, use the ADR + // instruction itself. + if (!Label) { + Label = MF->getContext().createTempSymbol(); + AArch64FI->setJumpTableEntryInfo(JTIdx, Size, Label); + OutStreamer.emitLabel(Label); + } + + auto LabelExpr = MCSymbolRefExpr::create(Label, MF->getContext()); EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR) .addReg(DestReg) - .addExpr(LabelExpr)); + .addExpr(LabelExpr)); // Load the number of instruction-steps to offset from the label. - unsigned LdrOpcode; - switch (Size) { - case 1: LdrOpcode = AArch64::LDRBBroX; break; - case 2: LdrOpcode = AArch64::LDRHHroX; break; - case 4: LdrOpcode = AArch64::LDRSWroX; break; - default: - llvm_unreachable("Unknown jump table size"); - } - + unsigned LdrOpcode; + switch (Size) { + case 1: LdrOpcode = AArch64::LDRBBroX; break; + case 2: LdrOpcode = AArch64::LDRHHroX; break; + case 4: LdrOpcode = AArch64::LDRSWroX; break; + default: + llvm_unreachable("Unknown jump table size"); + } + EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode) - .addReg(Size == 4 ? ScratchReg : ScratchRegW) + .addReg(Size == 4 ? ScratchReg : ScratchRegW) .addReg(TableReg) .addReg(EntryReg) .addImm(0) - .addImm(Size == 1 ? 0 : 1)); + .addImm(Size == 1 ? 0 : 1)); - // Add to the already materialized base label address, multiplying by 4 if - // compressed. + // Add to the already materialized base label address, multiplying by 4 if + // compressed. EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs) .addReg(DestReg) .addReg(DestReg) .addReg(ScratchReg) - .addImm(Size == 4 ? 0 : 2)); + .addImm(Size == 4 ? 
0 : 2)); } void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, @@ -982,83 +982,83 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); } -void AArch64AsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { - StatepointOpers SOpers(&MI); - if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { - assert(PatchBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); - for (unsigned i = 0; i < PatchBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); - } else { - // Lower call target and choose correct opcode - const MachineOperand &CallTarget = SOpers.getCallTarget(); - MCOperand CallTargetMCOp; - unsigned CallOpcode; - switch (CallTarget.getType()) { - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_ExternalSymbol: - MCInstLowering.lowerOperand(CallTarget, CallTargetMCOp); - CallOpcode = AArch64::BL; - break; - case MachineOperand::MO_Immediate: - CallTargetMCOp = MCOperand::createImm(CallTarget.getImm()); - CallOpcode = AArch64::BL; - break; - case MachineOperand::MO_Register: - CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); - CallOpcode = AArch64::BLR; - break; - default: - llvm_unreachable("Unsupported operand type in statepoint call target"); - break; - } - - EmitToStreamer(OutStreamer, - MCInstBuilder(CallOpcode).addOperand(CallTargetMCOp)); - } - - auto &Ctx = OutStreamer.getContext(); - MCSymbol *MILabel = Ctx.createTempSymbol(); - OutStreamer.emitLabel(MILabel); - SM.recordStatepoint(*MILabel, MI); -} - -void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) { - // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>, - // <opcode>, <operands> - - Register DefRegister = FaultingMI.getOperand(0).getReg(); - FaultMaps::FaultKind FK = - static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm()); - MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol(); - unsigned Opcode = FaultingMI.getOperand(3).getImm(); - unsigned OperandsBeginIdx = 4; - - auto &Ctx = OutStreamer->getContext(); - MCSymbol *FaultingLabel = Ctx.createTempSymbol(); - OutStreamer->emitLabel(FaultingLabel); - - assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!"); - FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel); - - MCInst MI; - MI.setOpcode(Opcode); - - if (DefRegister != (Register)0) - MI.addOperand(MCOperand::createReg(DefRegister)); - - for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx, - E = FaultingMI.operands_end(); - I != E; ++I) { - MCOperand Dest; - lowerOperand(*I, Dest); - MI.addOperand(Dest); - } - - OutStreamer->AddComment("on-fault: " + HandlerLabel->getName()); - OutStreamer->emitInstruction(MI, getSubtargetInfo()); -} - +void AArch64AsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { + StatepointOpers SOpers(&MI); + if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { + assert(PatchBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + for (unsigned i = 0; i < PatchBytes; i += 4) + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); + } else { + // Lower call target and choose correct opcode + const MachineOperand &CallTarget = SOpers.getCallTarget(); + MCOperand CallTargetMCOp; + unsigned CallOpcode; + switch (CallTarget.getType()) { + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + 
MCInstLowering.lowerOperand(CallTarget, CallTargetMCOp); + CallOpcode = AArch64::BL; + break; + case MachineOperand::MO_Immediate: + CallTargetMCOp = MCOperand::createImm(CallTarget.getImm()); + CallOpcode = AArch64::BL; + break; + case MachineOperand::MO_Register: + CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); + CallOpcode = AArch64::BLR; + break; + default: + llvm_unreachable("Unsupported operand type in statepoint call target"); + break; + } + + EmitToStreamer(OutStreamer, + MCInstBuilder(CallOpcode).addOperand(CallTargetMCOp)); + } + + auto &Ctx = OutStreamer.getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer.emitLabel(MILabel); + SM.recordStatepoint(*MILabel, MI); +} + +void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) { + // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>, + // <opcode>, <operands> + + Register DefRegister = FaultingMI.getOperand(0).getReg(); + FaultMaps::FaultKind FK = + static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm()); + MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol(); + unsigned Opcode = FaultingMI.getOperand(3).getImm(); + unsigned OperandsBeginIdx = 4; + + auto &Ctx = OutStreamer->getContext(); + MCSymbol *FaultingLabel = Ctx.createTempSymbol(); + OutStreamer->emitLabel(FaultingLabel); + + assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!"); + FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel); + + MCInst MI; + MI.setOpcode(Opcode); + + if (DefRegister != (Register)0) + MI.addOperand(MCOperand::createReg(DefRegister)); + + for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx, + E = FaultingMI.operands_end(); + I != E; ++I) { + MCOperand Dest; + lowerOperand(*I, Dest); + MI.addOperand(Dest); + } + + OutStreamer->AddComment("on-fault: " + HandlerLabel->getName()); + OutStreamer->emitInstruction(MI, getSubtargetInfo()); +} + void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) { Register DestReg = MI.getOperand(0).getReg(); if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) { @@ -1272,28 +1272,28 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, Adrp); MCInst Ldr; - if (STI->isTargetILP32()) { - Ldr.setOpcode(AArch64::LDRWui); - Ldr.addOperand(MCOperand::createReg(AArch64::W1)); - } else { - Ldr.setOpcode(AArch64::LDRXui); - Ldr.addOperand(MCOperand::createReg(AArch64::X1)); - } + if (STI->isTargetILP32()) { + Ldr.setOpcode(AArch64::LDRWui); + Ldr.addOperand(MCOperand::createReg(AArch64::W1)); + } else { + Ldr.setOpcode(AArch64::LDRXui); + Ldr.addOperand(MCOperand::createReg(AArch64::X1)); + } Ldr.addOperand(MCOperand::createReg(AArch64::X0)); Ldr.addOperand(SymTLSDescLo12); Ldr.addOperand(MCOperand::createImm(0)); EmitToStreamer(*OutStreamer, Ldr); MCInst Add; - if (STI->isTargetILP32()) { - Add.setOpcode(AArch64::ADDWri); - Add.addOperand(MCOperand::createReg(AArch64::W0)); - Add.addOperand(MCOperand::createReg(AArch64::W0)); - } else { - Add.setOpcode(AArch64::ADDXri); - Add.addOperand(MCOperand::createReg(AArch64::X0)); - Add.addOperand(MCOperand::createReg(AArch64::X0)); - } + if (STI->isTargetILP32()) { + Add.setOpcode(AArch64::ADDWri); + Add.addOperand(MCOperand::createReg(AArch64::W0)); + Add.addOperand(MCOperand::createReg(AArch64::W0)); + } else { + Add.setOpcode(AArch64::ADDXri); + Add.addOperand(MCOperand::createReg(AArch64::X0)); + Add.addOperand(MCOperand::createReg(AArch64::X0)); + } Add.addOperand(SymTLSDescLo12); 
Add.addOperand(MCOperand::createImm(AArch64_AM::getShiftValue(0))); EmitToStreamer(*OutStreamer, Add); @@ -1313,10 +1313,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { return; } - case AArch64::JumpTableDest32: + case AArch64::JumpTableDest32: case AArch64::JumpTableDest16: case AArch64::JumpTableDest8: - LowerJumpTableDest(*OutStreamer, *MI); + LowerJumpTableDest(*OutStreamer, *MI); return; case AArch64::FMOVH0: @@ -1331,12 +1331,12 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { case TargetOpcode::PATCHPOINT: return LowerPATCHPOINT(*OutStreamer, SM, *MI); - case TargetOpcode::STATEPOINT: - return LowerSTATEPOINT(*OutStreamer, SM, *MI); - - case TargetOpcode::FAULTING_OP: - return LowerFAULTING_OP(*MI); - + case TargetOpcode::STATEPOINT: + return LowerSTATEPOINT(*OutStreamer, SM, *MI); + + case TargetOpcode::FAULTING_OP: + return LowerFAULTING_OP(*MI); + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: LowerPATCHABLE_FUNCTION_ENTER(*MI); return; @@ -1381,14 +1381,14 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { return; case AArch64::SEH_SaveRegP: - if (MI->getOperand(1).getImm() == 30 && MI->getOperand(0).getImm() >= 19 && - MI->getOperand(0).getImm() <= 28) { - assert((MI->getOperand(0).getImm() - 19) % 2 == 0 && - "Register paired with LR must be odd"); - TS->EmitARM64WinCFISaveLRPair(MI->getOperand(0).getImm(), - MI->getOperand(2).getImm()); - return; - } + if (MI->getOperand(1).getImm() == 30 && MI->getOperand(0).getImm() >= 19 && + MI->getOperand(0).getImm() <= 28) { + assert((MI->getOperand(0).getImm() - 19) % 2 == 0 && + "Register paired with LR must be odd"); + TS->EmitARM64WinCFISaveLRPair(MI->getOperand(0).getImm(), + MI->getOperand(2).getImm()); + return; + } assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) && "Non-consecutive registers not allowed for save_regp"); TS->EmitARM64WinCFISaveRegP(MI->getOperand(0).getImm(), diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64BranchTargets.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64BranchTargets.cpp index d3b5166585..12a4c8ce9d 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64BranchTargets.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64BranchTargets.cpp @@ -16,7 +16,7 @@ // //===----------------------------------------------------------------------===// -#include "AArch64MachineFunctionInfo.h" +#include "AArch64MachineFunctionInfo.h" #include "AArch64Subtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -58,13 +58,13 @@ FunctionPass *llvm::createAArch64BranchTargetsPass() { } bool AArch64BranchTargets::runOnMachineFunction(MachineFunction &MF) { - if (!MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) + if (!MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) return false; LLVM_DEBUG( dbgs() << "********** AArch64 Branch Targets **********\n" << "********** Function: " << MF.getName() << '\n'); - const Function &F = MF.getFunction(); + const Function &F = MF.getFunction(); // LLVM does not consider basic blocks which are the targets of jump tables // to be address-taken (the address can't escape anywhere else), but they are diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64CallingConvention.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64CallingConvention.cpp index c51dd48cab..ab1a31e1e7 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64CallingConvention.cpp +++ 
b/contrib/libs/llvm12/lib/Target/AArch64/AArch64CallingConvention.cpp @@ -42,51 +42,51 @@ static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2, static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers, MVT LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State, Align SlotAlign) { - if (LocVT.isScalableVector()) { - const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>( - State.getMachineFunction().getSubtarget()); - const AArch64TargetLowering *TLI = Subtarget.getTargetLowering(); - - // We are about to reinvoke the CCAssignFn auto-generated handler. If we - // don't unset these flags we will get stuck in an infinite loop forever - // invoking the custom handler. - ArgFlags.setInConsecutiveRegs(false); - ArgFlags.setInConsecutiveRegsLast(false); - - // The calling convention for passing SVE tuples states that in the event - // we cannot allocate enough registers for the tuple we should still leave - // any remaining registers unallocated. However, when we call the - // CCAssignFn again we want it to behave as if all remaining registers are - // allocated. This will force the code to pass the tuple indirectly in - // accordance with the PCS. - bool RegsAllocated[8]; - for (int I = 0; I < 8; I++) { - RegsAllocated[I] = State.isAllocated(ZRegList[I]); - State.AllocateReg(ZRegList[I]); - } - - auto &It = PendingMembers[0]; - CCAssignFn *AssignFn = - TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false); - if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full, - ArgFlags, State)) - llvm_unreachable("Call operand has unhandled type"); - - // Return the flags to how they were before. - ArgFlags.setInConsecutiveRegs(true); - ArgFlags.setInConsecutiveRegsLast(true); - - // Return the register state back to how it was before, leaving any - // unallocated registers available for other smaller types. - for (int I = 0; I < 8; I++) - if (!RegsAllocated[I]) - State.DeallocateReg(ZRegList[I]); - - // All pending members have now been allocated - PendingMembers.clear(); - return true; - } - + if (LocVT.isScalableVector()) { + const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>( + State.getMachineFunction().getSubtarget()); + const AArch64TargetLowering *TLI = Subtarget.getTargetLowering(); + + // We are about to reinvoke the CCAssignFn auto-generated handler. If we + // don't unset these flags we will get stuck in an infinite loop forever + // invoking the custom handler. + ArgFlags.setInConsecutiveRegs(false); + ArgFlags.setInConsecutiveRegsLast(false); + + // The calling convention for passing SVE tuples states that in the event + // we cannot allocate enough registers for the tuple we should still leave + // any remaining registers unallocated. However, when we call the + // CCAssignFn again we want it to behave as if all remaining registers are + // allocated. This will force the code to pass the tuple indirectly in + // accordance with the PCS. + bool RegsAllocated[8]; + for (int I = 0; I < 8; I++) { + RegsAllocated[I] = State.isAllocated(ZRegList[I]); + State.AllocateReg(ZRegList[I]); + } + + auto &It = PendingMembers[0]; + CCAssignFn *AssignFn = + TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false); + if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full, + ArgFlags, State)) + llvm_unreachable("Call operand has unhandled type"); + + // Return the flags to how they were before. 
+ ArgFlags.setInConsecutiveRegs(true); + ArgFlags.setInConsecutiveRegsLast(true); + + // Return the register state back to how it was before, leaving any + // unallocated registers available for other smaller types. + for (int I = 0; I < 8; I++) + if (!RegsAllocated[I]) + State.DeallocateReg(ZRegList[I]); + + // All pending members have now been allocated + PendingMembers.clear(); + return true; + } + unsigned Size = LocVT.getSizeInBits() / 8; const Align StackAlign = State.getMachineFunction().getDataLayout().getStackAlignment(); @@ -191,11 +191,11 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return true; } - if (!LocVT.isScalableVector()) { - // Mark all regs in the class as unavailable - for (auto Reg : RegList) - State.AllocateReg(Reg); - } + if (!LocVT.isScalableVector()) { + // Mark all regs in the class as unavailable + for (auto Reg : RegList) + State.AllocateReg(Reg); + } const Align SlotAlign = Subtarget.isTargetDarwin() ? Align(1) : Align(8); diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64Combine.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64Combine.td index b1e714653f..03d92b8d50 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64Combine.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64Combine.td @@ -75,68 +75,68 @@ def ext: GICombineRule < // instruction. def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>; -def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">; -def vashr_vlshr_imm : GICombineRule< - (defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo), - (match (wip_match_opcode G_ASHR, G_LSHR):$root, - [{ return matchVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]) ->; - -def form_duplane_matchdata : - GIDefMatchData<"std::pair<unsigned, int>">; -def form_duplane : GICombineRule < - (defs root:$root, form_duplane_matchdata:$matchinfo), - (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, - [{ return matchDupLane(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }]) ->; - -def adjust_icmp_imm_matchdata : - GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">; -def adjust_icmp_imm : GICombineRule < - (defs root:$root, adjust_icmp_imm_matchdata:$matchinfo), - (match (wip_match_opcode G_ICMP):$root, - [{ return matchAdjustICmpImmAndPred(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyAdjustICmpImmAndPred(*${root}, ${matchinfo}, B, Observer); }]) ->; - -def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>; - -def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">; -def extractvecelt_pairwise_add : GICombineRule< - (defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo), - (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root, - [{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }]) ->; - -def mul_const_matchdata : GIDefMatchData<"std::function<void(MachineIRBuilder&, Register)>">; -def mul_const : GICombineRule< - (defs root:$root, mul_const_matchdata:$matchinfo), - (match (wip_match_opcode G_MUL):$root, - [{ return matchAArch64MulConstCombine(*${root}, MRI, ${matchinfo}); }]), - (apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }]) ->; - -// Post-legalization combines which should happen at all optimization levels. -// (E.g. 
ones that facilitate matching for the selector) For example, matching -// pseudos. -def AArch64PostLegalizerLoweringHelper - : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper", - [shuffle_vector_pseudos, vashr_vlshr_imm, - icmp_lowering, form_duplane]> { - let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule"; -} - -// Post-legalization combines which are primarily optimizations. +def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">; +def vashr_vlshr_imm : GICombineRule< + (defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo), + (match (wip_match_opcode G_ASHR, G_LSHR):$root, + [{ return matchVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]), + (apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]) +>; + +def form_duplane_matchdata : + GIDefMatchData<"std::pair<unsigned, int>">; +def form_duplane : GICombineRule < + (defs root:$root, form_duplane_matchdata:$matchinfo), + (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, + [{ return matchDupLane(*${root}, MRI, ${matchinfo}); }]), + (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }]) +>; + +def adjust_icmp_imm_matchdata : + GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">; +def adjust_icmp_imm : GICombineRule < + (defs root:$root, adjust_icmp_imm_matchdata:$matchinfo), + (match (wip_match_opcode G_ICMP):$root, + [{ return matchAdjustICmpImmAndPred(*${root}, MRI, ${matchinfo}); }]), + (apply [{ applyAdjustICmpImmAndPred(*${root}, ${matchinfo}, B, Observer); }]) +>; + +def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>; + +def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">; +def extractvecelt_pairwise_add : GICombineRule< + (defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo), + (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root, + [{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]), + (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }]) +>; + +def mul_const_matchdata : GIDefMatchData<"std::function<void(MachineIRBuilder&, Register)>">; +def mul_const : GICombineRule< + (defs root:$root, mul_const_matchdata:$matchinfo), + (match (wip_match_opcode G_MUL):$root, + [{ return matchAArch64MulConstCombine(*${root}, MRI, ${matchinfo}); }]), + (apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }]) +>; + +// Post-legalization combines which should happen at all optimization levels. +// (E.g. ones that facilitate matching for the selector) For example, matching +// pseudos. +def AArch64PostLegalizerLoweringHelper + : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper", + [shuffle_vector_pseudos, vashr_vlshr_imm, + icmp_lowering, form_duplane]> { + let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule"; +} + +// Post-legalization combines which are primarily optimizations. 
def AArch64PostLegalizerCombinerHelper : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper", - [copy_prop, erase_undef_store, combines_for_extload, - sext_trunc_sextload, - hoist_logic_op_with_same_opcode_hands, - redundant_and, xor_of_and_with_same_reg, - extractvecelt_pairwise_add, redundant_or, - mul_const]> { + [copy_prop, erase_undef_store, combines_for_extload, + sext_trunc_sextload, + hoist_logic_op_with_same_opcode_hands, + redundant_and, xor_of_and_with_same_reg, + extractvecelt_pairwise_add, redundant_or, + mul_const]> { let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule"; } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64CompressJumpTables.cpp index 2328a8b4de..d419598aaa 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64CompressJumpTables.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64CompressJumpTables.cpp @@ -37,14 +37,14 @@ class AArch64CompressJumpTables : public MachineFunctionPass { MachineFunction *MF; SmallVector<int, 8> BlockInfo; - /// Returns the size in instructions of the block \p MBB, or None if we - /// couldn't get a safe upper bound. - Optional<int> computeBlockSize(MachineBasicBlock &MBB); - - /// Gather information about the function, returns false if we can't perform - /// this optimization for some reason. - bool scanFunction(); - + /// Returns the size in instructions of the block \p MBB, or None if we + /// couldn't get a safe upper bound. + Optional<int> computeBlockSize(MachineBasicBlock &MBB); + + /// Gather information about the function, returns false if we can't perform + /// this optimization for some reason. + bool scanFunction(); + bool compressJumpTable(MachineInstr &MI, int Offset); public: @@ -64,27 +64,27 @@ public: } }; char AArch64CompressJumpTables::ID = 0; -} // namespace +} // namespace INITIALIZE_PASS(AArch64CompressJumpTables, DEBUG_TYPE, "AArch64 compress jump tables pass", false, false) -Optional<int> -AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) { +Optional<int> +AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) { int Size = 0; - for (const MachineInstr &MI : MBB) { - // Inline asm may contain some directives like .bytes which we don't - // currently have the ability to parse accurately. To be safe, just avoid - // computing a size and bail out. - if (MI.getOpcode() == AArch64::INLINEASM || - MI.getOpcode() == AArch64::INLINEASM_BR) - return None; + for (const MachineInstr &MI : MBB) { + // Inline asm may contain some directives like .bytes which we don't + // currently have the ability to parse accurately. To be safe, just avoid + // computing a size and bail out. 
+ if (MI.getOpcode() == AArch64::INLINEASM || + MI.getOpcode() == AArch64::INLINEASM_BR) + return None; Size += TII->getInstSizeInBytes(MI); - } + } return Size; } -bool AArch64CompressJumpTables::scanFunction() { +bool AArch64CompressJumpTables::scanFunction() { BlockInfo.clear(); BlockInfo.resize(MF->getNumBlockIDs()); @@ -97,12 +97,12 @@ bool AArch64CompressJumpTables::scanFunction() { else AlignedOffset = alignTo(Offset, Alignment); BlockInfo[MBB.getNumber()] = AlignedOffset; - auto BlockSize = computeBlockSize(MBB); - if (!BlockSize) - return false; - Offset = AlignedOffset + *BlockSize; + auto BlockSize = computeBlockSize(MBB); + if (!BlockSize) + return false; + Offset = AlignedOffset + *BlockSize; } - return true; + return true; } bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI, @@ -121,7 +121,7 @@ bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI, int MaxOffset = std::numeric_limits<int>::min(), MinOffset = std::numeric_limits<int>::max(); MachineBasicBlock *MinBlock = nullptr; - for (auto *Block : JT.MBBs) { + for (auto *Block : JT.MBBs) { int BlockOffset = BlockInfo[Block->getNumber()]; assert(BlockOffset % 4 == 0 && "misaligned basic block"); @@ -141,14 +141,14 @@ bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI, } int Span = MaxOffset - MinOffset; - auto *AFI = MF->getInfo<AArch64FunctionInfo>(); + auto *AFI = MF->getInfo<AArch64FunctionInfo>(); if (isUInt<8>(Span / 4)) { AFI->setJumpTableEntryInfo(JTIdx, 1, MinBlock->getSymbol()); MI.setDesc(TII->get(AArch64::JumpTableDest8)); ++NumJT8; return true; - } - if (isUInt<16>(Span / 4)) { + } + if (isUInt<16>(Span / 4)) { AFI->setJumpTableEntryInfo(JTIdx, 2, MinBlock->getSymbol()); MI.setDesc(TII->get(AArch64::JumpTableDest16)); ++NumJT16; @@ -169,8 +169,8 @@ bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) { if (ST.force32BitJumpTables() && !MF->getFunction().hasMinSize()) return false; - if (!scanFunction()) - return false; + if (!scanFunction()) + return false; for (MachineBasicBlock &MBB : *MF) { int Offset = BlockInfo[MBB.getNumber()]; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index e57650ae60..1a8731883f 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -83,8 +83,8 @@ private: bool expandSVESpillFill(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Opc, unsigned N); - bool expandCALL_RVMARKER(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI); + bool expandCALL_RVMARKER(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); }; } // end anonymous namespace @@ -629,46 +629,46 @@ bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB, return true; } -bool AArch64ExpandPseudo::expandCALL_RVMARKER( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { - // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29, - // x29` marker. Mark the sequence as bundle, to avoid passes moving other code - // in between. - MachineInstr &MI = *MBBI; - - MachineInstr *OriginalCall; - MachineOperand &CallTarget = MI.getOperand(0); - assert((CallTarget.isGlobal() || CallTarget.isReg()) && - "invalid operand for regular call"); - unsigned Opc = CallTarget.isGlobal() ? 
AArch64::BL : AArch64::BLR; - OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr(); - OriginalCall->addOperand(CallTarget); - - unsigned RegMaskStartIdx = 1; - // Skip register arguments. Those are added during ISel, but are not - // needed for the concrete branch. - while (!MI.getOperand(RegMaskStartIdx).isRegMask()) { - assert(MI.getOperand(RegMaskStartIdx).isReg() && - "should only skip register operands"); - RegMaskStartIdx++; - } - for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx) - OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx)); - - auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs)) - .addReg(AArch64::FP, RegState::Define) - .addReg(AArch64::XZR) - .addReg(AArch64::FP) - .addImm(0) - .getInstr(); - if (MI.shouldUpdateCallSiteInfo()) - MBB.getParent()->moveCallSiteInfo(&MI, Marker); - MI.eraseFromParent(); - finalizeBundle(MBB, OriginalCall->getIterator(), - std::next(Marker->getIterator())); - return true; -} - +bool AArch64ExpandPseudo::expandCALL_RVMARKER( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { + // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29, + // x29` marker. Mark the sequence as bundle, to avoid passes moving other code + // in between. + MachineInstr &MI = *MBBI; + + MachineInstr *OriginalCall; + MachineOperand &CallTarget = MI.getOperand(0); + assert((CallTarget.isGlobal() || CallTarget.isReg()) && + "invalid operand for regular call"); + unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR; + OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr(); + OriginalCall->addOperand(CallTarget); + + unsigned RegMaskStartIdx = 1; + // Skip register arguments. Those are added during ISel, but are not + // needed for the concrete branch. + while (!MI.getOperand(RegMaskStartIdx).isRegMask()) { + assert(MI.getOperand(RegMaskStartIdx).isReg() && + "should only skip register operands"); + RegMaskStartIdx++; + } + for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx) + OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx)); + + auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs)) + .addReg(AArch64::FP, RegState::Define) + .addReg(AArch64::XZR) + .addReg(AArch64::FP) + .addImm(0) + .getInstr(); + if (MI.shouldUpdateCallSiteInfo()) + MBB.getParent()->moveCallSiteInfo(&MI, Marker); + MI.eraseFromParent(); + finalizeBundle(MBB, OriginalCall->getIterator(), + std::next(Marker->getIterator())); + return true; +} + /// If MBBI references a pseudo instruction that should be expanded here, /// do the expansion and return true. Otherwise return false. 
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, @@ -1056,8 +1056,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3); case AArch64::LDR_ZZXI: return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2); - case AArch64::BLR_RVMARKER: - return expandCALL_RVMARKER(MBB, MBBI); + case AArch64::BLR_RVMARKER: + return expandCALL_RVMARKER(MBB, MBBI); } return false; } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp index 209f9f7255..afd8765f45 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -54,7 +54,7 @@ using namespace llvm; -#define DEBUG_TYPE "aarch64-falkor-hwpf-fix" +#define DEBUG_TYPE "aarch64-falkor-hwpf-fix" STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked"); STATISTIC(NumCollisionsAvoided, @@ -146,7 +146,7 @@ bool FalkorMarkStridedAccesses::run() { bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) { // Only mark strided loads in the inner-most loop - if (!L.isInnermost()) + if (!L.isInnermost()) return false; bool MadeChange = false; @@ -224,10 +224,10 @@ struct LoadInfo { char FalkorHWPFFix::ID = 0; -INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late", +INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late", "Falkor HW Prefetch Fix Late Phase", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late", +INITIALIZE_PASS_END(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late", "Falkor HW Prefetch Fix Late Phase", false, false) static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) { @@ -830,7 +830,7 @@ bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) { for (MachineLoop *I : LI) for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L) // Only process inner-loops - if (L->isInnermost()) + if (L->isInnermost()) runOnLoop(**L, Fn); return Modified; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FastISel.cpp index 9801036653..b4e4233448 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FastISel.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FastISel.cpp @@ -3409,7 +3409,7 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, const Value *RHS = II->getArgOperand(1); // Canonicalize immediate to the RHS. - if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) + if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) std::swap(LHS, RHS); // Simplify multiplies. @@ -3651,10 +3651,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) .addImm(1); return true; - case Intrinsic::debugtrap: - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) - .addImm(0xF000); - return true; + case Intrinsic::debugtrap: + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) + .addImm(0xF000); + return true; case Intrinsic::sqrt: { Type *RetTy = II->getCalledFunction()->getReturnType(); @@ -3696,7 +3696,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { const Value *LHS = II->getArgOperand(0); const Value *RHS = II->getArgOperand(1); // Canonicalize immediate to the RHS. 
- if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) + if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) std::swap(LHS, RHS); // Simplify multiplies. diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp index 65ee501604..9aa8f7a804 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -175,10 +175,10 @@ static cl::opt<bool> StackTaggingMergeSetTag( cl::desc("merge settag instruction in function epilog"), cl::init(true), cl::Hidden); -static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects", - cl::desc("sort stack allocations"), - cl::init(true), cl::Hidden); - +static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects", + cl::desc("sort stack allocations"), + cl::init(true), cl::Hidden); + STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); /// Returns the argument pop size. @@ -249,7 +249,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { TargetStackID::Value AArch64FrameLowering::getStackIDForScalableVectors() const { - return TargetStackID::ScalableVector; + return TargetStackID::ScalableVector; } /// Returns the size of the fixed object area (allocated next to sp on entry) @@ -273,7 +273,7 @@ static unsigned getFixedObjectSize(const MachineFunction &MF, /// Returns the size of the entire SVE stackframe (calleesaves + spills). static StackOffset getSVEStackSize(const MachineFunction &MF) { const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE()); + return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE()); } bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { @@ -365,15 +365,15 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( // Most call frames will be allocated at the start of a function so // this is OK, but it is a limitation that needs dealing with. assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); - emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(Amount), TII); + emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, + StackOffset::getFixed(Amount), TII); } } else if (CalleePopAmount != 0) { // If the calling convention demands that the callee pops arguments from the // stack, we want to add it back if we have a reserved call frame. 
assert(CalleePopAmount < 0xffffff && "call frame too large"); emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(-(int64_t)CalleePopAmount), TII); + StackOffset::getFixed(-(int64_t)CalleePopAmount), TII); } return MBB.erase(I); } @@ -413,8 +413,8 @@ static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP( const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const { int64_t NumBytes, NumVGScaledBytes; - AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(OffsetFromSP, NumBytes, - NumVGScaledBytes); + AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(OffsetFromSP, NumBytes, + NumVGScaledBytes); std::string CommentBuffer = "sp"; llvm::raw_string_ostream Comment(CommentBuffer); @@ -441,8 +441,8 @@ MCCFIInstruction AArch64FrameLowering::createCfaOffset( const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &OffsetFromDefCFA) const { int64_t NumBytes, NumVGScaledBytes; - AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( - OffsetFromDefCFA, NumBytes, NumVGScaledBytes); + AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( + OffsetFromDefCFA, NumBytes, NumVGScaledBytes); unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); @@ -496,14 +496,14 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves( continue; StackOffset Offset; - if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector) { + if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector) { AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - Offset = - StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) - - StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI)); + Offset = + StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) - + StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI)); } else { - Offset = StackOffset::getFixed(MFI.getObjectOffset(Info.getFrameIdx()) - - getOffsetOfLocalArea()); + Offset = StackOffset::getFixed(MFI.getObjectOffset(Info.getFrameIdx()) - + getOffsetOfLocalArea()); } unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) @@ -584,12 +584,12 @@ static bool windowsRequiresStackProbe(MachineFunction &MF, !F.hasFnAttribute("no-stack-arg-probe"); } -static bool needsWinCFI(const MachineFunction &MF) { - const Function &F = MF.getFunction(); - return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && - F.needsUnwindTableEntry(); -} - +static bool needsWinCFI(const MachineFunction &MF) { + const Function &F = MF.getFunction(); + return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + F.needsUnwindTableEntry(); +} + bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( MachineFunction &MF, uint64_t StackBumpBytes) const { AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); @@ -600,18 +600,18 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( if (AFI->getLocalStackSize() == 0) return false; - // For WinCFI, if optimizing for size, prefer to not combine the stack bump - // (to force a stp with predecrement) to match the packed unwind format, - // provided that there actually are any callee saved registers to merge the - // decrement with. - // This is potentially marginally slower, but allows using the packed - // unwind format for functions that both have a local area and callee saved - // registers. Using the packed unwind format notably reduces the size of - // the unwind info. 
- if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 && - MF.getFunction().hasOptSize()) - return false; - + // For WinCFI, if optimizing for size, prefer to not combine the stack bump + // (to force a stp with predecrement) to match the packed unwind format, + // provided that there actually are any callee saved registers to merge the + // decrement with. + // This is potentially marginally slower, but allows using the packed + // unwind format for functions that both have a local area and callee saved + // registers. Using the packed unwind format notably reduces the size of + // the unwind info. + if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 && + MF.getFunction().hasOptSize()) + return false; + // 512 is the maximum immediate for stp/ldp that will be used for // callee-save save/restores if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes)) @@ -1051,16 +1051,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // to determine the end of the prologue. DebugLoc DL; - const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>(); - if (MFnI.shouldSignReturnAddress()) { - if (MFnI.shouldSignWithBKey()) { + const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>(); + if (MFnI.shouldSignReturnAddress()) { + if (MFnI.shouldSignWithBKey()) { BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY)) .setMIFlag(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP)) .setMIFlag(MachineInstr::FrameSetup); - } else { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP)) - .setMIFlag(MachineInstr::FrameSetup); + } else { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP)) + .setMIFlag(MachineInstr::FrameSetup); } unsigned CFIIndex = @@ -1075,13 +1075,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; - // Set tagged base pointer to the requested stack slot. + // Set tagged base pointer to the requested stack slot. // Ideally it should match SP value after prologue. - Optional<int> TBPI = AFI->getTaggedBasePointerIndex(); - if (TBPI) - AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI)); - else - AFI->setTaggedBasePointerOffset(MFI.getStackSize()); + Optional<int> TBPI = AFI->getTaggedBasePointerIndex(); + if (TBPI) + AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI)); + else + AFI->setTaggedBasePointerOffset(MFI.getStackSize()); const StackOffset &SVEStackSize = getSVEStackSize(MF); @@ -1108,8 +1108,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, ++NumRedZoneFunctions; } else { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(-NumBytes), TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(-NumBytes), TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); if (!NeedsWinCFI && needsFrameMoves) { // Label used to tie together the PROLOG_LABEL and the MachineMoves. 
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); @@ -1142,8 +1142,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (CombineSPBump) { assert(!SVEStackSize && "Cannot combine SP bump with SVE"); emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(-NumBytes), TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(-NumBytes), TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); NumBytes = 0; } else if (PrologueSaveSize != 0) { MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( @@ -1167,7 +1167,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // For funclets the FP belongs to the containing function. if (!IsFunclet && HasFP) { // Only set up FP if we actually need to. - int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset(); + int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset(); if (CombineSPBump) FPOffset += AFI->getLocalStackSize(); @@ -1177,8 +1177,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, - StackOffset::getFixed(FPOffset), TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(FPOffset), TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); } if (windowsRequiresStackProbe(MF, NumBytes)) { @@ -1288,7 +1288,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, ++MBBI; CalleeSavesEnd = MBBI; - AllocateBefore = StackOffset::getScalable(CalleeSavedSize); + AllocateBefore = StackOffset::getScalable(CalleeSavedSize); AllocateAfter = SVEStackSize - AllocateBefore; } @@ -1320,8 +1320,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, - StackOffset::getFixed(-NumBytes), TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); + StackOffset::getFixed(-NumBytes), TII, + MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); if (NeedsRealignment) { const unsigned NrBitsToZero = Log2(MFI.getMaxAlign()); @@ -1458,15 +1458,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // .cfi_offset w28, -32 if (HasFP) { - const int OffsetToFirstCalleeSaveFromFP = - AFI->getCalleeSaveBaseToFrameRecordOffset() - - AFI->getCalleeSavedStackSize(); - Register FramePtr = RegInfo->getFrameRegister(MF); - + const int OffsetToFirstCalleeSaveFromFP = + AFI->getCalleeSaveBaseToFrameRecordOffset() - + AFI->getCalleeSavedStackSize(); + Register FramePtr = RegInfo->getFrameRegister(MF); + // Define the current CFA rule to use the provided FP. 
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP)); + MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -1476,7 +1476,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); StackOffset TotalSize = - SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize()); + SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize()); CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize)); } else { // Encode the stack size of the leaf function. @@ -1496,8 +1496,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB) { - const auto &MFI = *MF.getInfo<AArch64FunctionInfo>(); - if (!MFI.shouldSignReturnAddress()) + const auto &MFI = *MF.getInfo<AArch64FunctionInfo>(); + if (!MFI.shouldSignReturnAddress()) return; const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); @@ -1511,16 +1511,16 @@ static void InsertReturnAddressAuth(MachineFunction &MF, // this instruction can safely used for any v8a architecture. // From v8.3a onwards there are optimised authenticate LR and return // instructions, namely RETA{A,B}, that can be used instead. - if (Subtarget.hasPAuth() && MBBI != MBB.end() && + if (Subtarget.hasPAuth() && MBBI != MBB.end() && MBBI->getOpcode() == AArch64::RET_ReallyLR) { BuildMI(MBB, MBBI, DL, - TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA)) + TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA)) .copyImplicitOps(*MBBI); MBB.erase(MBBI); } else { BuildMI( MBB, MBBI, DL, - TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP)) + TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP)) .setMIFlag(MachineInstr::FrameDestroy); } } @@ -1545,7 +1545,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, bool NeedsWinCFI = needsWinCFI(MF); bool HasWinCFI = false; bool IsFunclet = false; - auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); }); + auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); }); if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); @@ -1645,13 +1645,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, NeedsWinCFI, &HasWinCFI); } - if (MF.hasWinCFI()) { - // If the prologue didn't contain any SEH opcodes and didn't set the - // MF.hasWinCFI() flag, assume the epilogue won't either, and skip the - // EpilogStart - to avoid generating CFI for functions that don't need it. - // (And as we didn't generate any prologue at all, it would be asymmetrical - // to the epilogue.) By the end of the function, we assert that - // HasWinCFI is equal to MF.hasWinCFI(), to verify this assumption. + if (MF.hasWinCFI()) { + // If the prologue didn't contain any SEH opcodes and didn't set the + // MF.hasWinCFI() flag, assume the epilogue won't either, and skip the + // EpilogStart - to avoid generating CFI for functions that don't need it. + // (And as we didn't generate any prologue at all, it would be asymmetrical + // to the epilogue.) 
By the end of the function, we assert that + // HasWinCFI is equal to MF.hasWinCFI(), to verify this assumption. HasWinCFI = true; BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart)) .setMIFlag(MachineInstr::FrameDestroy); @@ -1663,10 +1663,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (CombineSPBump) { assert(!SVEStackSize && "Cannot combine SP bump with SVE"); emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize), - TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, - &HasWinCFI); - if (HasWinCFI) + StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize), + TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, + &HasWinCFI); + if (HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); @@ -1689,8 +1689,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, assert(IsSVECalleeSave(RestoreBegin) && IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction"); - StackOffset CalleeSavedSizeAsOffset = - StackOffset::getScalable(CalleeSavedSize); + StackOffset CalleeSavedSizeAsOffset = + StackOffset::getScalable(CalleeSavedSize); DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset; DeallocateAfter = CalleeSavedSizeAsOffset; } @@ -1703,15 +1703,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // be reloaded. The code below will deallocate the stack space // space by moving FP -> SP. emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP, - StackOffset::getScalable(-CalleeSavedSize), TII, + StackOffset::getScalable(-CalleeSavedSize), TII, MachineInstr::FrameDestroy); } else { if (AFI->getSVECalleeSavedStackSize()) { // Deallocate the non-SVE locals first before we can deallocate (and // restore callee saves) from the SVE area. emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(NumBytes), TII, - MachineInstr::FrameDestroy); + StackOffset::getFixed(NumBytes), TII, + MachineInstr::FrameDestroy); NumBytes = 0; } @@ -1744,10 +1744,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI); emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(StackRestoreBytes), TII, + StackOffset::getFixed(StackRestoreBytes), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); if (Done) { - if (HasWinCFI) { + if (HasWinCFI) { BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); @@ -1763,14 +1763,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. 
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) { - emitFrameOffset( - MBB, LastPopI, DL, AArch64::SP, AArch64::FP, - StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()), - TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); + emitFrameOffset( + MBB, LastPopI, DL, AArch64::SP, AArch64::FP, + StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()), + TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); } else if (NumBytes) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(NumBytes), TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI); + StackOffset::getFixed(NumBytes), TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI); // This must be placed after the callee-save restore code because that code // assumes the SP is at the same location as it was after the callee-save save @@ -1791,10 +1791,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, adaptForLdStOpt(MBB, FirstSPPopI, LastPopI); emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed((int64_t)AfterCSRPopSize), TII, + StackOffset::getFixed((int64_t)AfterCSRPopSize), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); } - if (HasWinCFI) + if (HasWinCFI) BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd)) .setMIFlag(MachineInstr::FrameDestroy); } @@ -1803,51 +1803,51 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, /// debug info. It's the same as what we use for resolving the code-gen /// references for now. FIXME: This can go wrong when references are /// SP-relative and simple call frames aren't used. -StackOffset -AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, - Register &FrameReg) const { +StackOffset +AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const { return resolveFrameIndexReference( - MF, FI, FrameReg, - /*PreferFP=*/ - MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress), - /*ForSimm=*/false); + MF, FI, FrameReg, + /*PreferFP=*/ + MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress), + /*ForSimm=*/false); } -StackOffset -AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF, - int FI) const { - return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI)); +StackOffset +AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF, + int FI) const { + return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI)); } -static StackOffset getFPOffset(const MachineFunction &MF, - int64_t ObjectOffset) { +static StackOffset getFPOffset(const MachineFunction &MF, + int64_t ObjectOffset) { const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false); - int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo()); - int64_t FPAdjust = - CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset(); - return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust); + int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo()); + int64_t FPAdjust = + CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset(); + return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust); } -static StackOffset getStackOffset(const MachineFunction &MF, - int64_t 
ObjectOffset) { +static StackOffset getStackOffset(const MachineFunction &MF, + int64_t ObjectOffset) { const auto &MFI = MF.getFrameInfo(); - return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize()); + return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize()); } - // TODO: This function currently does not work for scalable vectors. + // TODO: This function currently does not work for scalable vectors. int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const { const auto *RegInfo = static_cast<const AArch64RegisterInfo *>( MF.getSubtarget().getRegisterInfo()); int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI); return RegInfo->getLocalAddressRegister(MF) == AArch64::FP - ? getFPOffset(MF, ObjectOffset).getFixed() - : getStackOffset(MF, ObjectOffset).getFixed(); + ? getFPOffset(MF, ObjectOffset).getFixed() + : getStackOffset(MF, ObjectOffset).getFixed(); } StackOffset AArch64FrameLowering::resolveFrameIndexReference( @@ -1856,7 +1856,7 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference( const auto &MFI = MF.getFrameInfo(); int64_t ObjectOffset = MFI.getObjectOffset(FI); bool isFixed = MFI.isFixedObjectIndex(FI); - bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector; + bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector; return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg, PreferFP, ForSimm); } @@ -1870,8 +1870,8 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); - int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed(); - int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed(); + int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed(); + int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed(); bool isCSR = !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); @@ -1946,16 +1946,16 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( "non-argument/CSR objects cannot be accessed through the frame pointer"); if (isSVE) { - StackOffset FPOffset = - StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset); - StackOffset SPOffset = - SVEStackSize + - StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(), - ObjectOffset); + StackOffset FPOffset = + StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset); + StackOffset SPOffset = + SVEStackSize + + StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(), + ObjectOffset); // Always use the FP for SVE spills if available and beneficial. if (hasFP(MF) && - (SPOffset.getFixed() || - FPOffset.getScalable() < SPOffset.getScalable() || + (SPOffset.getFixed() || + FPOffset.getScalable() < SPOffset.getScalable() || RegInfo->needsStackRealignment(MF))) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; @@ -1974,7 +1974,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); - return StackOffset::getFixed(FPOffset) + ScalableOffset; + return StackOffset::getFixed(FPOffset) + ScalableOffset; } // Use the base pointer if we have one. 
@@ -1991,7 +1991,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( Offset -= AFI->getLocalStackSize(); } - return StackOffset::getFixed(Offset) + ScalableOffset; + return StackOffset::getFixed(Offset) + ScalableOffset; } static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { @@ -2013,12 +2013,12 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) { } static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, - bool NeedsWinCFI, bool IsFirst) { + bool NeedsWinCFI, bool IsFirst) { // If we are generating register pairs for a Windows function that requires // EH support, then pair consecutive registers only. There are no unwind // opcodes for saves/restores of non-consectuve register pairs. - // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x, - // save_lrpair. + // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x, + // save_lrpair. // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling if (Reg2 == AArch64::FP) @@ -2027,14 +2027,14 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, return false; if (Reg2 == Reg1 + 1) return false; - // If pairing a GPR with LR, the pair can be described by the save_lrpair - // opcode. If this is the first register pair, it would end up with a - // predecrement, but there's no save_lrpair_x opcode, so we can only do this - // if LR is paired with something else than the first register. - // The save_lrpair opcode requires the first register to be an odd one. - if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 && - (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst) - return false; + // If pairing a GPR with LR, the pair can be described by the save_lrpair + // opcode. If this is the first register pair, it would end up with a + // predecrement, but there's no save_lrpair_x opcode, so we can only do this + // if LR is paired with something else than the first register. + // The save_lrpair opcode requires the first register to be an odd one. + if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 && + (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst) + return false; return true; } @@ -2043,10 +2043,10 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, /// LR and FP need to be allocated together when the frame needs to save /// the frame-record. This means any other register pairing with LR is invalid. static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, - bool UsesWinAAPCS, bool NeedsWinCFI, - bool NeedsFrameRecord, bool IsFirst) { + bool UsesWinAAPCS, bool NeedsWinCFI, + bool NeedsFrameRecord, bool IsFirst) { if (UsesWinAAPCS) - return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst); + return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst); // If we need to store the frame record, don't pair any register // with LR other than FP. @@ -2110,22 +2110,22 @@ static void computeCalleeSaveRegisterPairs( (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); int ByteOffset = AFI->getCalleeSavedStackSize(); - int StackFillDir = -1; - int RegInc = 1; - unsigned FirstReg = 0; - if (NeedsWinCFI) { - // For WinCFI, fill the stack from the bottom up. - ByteOffset = 0; - StackFillDir = 1; - // As the CSI array is reversed to match PrologEpilogInserter, iterate - // backwards, to pair up registers starting from lower numbered registers. 
- RegInc = -1; - FirstReg = Count - 1; - } + int StackFillDir = -1; + int RegInc = 1; + unsigned FirstReg = 0; + if (NeedsWinCFI) { + // For WinCFI, fill the stack from the bottom up. + ByteOffset = 0; + StackFillDir = 1; + // As the CSI array is reversed to match PrologEpilogInserter, iterate + // backwards, to pair up registers starting from lower numbered registers. + RegInc = -1; + FirstReg = Count - 1; + } int ScalableByteOffset = AFI->getSVECalleeSavedStackSize(); - // When iterating backwards, the loop condition relies on unsigned wraparound. - for (unsigned i = FirstReg; i < Count; i += RegInc) { + // When iterating backwards, the loop condition relies on unsigned wraparound. + for (unsigned i = FirstReg; i < Count; i += RegInc) { RegPairInfo RPI; RPI.Reg1 = CSI[i].getReg(); @@ -2143,20 +2143,20 @@ static void computeCalleeSaveRegisterPairs( llvm_unreachable("Unsupported register class."); // Add the next reg to the pair if it is in the same register class. - if (unsigned(i + RegInc) < Count) { - unsigned NextReg = CSI[i + RegInc].getReg(); - bool IsFirst = i == FirstReg; + if (unsigned(i + RegInc) < Count) { + unsigned NextReg = CSI[i + RegInc].getReg(); + bool IsFirst = i == FirstReg; switch (RPI.Type) { case RegPairInfo::GPR: if (AArch64::GPR64RegClass.contains(NextReg) && - !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, - NeedsWinCFI, NeedsFrameRecord, IsFirst)) + !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, + NeedsWinCFI, NeedsFrameRecord, IsFirst)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR64: if (AArch64::FPR64RegClass.contains(NextReg) && - !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI, - IsFirst)) + !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI, + IsFirst)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR128: @@ -2185,7 +2185,7 @@ static void computeCalleeSaveRegisterPairs( // The order of the registers in the list is controlled by // getCalleeSavedRegs(), so they will always be in-order, as well. assert((!RPI.isPaired() || - (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) && + (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) && "Out of order callee saved regs!"); assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP || @@ -2207,73 +2207,73 @@ static void computeCalleeSaveRegisterPairs( "Callee-save registers not saved as adjacent register pair!"); RPI.FrameIdx = CSI[i].getFrameIdx(); - if (NeedsWinCFI && - RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair - RPI.FrameIdx = CSI[i + RegInc].getFrameIdx(); + if (NeedsWinCFI && + RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair + RPI.FrameIdx = CSI[i + RegInc].getFrameIdx(); int Scale = RPI.getScale(); - - int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset; - assert(OffsetPre % Scale == 0); - + + int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset; + assert(OffsetPre % Scale == 0); + if (RPI.isScalable()) - ScalableByteOffset += StackFillDir * Scale; + ScalableByteOffset += StackFillDir * Scale; else - ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale); + ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale); assert(!(RPI.isScalable() && RPI.isPaired()) && "Paired spill/fill instructions don't exist for SVE vectors"); // Round up size of non-pair to pair size if we need to pad the // callee-save area to ensure 16-byte alignment. 
- if (AFI->hasCalleeSaveStackFreeSpace() && !NeedsWinCFI && + if (AFI->hasCalleeSaveStackFreeSpace() && !NeedsWinCFI && !RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) { - ByteOffset += 8 * StackFillDir; + ByteOffset += 8 * StackFillDir; assert(ByteOffset % 16 == 0); assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16)); - // A stack frame with a gap looks like this, bottom up: - // d9, d8. x21, gap, x20, x19. - // Set extra alignment on the x21 object (the only unpaired register) - // to create the gap above it. + // A stack frame with a gap looks like this, bottom up: + // d9, d8. x21, gap, x20, x19. + // Set extra alignment on the x21 object (the only unpaired register) + // to create the gap above it. MFI.setObjectAlignment(RPI.FrameIdx, Align(16)); } - int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset; - assert(OffsetPost % Scale == 0); - // If filling top down (default), we want the offset after incrementing it. - // If fillibg bootom up (WinCFI) we need the original offset. - int Offset = NeedsWinCFI ? OffsetPre : OffsetPost; + int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset; + assert(OffsetPost % Scale == 0); + // If filling top down (default), we want the offset after incrementing it. + // If fillibg bootom up (WinCFI) we need the original offset. + int Offset = NeedsWinCFI ? OffsetPre : OffsetPost; RPI.Offset = Offset / Scale; assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) || (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) && "Offset out of bounds for LDP/STP immediate"); - // Save the offset to frame record so that the FP register can point to the - // innermost frame record (spilled FP and LR registers). - if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR && - RPI.Reg2 == AArch64::FP) || - (IsWindows && RPI.Reg1 == AArch64::FP && - RPI.Reg2 == AArch64::LR))) - AFI->setCalleeSaveBaseToFrameRecordOffset(Offset); - + // Save the offset to frame record so that the FP register can point to the + // innermost frame record (spilled FP and LR registers). + if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR && + RPI.Reg2 == AArch64::FP) || + (IsWindows && RPI.Reg1 == AArch64::FP && + RPI.Reg2 == AArch64::LR))) + AFI->setCalleeSaveBaseToFrameRecordOffset(Offset); + RegPairs.push_back(RPI); if (RPI.isPaired()) - i += RegInc; - } - if (NeedsWinCFI) { - // If we need an alignment gap in the stack, align the topmost stack - // object. A stack frame with a gap looks like this, bottom up: - // x19, d8. d9, gap. - // Set extra alignment on the topmost stack object (the first element in - // CSI, which goes top down), to create the gap above it. - if (AFI->hasCalleeSaveStackFreeSpace()) - MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16)); - // We iterated bottom up over the registers; flip RegPairs back to top - // down order. - std::reverse(RegPairs.begin(), RegPairs.end()); - } + i += RegInc; + } + if (NeedsWinCFI) { + // If we need an alignment gap in the stack, align the topmost stack + // object. A stack frame with a gap looks like this, bottom up: + // x19, d8. d9, gap. + // Set extra alignment on the topmost stack object (the first element in + // CSI, which goes top down), to create the gap above it. + if (AFI->hasCalleeSaveStackFreeSpace()) + MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16)); + // We iterated bottom up over the registers; flip RegPairs back to top + // down order. 
+ std::reverse(RegPairs.begin(), RegPairs.end()); + } } bool AArch64FrameLowering::spillCalleeSavedRegisters( @@ -2412,7 +2412,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( // Update the StackIDs of the SVE stack slots. MachineFrameInfo &MFI = MF.getFrameInfo(); if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) - MFI.setStackID(RPI.FrameIdx, TargetStackID::ScalableVector); + MFI.setStackID(RPI.FrameIdx, TargetStackID::ScalableVector); } return true; @@ -2704,21 +2704,21 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16)); } -bool AArch64FrameLowering::assignCalleeSavedSpillSlots( - MachineFunction &MF, const TargetRegisterInfo *TRI, - std::vector<CalleeSavedInfo> &CSI) const { - bool NeedsWinCFI = needsWinCFI(MF); - // To match the canonical windows frame layout, reverse the list of - // callee saved registers to get them laid out by PrologEpilogInserter - // in the right order. (PrologEpilogInserter allocates stack objects top - // down. Windows canonical prologs store higher numbered registers at - // the top, thus have the CSI array start from the highest registers.) - if (NeedsWinCFI) - std::reverse(CSI.begin(), CSI.end()); - // Let the generic code do the rest of the setup. - return false; -} - +bool AArch64FrameLowering::assignCalleeSavedSpillSlots( + MachineFunction &MF, const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const { + bool NeedsWinCFI = needsWinCFI(MF); + // To match the canonical windows frame layout, reverse the list of + // callee saved registers to get them laid out by PrologEpilogInserter + // in the right order. (PrologEpilogInserter allocates stack objects top + // down. Windows canonical prologs store higher numbered registers at + // the top, thus have the CSI array start from the highest registers.) + if (NeedsWinCFI) + std::reverse(CSI.begin(), CSI.end()); + // Let the generic code do the rest of the setup. + return false; +} + bool AArch64FrameLowering::enableStackSlotScavenging( const MachineFunction &MF) const { const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); @@ -2761,7 +2761,7 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, #ifndef NDEBUG // First process all fixed stack objects. 
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) - assert(MFI.getStackID(I) != TargetStackID::ScalableVector && + assert(MFI.getStackID(I) != TargetStackID::ScalableVector && "SVE vectors should never be passed on the stack by value, only by " "reference."); #endif @@ -2791,7 +2791,7 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, SmallVector<int, 8> ObjectsToAllocate; for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { unsigned StackID = MFI.getStackID(I); - if (StackID != TargetStackID::ScalableVector) + if (StackID != TargetStackID::ScalableVector) continue; if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex) continue; @@ -2945,12 +2945,12 @@ void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) { const int64_t kMaxOffset = 255 * 16; Register BaseReg = FrameReg; - int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed(); + int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed(); if (BaseRegOffsetBytes < kMinOffset || BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) { Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass); emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg, - StackOffset::getFixed(BaseRegOffsetBytes), TII); + StackOffset::getFixed(BaseRegOffsetBytes), TII); BaseReg = ScratchReg; BaseRegOffsetBytes = 0; } @@ -3007,7 +3007,7 @@ void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) { LoopI->setFlags(FrameRegUpdateFlags); int64_t ExtraBaseRegUpdate = - FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0; + FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0; if (LoopSize < Size) { assert(FrameRegUpdate); assert(Size - LoopSize == 16); @@ -3111,7 +3111,7 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI, // realistically happens in function epilogue. Also, STGloop is expanded // before that pass. if (InsertI != MBB->end() && - canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size, + canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size, &TotalOffset)) { UpdateInstr = &*InsertI++; LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n " @@ -3274,7 +3274,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced( /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP /// before the update. This is easily retrieved as it is exactly the offset /// that is set in processFunctionBeforeFrameFinalized. -StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP( +StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP( const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -3282,7 +3282,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP( LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is " << MFI.getObjectOffset(FI) << "\n"); FrameReg = AArch64::SP; - return StackOffset::getFixed(MFI.getObjectOffset(FI)); + return StackOffset::getFixed(MFI.getObjectOffset(FI)); } return getFrameIndexReference(MF, FI, FrameReg); @@ -3306,162 +3306,162 @@ unsigned AArch64FrameLowering::getWinEHFuncletFrameSize( return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(), getStackAlign()); } - -namespace { -struct FrameObject { - bool IsValid = false; - // Index of the object in MFI. - int ObjectIndex = 0; - // Group ID this object belongs to. - int GroupIndex = -1; - // This object should be placed first (closest to SP). 
- bool ObjectFirst = false; - // This object's group (which always contains the object with - // ObjectFirst==true) should be placed first. - bool GroupFirst = false; -}; - -class GroupBuilder { - SmallVector<int, 8> CurrentMembers; - int NextGroupIndex = 0; - std::vector<FrameObject> &Objects; - -public: - GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {} - void AddMember(int Index) { CurrentMembers.push_back(Index); } - void EndCurrentGroup() { - if (CurrentMembers.size() > 1) { - // Create a new group with the current member list. This might remove them - // from their pre-existing groups. That's OK, dealing with overlapping - // groups is too hard and unlikely to make a difference. - LLVM_DEBUG(dbgs() << "group:"); - for (int Index : CurrentMembers) { - Objects[Index].GroupIndex = NextGroupIndex; - LLVM_DEBUG(dbgs() << " " << Index); - } - LLVM_DEBUG(dbgs() << "\n"); - NextGroupIndex++; - } - CurrentMembers.clear(); - } -}; - -bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) { - // Objects at a lower index are closer to FP; objects at a higher index are - // closer to SP. - // - // For consistency in our comparison, all invalid objects are placed - // at the end. This also allows us to stop walking when we hit the - // first invalid item after it's all sorted. - // - // The "first" object goes first (closest to SP), followed by the members of - // the "first" group. - // - // The rest are sorted by the group index to keep the groups together. - // Higher numbered groups are more likely to be around longer (i.e. untagged - // in the function epilogue and not at some earlier point). Place them closer - // to SP. - // - // If all else equal, sort by the object index to keep the objects in the - // original order. - return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex, - A.ObjectIndex) < - std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex, - B.ObjectIndex); -} -} // namespace - -void AArch64FrameLowering::orderFrameObjects( - const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { - if (!OrderFrameObjects || ObjectsToAllocate.empty()) - return; - - const MachineFrameInfo &MFI = MF.getFrameInfo(); - std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd()); - for (auto &Obj : ObjectsToAllocate) { - FrameObjects[Obj].IsValid = true; - FrameObjects[Obj].ObjectIndex = Obj; - } - - // Identify stack slots that are tagged at the same time. - GroupBuilder GB(FrameObjects); - for (auto &MBB : MF) { - for (auto &MI : MBB) { - if (MI.isDebugInstr()) - continue; - int OpIndex; - switch (MI.getOpcode()) { - case AArch64::STGloop: - case AArch64::STZGloop: - OpIndex = 3; - break; - case AArch64::STGOffset: - case AArch64::STZGOffset: - case AArch64::ST2GOffset: - case AArch64::STZ2GOffset: - OpIndex = 1; - break; - default: - OpIndex = -1; - } - - int TaggedFI = -1; - if (OpIndex >= 0) { - const MachineOperand &MO = MI.getOperand(OpIndex); - if (MO.isFI()) { - int FI = MO.getIndex(); - if (FI >= 0 && FI < MFI.getObjectIndexEnd() && - FrameObjects[FI].IsValid) - TaggedFI = FI; - } - } - - // If this is a stack tagging instruction for a slot that is not part of a - // group yet, either start a new group or add it to the current one. - if (TaggedFI >= 0) - GB.AddMember(TaggedFI); - else - GB.EndCurrentGroup(); - } - // Groups should never span multiple basic blocks. 
- GB.EndCurrentGroup(); - } - - // If the function's tagged base pointer is pinned to a stack slot, we want to - // put that slot first when possible. This will likely place it at SP + 0, - // and save one instruction when generating the base pointer because IRG does - // not allow an immediate offset. - const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); - Optional<int> TBPI = AFI.getTaggedBasePointerIndex(); - if (TBPI) { - FrameObjects[*TBPI].ObjectFirst = true; - FrameObjects[*TBPI].GroupFirst = true; - int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex; - if (FirstGroupIndex >= 0) - for (FrameObject &Object : FrameObjects) - if (Object.GroupIndex == FirstGroupIndex) - Object.GroupFirst = true; - } - - llvm::stable_sort(FrameObjects, FrameObjectCompare); - - int i = 0; - for (auto &Obj : FrameObjects) { - // All invalid items are sorted at the end, so it's safe to stop. - if (!Obj.IsValid) - break; - ObjectsToAllocate[i++] = Obj.ObjectIndex; - } - - LLVM_DEBUG(dbgs() << "Final frame order:\n"; for (auto &Obj - : FrameObjects) { - if (!Obj.IsValid) - break; - dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex; - if (Obj.ObjectFirst) - dbgs() << ", first"; - if (Obj.GroupFirst) - dbgs() << ", group-first"; - dbgs() << "\n"; - }); -} + +namespace { +struct FrameObject { + bool IsValid = false; + // Index of the object in MFI. + int ObjectIndex = 0; + // Group ID this object belongs to. + int GroupIndex = -1; + // This object should be placed first (closest to SP). + bool ObjectFirst = false; + // This object's group (which always contains the object with + // ObjectFirst==true) should be placed first. + bool GroupFirst = false; +}; + +class GroupBuilder { + SmallVector<int, 8> CurrentMembers; + int NextGroupIndex = 0; + std::vector<FrameObject> &Objects; + +public: + GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {} + void AddMember(int Index) { CurrentMembers.push_back(Index); } + void EndCurrentGroup() { + if (CurrentMembers.size() > 1) { + // Create a new group with the current member list. This might remove them + // from their pre-existing groups. That's OK, dealing with overlapping + // groups is too hard and unlikely to make a difference. + LLVM_DEBUG(dbgs() << "group:"); + for (int Index : CurrentMembers) { + Objects[Index].GroupIndex = NextGroupIndex; + LLVM_DEBUG(dbgs() << " " << Index); + } + LLVM_DEBUG(dbgs() << "\n"); + NextGroupIndex++; + } + CurrentMembers.clear(); + } +}; + +bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) { + // Objects at a lower index are closer to FP; objects at a higher index are + // closer to SP. + // + // For consistency in our comparison, all invalid objects are placed + // at the end. This also allows us to stop walking when we hit the + // first invalid item after it's all sorted. + // + // The "first" object goes first (closest to SP), followed by the members of + // the "first" group. + // + // The rest are sorted by the group index to keep the groups together. + // Higher numbered groups are more likely to be around longer (i.e. untagged + // in the function epilogue and not at some earlier point). Place them closer + // to SP. + // + // If all else equal, sort by the object index to keep the objects in the + // original order. 
+ return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex, + A.ObjectIndex) < + std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex, + B.ObjectIndex); +} +} // namespace + +void AArch64FrameLowering::orderFrameObjects( + const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { + if (!OrderFrameObjects || ObjectsToAllocate.empty()) + return; + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd()); + for (auto &Obj : ObjectsToAllocate) { + FrameObjects[Obj].IsValid = true; + FrameObjects[Obj].ObjectIndex = Obj; + } + + // Identify stack slots that are tagged at the same time. + GroupBuilder GB(FrameObjects); + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (MI.isDebugInstr()) + continue; + int OpIndex; + switch (MI.getOpcode()) { + case AArch64::STGloop: + case AArch64::STZGloop: + OpIndex = 3; + break; + case AArch64::STGOffset: + case AArch64::STZGOffset: + case AArch64::ST2GOffset: + case AArch64::STZ2GOffset: + OpIndex = 1; + break; + default: + OpIndex = -1; + } + + int TaggedFI = -1; + if (OpIndex >= 0) { + const MachineOperand &MO = MI.getOperand(OpIndex); + if (MO.isFI()) { + int FI = MO.getIndex(); + if (FI >= 0 && FI < MFI.getObjectIndexEnd() && + FrameObjects[FI].IsValid) + TaggedFI = FI; + } + } + + // If this is a stack tagging instruction for a slot that is not part of a + // group yet, either start a new group or add it to the current one. + if (TaggedFI >= 0) + GB.AddMember(TaggedFI); + else + GB.EndCurrentGroup(); + } + // Groups should never span multiple basic blocks. + GB.EndCurrentGroup(); + } + + // If the function's tagged base pointer is pinned to a stack slot, we want to + // put that slot first when possible. This will likely place it at SP + 0, + // and save one instruction when generating the base pointer because IRG does + // not allow an immediate offset. + const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); + Optional<int> TBPI = AFI.getTaggedBasePointerIndex(); + if (TBPI) { + FrameObjects[*TBPI].ObjectFirst = true; + FrameObjects[*TBPI].GroupFirst = true; + int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex; + if (FirstGroupIndex >= 0) + for (FrameObject &Object : FrameObjects) + if (Object.GroupIndex == FirstGroupIndex) + Object.GroupFirst = true; + } + + llvm::stable_sort(FrameObjects, FrameObjectCompare); + + int i = 0; + for (auto &Obj : FrameObjects) { + // All invalid items are sorted at the end, so it's safe to stop. 
+ if (!Obj.IsValid) + break; + ObjectsToAllocate[i++] = Obj.ObjectIndex; + } + + LLVM_DEBUG(dbgs() << "Final frame order:\n"; for (auto &Obj + : FrameObjects) { + if (!Obj.IsValid) + break; + dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex; + if (Obj.ObjectFirst) + dbgs() << ", first"; + if (Obj.GroupFirst) + dbgs() << ", group-first"; + dbgs() << "\n"; + }); +} diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.h index 80079a9d98..b3a402de03 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.h @@ -13,7 +13,7 @@ #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H #define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H -#include "llvm/Support/TypeSize.h" +#include "llvm/Support/TypeSize.h" #include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { @@ -41,8 +41,8 @@ public: bool canUseAsPrologue(const MachineBasicBlock &MBB) const override; - StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, - Register &FrameReg) const override; + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const override; StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP, bool ForSimm) const; @@ -67,11 +67,11 @@ public: bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; - bool - assignCalleeSavedSpillSlots(MachineFunction &MF, - const TargetRegisterInfo *TRI, - std::vector<CalleeSavedInfo> &CSI) const override; - + bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; @@ -94,12 +94,12 @@ public: unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const; - StackOffset - getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, - Register &FrameReg, - bool IgnoreSPUpdates) const override; - StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF, - int FI) const override; + StackOffset + getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, + Register &FrameReg, + bool IgnoreSPUpdates) const override; + StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF, + int FI) const override; int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const; bool isSupportedStackID(TargetStackID::Value ID) const override { @@ -107,7 +107,7 @@ public: default: return false; case TargetStackID::Default: - case TargetStackID::ScalableVector: + case TargetStackID::ScalableVector: case TargetStackID::NoAlloc: return true; } @@ -116,13 +116,13 @@ public: bool isStackIdSafeForLocalArea(unsigned StackId) const override { // We don't support putting SVE objects into the pre-allocated local // frame block at the moment. 
- return StackId != TargetStackID::ScalableVector; + return StackId != TargetStackID::ScalableVector; } - void - orderFrameObjects(const MachineFunction &MF, - SmallVectorImpl<int> &ObjectsToAllocate) const override; - + void + orderFrameObjects(const MachineFunction &MF, + SmallVectorImpl<int> &ObjectsToAllocate) const override; + private: bool shouldCombineCSRLocalStackBump(MachineFunction &MF, uint64_t StackBumpBytes) const; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 94b5d7718d..a570f2d3b0 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include "AArch64MachineFunctionInfo.h" +#include "AArch64MachineFunctionInfo.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/APSInt.h" @@ -191,16 +191,16 @@ public: return SelectSVELogicalImm(N, VT, Imm); } - template <MVT::SimpleValueType VT> - bool SelectSVEArithImm(SDValue N, SDValue &Imm) { - return SelectSVEArithImm(N, VT, Imm); - } - - template <unsigned Low, unsigned High, bool AllowSaturation = false> - bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { - return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); + template <MVT::SimpleValueType VT> + bool SelectSVEArithImm(SDValue N, SDValue &Imm) { + return SelectSVEArithImm(N, VT, Imm); } + template <unsigned Low, unsigned High, bool AllowSaturation = false> + bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { + return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); + } + // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. template<signed Min, signed Max, signed Scale, bool Shift> bool SelectCntImm(SDValue N, SDValue &Imm) { @@ -329,10 +329,10 @@ private: bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm); bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); - bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High, - bool AllowSaturation, SDValue &Imm); + bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High, + bool AllowSaturation, SDValue &Imm); - bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm); + bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm); bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, SDValue &Offset); }; @@ -1377,12 +1377,12 @@ void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); - // Transfer memoperands. In the case of AArch64::LD64B, there won't be one, - // because it's too simple to have needed special treatment during lowering. - if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) { - MachineMemOperand *MemOp = MemIntr->getMemOperand(); - CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); - } + // Transfer memoperands. In the case of AArch64::LD64B, there won't be one, + // because it's too simple to have needed special treatment during lowering. 
+ if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) { + MachineMemOperand *MemOp = MemIntr->getMemOperand(); + CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); + } CurDAG->RemoveDeadNode(N); } @@ -3136,28 +3136,28 @@ bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { return false; } -bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { +bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { if (auto CNode = dyn_cast<ConstantSDNode>(N)) { - uint64_t ImmVal = CNode->getZExtValue(); - - switch (VT.SimpleTy) { - case MVT::i8: - ImmVal &= 0xFF; - break; - case MVT::i16: - ImmVal &= 0xFFFF; - break; - case MVT::i32: - ImmVal &= 0xFFFFFFFF; - break; - case MVT::i64: - break; - default: - llvm_unreachable("Unexpected type"); - } - + uint64_t ImmVal = CNode->getZExtValue(); + + switch (VT.SimpleTy) { + case MVT::i8: + ImmVal &= 0xFF; + break; + case MVT::i16: + ImmVal &= 0xFFFF; + break; + case MVT::i32: + ImmVal &= 0xFFFFFFFF; + break; + case MVT::i64: + break; + default: + llvm_unreachable("Unexpected type"); + } + if (ImmVal < 256) { - Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); + Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); return true; } } @@ -3201,30 +3201,30 @@ bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) { return false; } -// SVE shift intrinsics allow shift amounts larger than the element's bitwidth. -// Rather than attempt to normalise everything we can sometimes saturate the -// shift amount during selection. This function also allows for consistent -// isel patterns by ensuring the resulting "Imm" node is of the i32 type -// required by the instructions. -bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, - uint64_t High, bool AllowSaturation, - SDValue &Imm) { +// SVE shift intrinsics allow shift amounts larger than the element's bitwidth. +// Rather than attempt to normalise everything we can sometimes saturate the +// shift amount during selection. This function also allows for consistent +// isel patterns by ensuring the resulting "Imm" node is of the i32 type +// required by the instructions. +bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, + uint64_t High, bool AllowSaturation, + SDValue &Imm) { if (auto *CN = dyn_cast<ConstantSDNode>(N)) { uint64_t ImmVal = CN->getZExtValue(); - // Reject shift amounts that are too small. - if (ImmVal < Low) - return false; - - // Reject or saturate shift amounts that are too big. - if (ImmVal > High) { - if (!AllowSaturation) - return false; - ImmVal = High; + // Reject shift amounts that are too small. + if (ImmVal < Low) + return false; + + // Reject or saturate shift amounts that are too big. 
+ if (ImmVal > High) { + if (!AllowSaturation) + return false; + ImmVal = High; } - - Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); - return true; + + Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); + return true; } return false; @@ -3833,9 +3833,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { return; } break; - case Intrinsic::aarch64_ld64b: - SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); - return; + case Intrinsic::aarch64_ld64b: + SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); + return; } } break; case ISD::INTRINSIC_WO_CHAIN: { @@ -4854,8 +4854,8 @@ static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, return EVT(); ElementCount EC = PredVT.getVectorElementCount(); - EVT ScalarVT = - EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue()); + EVT ScalarVT = + EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue()); EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec); return MemVT; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.cpp index c522ee7662..513c8932b3 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -112,76 +112,76 @@ EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, "optimization"), cl::init(true)); -// Temporary option added for the purpose of testing functionality added -// to DAGCombiner.cpp in D92230. It is expected that this can be removed -// in future when both implementations will be based off MGATHER rather -// than the GLD1 nodes added for the SVE gather load intrinsics. -static cl::opt<bool> -EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, - cl::desc("Combine extends of AArch64 masked " - "gather intrinsics"), - cl::init(true)); - +// Temporary option added for the purpose of testing functionality added +// to DAGCombiner.cpp in D92230. It is expected that this can be removed +// in future when both implementations will be based off MGATHER rather +// than the GLD1 nodes added for the SVE gather load intrinsics. +static cl::opt<bool> +EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, + cl::desc("Combine extends of AArch64 masked " + "gather intrinsics"), + cl::init(true)); + /// Value type used for condition codes. static const MVT MVT_CC = MVT::i32; -static inline EVT getPackedSVEVectorVT(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - default: - llvm_unreachable("unexpected element type for vector"); - case MVT::i8: - return MVT::nxv16i8; - case MVT::i16: - return MVT::nxv8i16; - case MVT::i32: - return MVT::nxv4i32; - case MVT::i64: - return MVT::nxv2i64; - case MVT::f16: - return MVT::nxv8f16; - case MVT::f32: - return MVT::nxv4f32; - case MVT::f64: - return MVT::nxv2f64; - case MVT::bf16: - return MVT::nxv8bf16; - } -} - -// NOTE: Currently there's only a need to return integer vector types. If this -// changes then just add an extra "type" parameter. 
-static inline EVT getPackedSVEVectorVT(ElementCount EC) { - switch (EC.getKnownMinValue()) { - default: - llvm_unreachable("unexpected element count for vector"); - case 16: - return MVT::nxv16i8; - case 8: - return MVT::nxv8i16; - case 4: - return MVT::nxv4i32; - case 2: - return MVT::nxv2i64; - } -} - -static inline EVT getPromotedVTForPredicate(EVT VT) { - assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) && - "Expected scalable predicate vector type!"); - switch (VT.getVectorMinNumElements()) { - default: - llvm_unreachable("unexpected element count for vector"); - case 2: - return MVT::nxv2i64; - case 4: - return MVT::nxv4i32; - case 8: - return MVT::nxv8i16; - case 16: - return MVT::nxv16i8; - } -} - +static inline EVT getPackedSVEVectorVT(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + default: + llvm_unreachable("unexpected element type for vector"); + case MVT::i8: + return MVT::nxv16i8; + case MVT::i16: + return MVT::nxv8i16; + case MVT::i32: + return MVT::nxv4i32; + case MVT::i64: + return MVT::nxv2i64; + case MVT::f16: + return MVT::nxv8f16; + case MVT::f32: + return MVT::nxv4f32; + case MVT::f64: + return MVT::nxv2f64; + case MVT::bf16: + return MVT::nxv8bf16; + } +} + +// NOTE: Currently there's only a need to return integer vector types. If this +// changes then just add an extra "type" parameter. +static inline EVT getPackedSVEVectorVT(ElementCount EC) { + switch (EC.getKnownMinValue()) { + default: + llvm_unreachable("unexpected element count for vector"); + case 16: + return MVT::nxv16i8; + case 8: + return MVT::nxv8i16; + case 4: + return MVT::nxv4i32; + case 2: + return MVT::nxv2i64; + } +} + +static inline EVT getPromotedVTForPredicate(EVT VT) { + assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) && + "Expected scalable predicate vector type!"); + switch (VT.getVectorMinNumElements()) { + default: + llvm_unreachable("unexpected element count for vector"); + case 2: + return MVT::nxv2i64; + case 4: + return MVT::nxv4i32; + case 8: + return MVT::nxv8i16; + case 16: + return MVT::nxv16i8; + } +} + /// Returns true if VT's elements occupy the lowest bit positions of its /// associated register class without any intervening space. /// @@ -194,42 +194,42 @@ static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) { VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock; } -// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading -// predicate and end with a passthru value matching the result type. 
-static bool isMergePassthruOpcode(unsigned Opc) { - switch (Opc) { - default: - return false; - case AArch64ISD::BITREVERSE_MERGE_PASSTHRU: - case AArch64ISD::BSWAP_MERGE_PASSTHRU: - case AArch64ISD::CTLZ_MERGE_PASSTHRU: - case AArch64ISD::CTPOP_MERGE_PASSTHRU: - case AArch64ISD::DUP_MERGE_PASSTHRU: - case AArch64ISD::ABS_MERGE_PASSTHRU: - case AArch64ISD::NEG_MERGE_PASSTHRU: - case AArch64ISD::FNEG_MERGE_PASSTHRU: - case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU: - case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU: - case AArch64ISD::FCEIL_MERGE_PASSTHRU: - case AArch64ISD::FFLOOR_MERGE_PASSTHRU: - case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU: - case AArch64ISD::FRINT_MERGE_PASSTHRU: - case AArch64ISD::FROUND_MERGE_PASSTHRU: - case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU: - case AArch64ISD::FTRUNC_MERGE_PASSTHRU: - case AArch64ISD::FP_ROUND_MERGE_PASSTHRU: - case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU: - case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU: - case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU: - case AArch64ISD::FCVTZU_MERGE_PASSTHRU: - case AArch64ISD::FCVTZS_MERGE_PASSTHRU: - case AArch64ISD::FSQRT_MERGE_PASSTHRU: - case AArch64ISD::FRECPX_MERGE_PASSTHRU: - case AArch64ISD::FABS_MERGE_PASSTHRU: - return true; - } -} - +// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading +// predicate and end with a passthru value matching the result type. +static bool isMergePassthruOpcode(unsigned Opc) { + switch (Opc) { + default: + return false; + case AArch64ISD::BITREVERSE_MERGE_PASSTHRU: + case AArch64ISD::BSWAP_MERGE_PASSTHRU: + case AArch64ISD::CTLZ_MERGE_PASSTHRU: + case AArch64ISD::CTPOP_MERGE_PASSTHRU: + case AArch64ISD::DUP_MERGE_PASSTHRU: + case AArch64ISD::ABS_MERGE_PASSTHRU: + case AArch64ISD::NEG_MERGE_PASSTHRU: + case AArch64ISD::FNEG_MERGE_PASSTHRU: + case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU: + case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU: + case AArch64ISD::FCEIL_MERGE_PASSTHRU: + case AArch64ISD::FFLOOR_MERGE_PASSTHRU: + case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU: + case AArch64ISD::FRINT_MERGE_PASSTHRU: + case AArch64ISD::FROUND_MERGE_PASSTHRU: + case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU: + case AArch64ISD::FTRUNC_MERGE_PASSTHRU: + case AArch64ISD::FP_ROUND_MERGE_PASSTHRU: + case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU: + case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU: + case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU: + case AArch64ISD::FCVTZU_MERGE_PASSTHRU: + case AArch64ISD::FCVTZS_MERGE_PASSTHRU: + case AArch64ISD::FSQRT_MERGE_PASSTHRU: + case AArch64ISD::FRECPX_MERGE_PASSTHRU: + case AArch64ISD::FABS_MERGE_PASSTHRU: + return true; + } +} + AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -263,8 +263,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, addDRTypeForNEON(MVT::v1i64); addDRTypeForNEON(MVT::v1f64); addDRTypeForNEON(MVT::v4f16); - if (Subtarget->hasBF16()) - addDRTypeForNEON(MVT::v4bf16); + if (Subtarget->hasBF16()) + addDRTypeForNEON(MVT::v4bf16); addQRTypeForNEON(MVT::v4f32); addQRTypeForNEON(MVT::v2f64); @@ -273,8 +273,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, addQRTypeForNEON(MVT::v4i32); addQRTypeForNEON(MVT::v2i64); addQRTypeForNEON(MVT::v8f16); - if (Subtarget->hasBF16()) - addQRTypeForNEON(MVT::v8bf16); + if (Subtarget->hasBF16()) + addQRTypeForNEON(MVT::v8bf16); } if (Subtarget->hasSVE()) { @@ -303,7 +303,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, 
addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass); } - if (Subtarget->useSVEForFixedLengthVectors()) { + if (Subtarget->useSVEForFixedLengthVectors()) { for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) if (useSVEForFixedLengthVectorVT(VT)) addRegisterClass(VT, &AArch64::ZPRRegClass); @@ -334,9 +334,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, MVT::nxv2f64 }) { setCondCodeAction(ISD::SETO, VT, Expand); setCondCodeAction(ISD::SETOLT, VT, Expand); - setCondCodeAction(ISD::SETLT, VT, Expand); + setCondCodeAction(ISD::SETLT, VT, Expand); setCondCodeAction(ISD::SETOLE, VT, Expand); - setCondCodeAction(ISD::SETLE, VT, Expand); + setCondCodeAction(ISD::SETLE, VT, Expand); setCondCodeAction(ISD::SETULT, VT, Expand); setCondCodeAction(ISD::SETULE, VT, Expand); setCondCodeAction(ISD::SETUGE, VT, Expand); @@ -402,12 +402,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Virtually no operation on f128 is legal, but LLVM can't expand them when // there's a valid register class, so we need custom operations in most cases. setOperationAction(ISD::FABS, MVT::f128, Expand); - setOperationAction(ISD::FADD, MVT::f128, LibCall); + setOperationAction(ISD::FADD, MVT::f128, LibCall); setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); setOperationAction(ISD::FCOS, MVT::f128, Expand); - setOperationAction(ISD::FDIV, MVT::f128, LibCall); + setOperationAction(ISD::FDIV, MVT::f128, LibCall); setOperationAction(ISD::FMA, MVT::f128, Expand); - setOperationAction(ISD::FMUL, MVT::f128, LibCall); + setOperationAction(ISD::FMUL, MVT::f128, LibCall); setOperationAction(ISD::FNEG, MVT::f128, Expand); setOperationAction(ISD::FPOW, MVT::f128, Expand); setOperationAction(ISD::FREM, MVT::f128, Expand); @@ -415,7 +415,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSIN, MVT::f128, Expand); setOperationAction(ISD::FSINCOS, MVT::f128, Expand); setOperationAction(ISD::FSQRT, MVT::f128, Expand); - setOperationAction(ISD::FSUB, MVT::f128, LibCall); + setOperationAction(ISD::FSUB, MVT::f128, LibCall); setOperationAction(ISD::FTRUNC, MVT::f128, Expand); setOperationAction(ISD::SETCC, MVT::f128, Custom); setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom); @@ -451,10 +451,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); - setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom); @@ -509,9 +509,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::i64, Custom); setOperationAction(ISD::CTPOP, MVT::i128, Custom); - setOperationAction(ISD::ABS, MVT::i32, Custom); - setOperationAction(ISD::ABS, MVT::i64, Custom); - + setOperationAction(ISD::ABS, MVT::i32, Custom); + setOperationAction(ISD::ABS, MVT::i64, Custom); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); for (MVT VT : 
MVT::fixedlen_vector_valuetypes()) { @@ -699,57 +699,57 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); - // Generate outline atomics library calls only if LSE was not specified for - // subtarget - if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) { - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall); -#define LCALLNAMES(A, B, N) \ - setLibcallName(A##N##_RELAX, #B #N "_relax"); \ - setLibcallName(A##N##_ACQ, #B #N "_acq"); \ - setLibcallName(A##N##_REL, #B #N "_rel"); \ - setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel"); -#define LCALLNAME4(A, B) \ - LCALLNAMES(A, B, 1) \ - LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) -#define LCALLNAME5(A, B) \ - LCALLNAMES(A, B, 1) \ - LCALLNAMES(A, B, 2) \ - LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16) - LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas) - LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp) - LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd) - LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset) - LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr) - LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor) -#undef LCALLNAMES -#undef LCALLNAME4 -#undef LCALLNAME5 - } - + // Generate outline atomics library calls only if LSE was not specified for + // subtarget + if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) { + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall); + 
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall); +#define LCALLNAMES(A, B, N) \ + setLibcallName(A##N##_RELAX, #B #N "_relax"); \ + setLibcallName(A##N##_ACQ, #B #N "_acq"); \ + setLibcallName(A##N##_REL, #B #N "_rel"); \ + setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel"); +#define LCALLNAME4(A, B) \ + LCALLNAMES(A, B, 1) \ + LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) +#define LCALLNAME5(A, B) \ + LCALLNAMES(A, B, 1) \ + LCALLNAMES(A, B, 2) \ + LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16) + LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas) + LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp) + LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd) + LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset) + LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr) + LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor) +#undef LCALLNAMES +#undef LCALLNAME4 +#undef LCALLNAME5 + } + // 128-bit loads and stores can be done without expanding setOperationAction(ISD::LOAD, MVT::i128, Custom); setOperationAction(ISD::STORE, MVT::i128, Custom); @@ -839,8 +839,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Trap. setOperationAction(ISD::TRAP, MVT::Other, Legal); - setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); - setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal); + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); + setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal); // We combine OR nodes for bitfield operations. setTargetDAGCombine(ISD::OR); @@ -850,7 +850,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Vector add and sub nodes may conceal a high-half opportunity. // Also, try to fold ADD into CSINC/CSINV.. 
setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::ABS); + setTargetDAGCombine(ISD::ABS); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::XOR); @@ -867,15 +867,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); - setTargetDAGCombine(ISD::TRUNCATE); + setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::CONCAT_VECTORS); setTargetDAGCombine(ISD::STORE); if (Subtarget->supportsAddressTopByteIgnored()) setTargetDAGCombine(ISD::LOAD); - setTargetDAGCombine(ISD::MGATHER); - setTargetDAGCombine(ISD::MSCATTER); - + setTargetDAGCombine(ISD::MGATHER); + setTargetDAGCombine(ISD::MSCATTER); + setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SELECT); @@ -884,8 +884,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); - setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); - setTargetDAGCombine(ISD::VECREDUCE_ADD); + setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); + setTargetDAGCombine(ISD::VECREDUCE_ADD); setTargetDAGCombine(ISD::GlobalAddress); @@ -1005,34 +1005,34 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); - // Saturates + // Saturates for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { setOperationAction(ISD::SADDSAT, VT, Legal); setOperationAction(ISD::UADDSAT, VT, Legal); setOperationAction(ISD::SSUBSAT, VT, Legal); setOperationAction(ISD::USUBSAT, VT, Legal); - } + } - // Vector reductions + // Vector reductions for (MVT VT : { MVT::v4f16, MVT::v2f32, MVT::v8f16, MVT::v4f32, MVT::v2f64 }) { - if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) { - setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); - setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); - - setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); - } - } - for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, - MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { - setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); - setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); - setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); - setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); - setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); + if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) { + setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); + + setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); + } } - setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom); + for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, + MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { + setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); + setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); + } + setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); @@ -1093,112 +1093,112 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // FIXME: Add custom lowering of MLOAD to 
handle different passthrus (not a // splat of 0 or undef) once vector selects supported in SVE codegen. See // D68877 for more details. - for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) { - setOperationAction(ISD::BITREVERSE, VT, Custom); - setOperationAction(ISD::BSWAP, VT, Custom); - setOperationAction(ISD::CTLZ, VT, Custom); - setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::CTTZ, VT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); - setOperationAction(ISD::UINT_TO_FP, VT, Custom); - setOperationAction(ISD::SINT_TO_FP, VT, Custom); - setOperationAction(ISD::FP_TO_UINT, VT, Custom); - setOperationAction(ISD::FP_TO_SINT, VT, Custom); - setOperationAction(ISD::MGATHER, VT, Custom); - setOperationAction(ISD::MSCATTER, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::SDIV, VT, Custom); - setOperationAction(ISD::UDIV, VT, Custom); - setOperationAction(ISD::SMIN, VT, Custom); - setOperationAction(ISD::UMIN, VT, Custom); - setOperationAction(ISD::SMAX, VT, Custom); - setOperationAction(ISD::UMAX, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - setOperationAction(ISD::ABS, VT, Custom); - setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); - setOperationAction(ISD::VECREDUCE_AND, VT, Custom); - setOperationAction(ISD::VECREDUCE_OR, VT, Custom); - setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); - setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); - setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); - setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); - setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); + for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) { + setOperationAction(ISD::BITREVERSE, VT, Custom); + setOperationAction(ISD::BSWAP, VT, Custom); + setOperationAction(ISD::CTLZ, VT, Custom); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTTZ, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::UINT_TO_FP, VT, Custom); + setOperationAction(ISD::SINT_TO_FP, VT, Custom); + setOperationAction(ISD::FP_TO_UINT, VT, Custom); + setOperationAction(ISD::FP_TO_SINT, VT, Custom); + setOperationAction(ISD::MGATHER, VT, Custom); + setOperationAction(ISD::MSCATTER, VT, Custom); + setOperationAction(ISD::MUL, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::SDIV, VT, Custom); + setOperationAction(ISD::UDIV, VT, Custom); + setOperationAction(ISD::SMIN, VT, Custom); + setOperationAction(ISD::UMIN, VT, Custom); + setOperationAction(ISD::SMAX, VT, Custom); + setOperationAction(ISD::UMAX, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::ABS, VT, Custom); + setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); + setOperationAction(ISD::VECREDUCE_AND, VT, Custom); + setOperationAction(ISD::VECREDUCE_OR, VT, Custom); + setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); + setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); } - // Illegal unpacked integer 
vector types. - for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) { + // Illegal unpacked integer vector types. + for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) { setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); - } - - for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) { - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); - setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::VECREDUCE_AND, VT, Custom); - setOperationAction(ISD::VECREDUCE_OR, VT, Custom); - setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); - - // There are no legal MVT::nxv16f## based types. - if (VT != MVT::nxv16i1) { - setOperationAction(ISD::SINT_TO_FP, VT, Custom); - setOperationAction(ISD::UINT_TO_FP, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + } + + for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) { + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::VECREDUCE_AND, VT, Custom); + setOperationAction(ISD::VECREDUCE_OR, VT, Custom); + setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); + + // There are no legal MVT::nxv16f## based types. + if (VT != MVT::nxv16i1) { + setOperationAction(ISD::SINT_TO_FP, VT, Custom); + setOperationAction(ISD::UINT_TO_FP, VT, Custom); } } - for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, - MVT::nxv4f32, MVT::nxv2f64}) { - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); - setOperationAction(ISD::MGATHER, VT, Custom); - setOperationAction(ISD::MSCATTER, VT, Custom); - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::FADD, VT, Custom); - setOperationAction(ISD::FDIV, VT, Custom); - setOperationAction(ISD::FMA, VT, Custom); - setOperationAction(ISD::FMAXNUM, VT, Custom); - setOperationAction(ISD::FMINNUM, VT, Custom); - setOperationAction(ISD::FMUL, VT, Custom); - setOperationAction(ISD::FNEG, VT, Custom); - setOperationAction(ISD::FSUB, VT, Custom); - setOperationAction(ISD::FCEIL, VT, Custom); - setOperationAction(ISD::FFLOOR, VT, Custom); - setOperationAction(ISD::FNEARBYINT, VT, Custom); - setOperationAction(ISD::FRINT, VT, Custom); - setOperationAction(ISD::FROUND, VT, Custom); - setOperationAction(ISD::FROUNDEVEN, VT, Custom); - setOperationAction(ISD::FTRUNC, VT, Custom); - setOperationAction(ISD::FSQRT, VT, Custom); - setOperationAction(ISD::FABS, VT, Custom); - setOperationAction(ISD::FP_EXTEND, VT, Custom); - setOperationAction(ISD::FP_ROUND, VT, Custom); - setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); - setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); - setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); - setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); - } - - for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) { - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); - setOperationAction(ISD::MGATHER, VT, Custom); - setOperationAction(ISD::MSCATTER, VT, Custom); - } - - setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom); - - 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); - + for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, + MVT::nxv4f32, MVT::nxv2f64}) { + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::MGATHER, VT, Custom); + setOperationAction(ISD::MSCATTER, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::FADD, VT, Custom); + setOperationAction(ISD::FDIV, VT, Custom); + setOperationAction(ISD::FMA, VT, Custom); + setOperationAction(ISD::FMAXNUM, VT, Custom); + setOperationAction(ISD::FMINNUM, VT, Custom); + setOperationAction(ISD::FMUL, VT, Custom); + setOperationAction(ISD::FNEG, VT, Custom); + setOperationAction(ISD::FSUB, VT, Custom); + setOperationAction(ISD::FCEIL, VT, Custom); + setOperationAction(ISD::FFLOOR, VT, Custom); + setOperationAction(ISD::FNEARBYINT, VT, Custom); + setOperationAction(ISD::FRINT, VT, Custom); + setOperationAction(ISD::FROUND, VT, Custom); + setOperationAction(ISD::FROUNDEVEN, VT, Custom); + setOperationAction(ISD::FTRUNC, VT, Custom); + setOperationAction(ISD::FSQRT, VT, Custom); + setOperationAction(ISD::FABS, VT, Custom); + setOperationAction(ISD::FP_EXTEND, VT, Custom); + setOperationAction(ISD::FP_ROUND, VT, Custom); + setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); + } + + for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) { + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::MGATHER, VT, Custom); + setOperationAction(ISD::MSCATTER, VT, Custom); + } + + setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom); + + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); + // NOTE: Currently this has to happen after computeRegisterProperties rather // than the preferred option of combining it with the addRegisterClass call. - if (Subtarget->useSVEForFixedLengthVectors()) { + if (Subtarget->useSVEForFixedLengthVectors()) { for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) if (useSVEForFixedLengthVectorVT(VT)) addTypeForFixedLengthSVE(VT); @@ -1216,61 +1216,61 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::TRUNCATE, VT, Custom); for (auto VT : {MVT::v8f16, MVT::v4f32}) setOperationAction(ISD::FP_ROUND, VT, Expand); - - // These operations are not supported on NEON but SVE can do them. 
- setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom); - setOperationAction(ISD::CTLZ, MVT::v1i64, Custom); - setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); - setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); - setOperationAction(ISD::MUL, MVT::v1i64, Custom); - setOperationAction(ISD::MUL, MVT::v2i64, Custom); - setOperationAction(ISD::SDIV, MVT::v8i8, Custom); - setOperationAction(ISD::SDIV, MVT::v16i8, Custom); - setOperationAction(ISD::SDIV, MVT::v4i16, Custom); - setOperationAction(ISD::SDIV, MVT::v8i16, Custom); - setOperationAction(ISD::SDIV, MVT::v2i32, Custom); - setOperationAction(ISD::SDIV, MVT::v4i32, Custom); - setOperationAction(ISD::SDIV, MVT::v1i64, Custom); - setOperationAction(ISD::SDIV, MVT::v2i64, Custom); - setOperationAction(ISD::SMAX, MVT::v1i64, Custom); - setOperationAction(ISD::SMAX, MVT::v2i64, Custom); - setOperationAction(ISD::SMIN, MVT::v1i64, Custom); - setOperationAction(ISD::SMIN, MVT::v2i64, Custom); - setOperationAction(ISD::UDIV, MVT::v8i8, Custom); - setOperationAction(ISD::UDIV, MVT::v16i8, Custom); - setOperationAction(ISD::UDIV, MVT::v4i16, Custom); - setOperationAction(ISD::UDIV, MVT::v8i16, Custom); - setOperationAction(ISD::UDIV, MVT::v2i32, Custom); - setOperationAction(ISD::UDIV, MVT::v4i32, Custom); - setOperationAction(ISD::UDIV, MVT::v1i64, Custom); - setOperationAction(ISD::UDIV, MVT::v2i64, Custom); - setOperationAction(ISD::UMAX, MVT::v1i64, Custom); - setOperationAction(ISD::UMAX, MVT::v2i64, Custom); - setOperationAction(ISD::UMIN, MVT::v1i64, Custom); - setOperationAction(ISD::UMIN, MVT::v2i64, Custom); - setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom); - setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom); - setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom); - setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom); - - // Int operations with no NEON support. - for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, - MVT::v2i32, MVT::v4i32, MVT::v2i64}) { - setOperationAction(ISD::BITREVERSE, VT, Custom); - setOperationAction(ISD::CTTZ, VT, Custom); - setOperationAction(ISD::VECREDUCE_AND, VT, Custom); - setOperationAction(ISD::VECREDUCE_OR, VT, Custom); - setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); - } - - // FP operations with no NEON support. - for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, - MVT::v1f64, MVT::v2f64}) - setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); - - // Use SVE for vectors with more than 2 elements. - for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32}) - setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); + + // These operations are not supported on NEON but SVE can do them. 
+ setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom); + setOperationAction(ISD::CTLZ, MVT::v1i64, Custom); + setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); + setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); + setOperationAction(ISD::MUL, MVT::v1i64, Custom); + setOperationAction(ISD::MUL, MVT::v2i64, Custom); + setOperationAction(ISD::SDIV, MVT::v8i8, Custom); + setOperationAction(ISD::SDIV, MVT::v16i8, Custom); + setOperationAction(ISD::SDIV, MVT::v4i16, Custom); + setOperationAction(ISD::SDIV, MVT::v8i16, Custom); + setOperationAction(ISD::SDIV, MVT::v2i32, Custom); + setOperationAction(ISD::SDIV, MVT::v4i32, Custom); + setOperationAction(ISD::SDIV, MVT::v1i64, Custom); + setOperationAction(ISD::SDIV, MVT::v2i64, Custom); + setOperationAction(ISD::SMAX, MVT::v1i64, Custom); + setOperationAction(ISD::SMAX, MVT::v2i64, Custom); + setOperationAction(ISD::SMIN, MVT::v1i64, Custom); + setOperationAction(ISD::SMIN, MVT::v2i64, Custom); + setOperationAction(ISD::UDIV, MVT::v8i8, Custom); + setOperationAction(ISD::UDIV, MVT::v16i8, Custom); + setOperationAction(ISD::UDIV, MVT::v4i16, Custom); + setOperationAction(ISD::UDIV, MVT::v8i16, Custom); + setOperationAction(ISD::UDIV, MVT::v2i32, Custom); + setOperationAction(ISD::UDIV, MVT::v4i32, Custom); + setOperationAction(ISD::UDIV, MVT::v1i64, Custom); + setOperationAction(ISD::UDIV, MVT::v2i64, Custom); + setOperationAction(ISD::UMAX, MVT::v1i64, Custom); + setOperationAction(ISD::UMAX, MVT::v2i64, Custom); + setOperationAction(ISD::UMIN, MVT::v1i64, Custom); + setOperationAction(ISD::UMIN, MVT::v2i64, Custom); + setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom); + setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom); + setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom); + setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom); + + // Int operations with no NEON support. + for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, + MVT::v2i32, MVT::v4i32, MVT::v2i64}) { + setOperationAction(ISD::BITREVERSE, VT, Custom); + setOperationAction(ISD::CTTZ, VT, Custom); + setOperationAction(ISD::VECREDUCE_AND, VT, Custom); + setOperationAction(ISD::VECREDUCE_OR, VT, Custom); + setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); + } + + // FP operations with no NEON support. + for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, + MVT::v1f64, MVT::v2f64}) + setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); + + // Use SVE for vectors with more than 2 elements. + for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32}) + setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); } } @@ -1342,7 +1342,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) { // F[MIN|MAX][NUM|NAN] are available for all FP NEON types. if (VT.isFloatingPoint() && - VT.getVectorElementType() != MVT::bf16 && + VT.getVectorElementType() != MVT::bf16 && (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())) for (unsigned Opcode : {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM}) @@ -1368,64 +1368,64 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); // Lower fixed length vector operations to scalable equivalents. 
- setOperationAction(ISD::ABS, VT, Custom); + setOperationAction(ISD::ABS, VT, Custom); setOperationAction(ISD::ADD, VT, Custom); - setOperationAction(ISD::AND, VT, Custom); - setOperationAction(ISD::ANY_EXTEND, VT, Custom); - setOperationAction(ISD::BITREVERSE, VT, Custom); - setOperationAction(ISD::BSWAP, VT, Custom); - setOperationAction(ISD::CTLZ, VT, Custom); - setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::CTTZ, VT, Custom); + setOperationAction(ISD::AND, VT, Custom); + setOperationAction(ISD::ANY_EXTEND, VT, Custom); + setOperationAction(ISD::BITREVERSE, VT, Custom); + setOperationAction(ISD::BSWAP, VT, Custom); + setOperationAction(ISD::CTLZ, VT, Custom); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTTZ, VT, Custom); setOperationAction(ISD::FADD, VT, Custom); - setOperationAction(ISD::FCEIL, VT, Custom); - setOperationAction(ISD::FDIV, VT, Custom); - setOperationAction(ISD::FFLOOR, VT, Custom); - setOperationAction(ISD::FMA, VT, Custom); - setOperationAction(ISD::FMAXNUM, VT, Custom); - setOperationAction(ISD::FMINNUM, VT, Custom); - setOperationAction(ISD::FMUL, VT, Custom); - setOperationAction(ISD::FNEARBYINT, VT, Custom); - setOperationAction(ISD::FNEG, VT, Custom); - setOperationAction(ISD::FRINT, VT, Custom); - setOperationAction(ISD::FROUND, VT, Custom); - setOperationAction(ISD::FSQRT, VT, Custom); - setOperationAction(ISD::FSUB, VT, Custom); - setOperationAction(ISD::FTRUNC, VT, Custom); + setOperationAction(ISD::FCEIL, VT, Custom); + setOperationAction(ISD::FDIV, VT, Custom); + setOperationAction(ISD::FFLOOR, VT, Custom); + setOperationAction(ISD::FMA, VT, Custom); + setOperationAction(ISD::FMAXNUM, VT, Custom); + setOperationAction(ISD::FMINNUM, VT, Custom); + setOperationAction(ISD::FMUL, VT, Custom); + setOperationAction(ISD::FNEARBYINT, VT, Custom); + setOperationAction(ISD::FNEG, VT, Custom); + setOperationAction(ISD::FRINT, VT, Custom); + setOperationAction(ISD::FROUND, VT, Custom); + setOperationAction(ISD::FSQRT, VT, Custom); + setOperationAction(ISD::FSUB, VT, Custom); + setOperationAction(ISD::FTRUNC, VT, Custom); setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); - setOperationAction(ISD::OR, VT, Custom); - setOperationAction(ISD::SDIV, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SIGN_EXTEND, VT, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); - setOperationAction(ISD::SMAX, VT, Custom); - setOperationAction(ISD::SMIN, VT, Custom); - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::MUL, VT, Custom); + setOperationAction(ISD::OR, VT, Custom); + setOperationAction(ISD::SDIV, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SIGN_EXTEND, VT, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); + setOperationAction(ISD::SMAX, VT, Custom); + setOperationAction(ISD::SMIN, VT, Custom); + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); - setOperationAction(ISD::SUB, VT, Custom); + setOperationAction(ISD::SUB, VT, Custom); setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::UDIV, VT, Custom); - setOperationAction(ISD::UMAX, 
VT, Custom); - setOperationAction(ISD::UMIN, VT, Custom); - setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); - setOperationAction(ISD::VECREDUCE_AND, VT, Custom); - setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); - setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); - setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); - setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); - setOperationAction(ISD::VECREDUCE_OR, VT, Custom); - setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); - setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); - setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); - setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); - setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Custom); - setOperationAction(ISD::XOR, VT, Custom); - setOperationAction(ISD::ZERO_EXTEND, VT, Custom); + setOperationAction(ISD::UDIV, VT, Custom); + setOperationAction(ISD::UMAX, VT, Custom); + setOperationAction(ISD::UMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); + setOperationAction(ISD::VECREDUCE_AND, VT, Custom); + setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); + setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_OR, VT, Custom); + setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Custom); + setOperationAction(ISD::XOR, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND, VT, Custom); } void AArch64TargetLowering::addDRTypeForNEON(MVT VT) { @@ -1597,7 +1597,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode( KnownBits Known2; Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1); - Known = KnownBits::commonBits(Known, Known2); + Known = KnownBits::commonBits(Known, Known2); break; } case AArch64ISD::LOADgot: @@ -1737,38 +1737,38 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::THREAD_POINTER) MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ) MAKE_CASE(AArch64ISD::ADD_PRED) - MAKE_CASE(AArch64ISD::MUL_PRED) + MAKE_CASE(AArch64ISD::MUL_PRED) MAKE_CASE(AArch64ISD::SDIV_PRED) - MAKE_CASE(AArch64ISD::SHL_PRED) - MAKE_CASE(AArch64ISD::SMAX_PRED) - MAKE_CASE(AArch64ISD::SMIN_PRED) - MAKE_CASE(AArch64ISD::SRA_PRED) - MAKE_CASE(AArch64ISD::SRL_PRED) - MAKE_CASE(AArch64ISD::SUB_PRED) + MAKE_CASE(AArch64ISD::SHL_PRED) + MAKE_CASE(AArch64ISD::SMAX_PRED) + MAKE_CASE(AArch64ISD::SMIN_PRED) + MAKE_CASE(AArch64ISD::SRA_PRED) + MAKE_CASE(AArch64ISD::SRL_PRED) + MAKE_CASE(AArch64ISD::SUB_PRED) MAKE_CASE(AArch64ISD::UDIV_PRED) - MAKE_CASE(AArch64ISD::UMAX_PRED) - MAKE_CASE(AArch64ISD::UMIN_PRED) - MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU) - 
MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::UMAX_PRED) + MAKE_CASE(AArch64ISD::UMIN_PRED) + MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) MAKE_CASE(AArch64ISD::ADC) MAKE_CASE(AArch64ISD::SBC) @@ -1837,14 +1837,14 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::UADDV) MAKE_CASE(AArch64ISD::SRHADD) MAKE_CASE(AArch64ISD::URHADD) - MAKE_CASE(AArch64ISD::SHADD) - MAKE_CASE(AArch64ISD::UHADD) + MAKE_CASE(AArch64ISD::SHADD) + MAKE_CASE(AArch64ISD::UHADD) MAKE_CASE(AArch64ISD::SMINV) MAKE_CASE(AArch64ISD::UMINV) MAKE_CASE(AArch64ISD::SMAXV) MAKE_CASE(AArch64ISD::UMAXV) - MAKE_CASE(AArch64ISD::SADDV_PRED) - MAKE_CASE(AArch64ISD::UADDV_PRED) + MAKE_CASE(AArch64ISD::SADDV_PRED) + MAKE_CASE(AArch64ISD::UADDV_PRED) MAKE_CASE(AArch64ISD::SMAXV_PRED) MAKE_CASE(AArch64ISD::UMAXV_PRED) MAKE_CASE(AArch64ISD::SMINV_PRED) @@ -1862,16 +1862,16 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FADD_PRED) MAKE_CASE(AArch64ISD::FADDA_PRED) MAKE_CASE(AArch64ISD::FADDV_PRED) - MAKE_CASE(AArch64ISD::FDIV_PRED) + MAKE_CASE(AArch64ISD::FDIV_PRED) MAKE_CASE(AArch64ISD::FMA_PRED) MAKE_CASE(AArch64ISD::FMAXV_PRED) - MAKE_CASE(AArch64ISD::FMAXNM_PRED) + MAKE_CASE(AArch64ISD::FMAXNM_PRED) MAKE_CASE(AArch64ISD::FMAXNMV_PRED) MAKE_CASE(AArch64ISD::FMINV_PRED) - MAKE_CASE(AArch64ISD::FMINNM_PRED) + MAKE_CASE(AArch64ISD::FMINNM_PRED) MAKE_CASE(AArch64ISD::FMINNMV_PRED) - MAKE_CASE(AArch64ISD::FMUL_PRED) - MAKE_CASE(AArch64ISD::FSUB_PRED) + MAKE_CASE(AArch64ISD::FMUL_PRED) + MAKE_CASE(AArch64ISD::FSUB_PRED) MAKE_CASE(AArch64ISD::BIT) MAKE_CASE(AArch64ISD::CBZ) MAKE_CASE(AArch64ISD::CBNZ) @@ -1983,15 +1983,15 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::LDP) MAKE_CASE(AArch64ISD::STP) MAKE_CASE(AArch64ISD::STNP) - MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU) - MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU) - 
MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::INDEX_VECTOR) - MAKE_CASE(AArch64ISD::UABD) - MAKE_CASE(AArch64ISD::SABD) - MAKE_CASE(AArch64ISD::CALL_RVMARKER) + MAKE_CASE(AArch64ISD::UABD) + MAKE_CASE(AArch64ISD::SABD) + MAKE_CASE(AArch64ISD::CALL_RVMARKER) } #undef MAKE_CASE return nullptr; @@ -2079,7 +2079,7 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: - case TargetOpcode::STATEPOINT: + case TargetOpcode::STATEPOINT: return emitPatchPoint(MI, BB); case AArch64::CATCHRET: @@ -2905,9 +2905,9 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { return std::make_pair(Value, Overflow); } -SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const { - if (useSVEForFixedLengthVectorVT(Op.getValueType())) - return LowerToScalableOp(Op, DAG); +SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const { + if (useSVEForFixedLengthVectorVT(Op.getValueType())) + return LowerToScalableOp(Op, DAG); SDValue Sel = Op.getOperand(0); SDValue Other = Op.getOperand(1); @@ -3083,18 +3083,18 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { - if (Op.getValueType().isScalableVector()) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU); - + if (Op.getValueType().isScalableVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU); + assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); - return SDValue(); + return SDValue(); } SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { - if (Op.getValueType().isScalableVector()) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU); - + if (Op.getValueType().isScalableVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU); + bool IsStrict = Op->isStrictFPOpcode(); SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); EVT SrcVT = SrcVal.getValueType(); @@ -3108,7 +3108,7 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, return Op; } - return SDValue(); + return SDValue(); } SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, @@ -3118,14 +3118,14 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, // in the cost tables. EVT InVT = Op.getOperand(0).getValueType(); EVT VT = Op.getValueType(); - - if (VT.isScalableVector()) { - unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT - ? AArch64ISD::FCVTZU_MERGE_PASSTHRU - : AArch64ISD::FCVTZS_MERGE_PASSTHRU; - return LowerToPredicatedOp(Op, DAG, Opcode); - } - + + if (VT.isScalableVector()) { + unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT + ? AArch64ISD::FCVTZU_MERGE_PASSTHRU + : AArch64ISD::FCVTZS_MERGE_PASSTHRU; + return LowerToPredicatedOp(Op, DAG, Opcode); + } + unsigned NumElts = InVT.getVectorNumElements(); // f16 conversions are promoted to f32 when full fp16 is not supported. 
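// Illustrative sketch, not from the patch: a scalar analogue of the
// VTSize < InVTSize branch that follows. When the FP source is wider than the
// requested integer result (for example f64 lanes converted to i32 lanes), the
// conversion is performed at the source width first and then truncated.
#include <cstdint>

static constexpr std::int32_t fpToNarrowInt(double Src) {
  const std::int64_t Wide = static_cast<std::int64_t>(Src); // convert at the wide width
  return static_cast<std::int32_t>(Wide);                   // then truncate to the result type
}

static_assert(fpToNarrowInt(41.9) == 41);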
@@ -3138,9 +3138,9 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0))); } - uint64_t VTSize = VT.getFixedSizeInBits(); - uint64_t InVTSize = InVT.getFixedSizeInBits(); - if (VTSize < InVTSize) { + uint64_t VTSize = VT.getFixedSizeInBits(); + uint64_t InVTSize = InVT.getFixedSizeInBits(); + if (VTSize < InVTSize) { SDLoc dl(Op); SDValue Cv = DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(), @@ -3148,7 +3148,7 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); } - if (VTSize > InVTSize) { + if (VTSize > InVTSize) { SDLoc dl(Op); MVT ExtVT = MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()), @@ -3183,11 +3183,11 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, return Op; } - return SDValue(); + return SDValue(); } -SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, - SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. // Any additional optimization in this function should be recorded // in the cost tables. @@ -3195,38 +3195,38 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, SDLoc dl(Op); SDValue In = Op.getOperand(0); EVT InVT = In.getValueType(); - unsigned Opc = Op.getOpcode(); - bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP; - - if (VT.isScalableVector()) { - if (InVT.getVectorElementType() == MVT::i1) { - // We can't directly extend an SVE predicate; extend it first. - unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - EVT CastVT = getPromotedVTForPredicate(InVT); - In = DAG.getNode(CastOpc, dl, CastVT, In); - return DAG.getNode(Opc, dl, VT, In); - } - - unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU - : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU; - return LowerToPredicatedOp(Op, DAG, Opcode); - } - - uint64_t VTSize = VT.getFixedSizeInBits(); - uint64_t InVTSize = InVT.getFixedSizeInBits(); - if (VTSize < InVTSize) { + unsigned Opc = Op.getOpcode(); + bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP; + + if (VT.isScalableVector()) { + if (InVT.getVectorElementType() == MVT::i1) { + // We can't directly extend an SVE predicate; extend it first. + unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + EVT CastVT = getPromotedVTForPredicate(InVT); + In = DAG.getNode(CastOpc, dl, CastVT, In); + return DAG.getNode(Opc, dl, VT, In); + } + + unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU + : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU; + return LowerToPredicatedOp(Op, DAG, Opcode); + } + + uint64_t VTSize = VT.getFixedSizeInBits(); + uint64_t InVTSize = InVT.getFixedSizeInBits(); + if (VTSize < InVTSize) { MVT CastVT = MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()), InVT.getVectorNumElements()); - In = DAG.getNode(Opc, dl, CastVT, In); + In = DAG.getNode(Opc, dl, CastVT, In); return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl)); } - if (VTSize > InVTSize) { - unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + if (VTSize > InVTSize) { + unsigned CastOpc = IsSigned ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; EVT CastVT = VT.changeVectorElementTypeToInteger(); In = DAG.getNode(CastOpc, dl, CastVT, In); - return DAG.getNode(Opc, dl, VT, In); + return DAG.getNode(Opc, dl, VT, In); } return Op; @@ -3259,7 +3259,7 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, // fp128. if (Op.getValueType() != MVT::f128) return Op; - return SDValue(); + return SDValue(); } SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, @@ -3373,8 +3373,8 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, } static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) { - if (N->getOpcode() == ISD::SIGN_EXTEND || - N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND) + if (N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND) return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG, N->getOperand(0)->getValueType(0), N->getValueType(0), @@ -3399,13 +3399,13 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) { static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { return N->getOpcode() == ISD::SIGN_EXTEND || - N->getOpcode() == ISD::ANY_EXTEND || + N->getOpcode() == ISD::ANY_EXTEND || isExtendedBUILD_VECTOR(N, DAG, true); } static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { return N->getOpcode() == ISD::ZERO_EXTEND || - N->getOpcode() == ISD::ANY_EXTEND || + N->getOpcode() == ISD::ANY_EXTEND || isExtendedBUILD_VECTOR(N, DAG, false); } @@ -3454,15 +3454,15 @@ SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op, return DAG.getMergeValues({AND, Chain}, dl); } -SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - - // If SVE is available then i64 vector multiplications can also be made legal. - bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64; - - if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON)) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON); - +SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + + // If SVE is available then i64 vector multiplications can also be made legal. + bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64; + + if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON)) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON); + // Multiplications are only custom-lowered for 128-bit vectors so that // VMULL can be detected. Otherwise v2i64 multiplications are not legal. assert(VT.is128BitVector() && VT.isInteger() && @@ -3623,77 +3623,77 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_sve_ptrue: return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(), Op.getOperand(1)); - case Intrinsic::aarch64_sve_clz: - return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_cnt: { - SDValue Data = Op.getOperand(3); - // CTPOP only supports integer operands. 
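// Illustrative sketch, not from the patch: the scalar idea behind the bitcast
// that follows. Population count is only defined on integer bits, so FP data is
// first reinterpreted as an integer of the same width (assumes C++20).
#include <bit>
#include <cstdint>

static constexpr int popcountOfDouble(double D) {
  const std::uint64_t Bits = std::bit_cast<std::uint64_t>(D); // same-width reinterpret
  return std::popcount(Bits);                                 // integer-only popcount
}

static_assert(popcountOfDouble(1.0) == 10); // 1.0 has bit pattern 0x3FF0000000000000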
- if (Data.getValueType().isFloatingPoint()) - Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data); - return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Data, Op.getOperand(1)); - } + case Intrinsic::aarch64_sve_clz: + return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_cnt: { + SDValue Data = Op.getOperand(3); + // CTPOP only supports integer operands. + if (Data.getValueType().isFloatingPoint()) + Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data); + return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Data, Op.getOperand(1)); + } case Intrinsic::aarch64_sve_dupq_lane: return LowerDUPQLane(Op, DAG); case Intrinsic::aarch64_sve_convert_from_svbool: return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(), Op.getOperand(1)); - case Intrinsic::aarch64_sve_fneg: - return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_frintp: - return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_frintm: - return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_frinti: - return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_frintx: - return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_frinta: - return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_frintn: - return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_frintz: - return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_ucvtf: - return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl, - Op.getValueType(), Op.getOperand(2), Op.getOperand(3), - Op.getOperand(1)); - case Intrinsic::aarch64_sve_scvtf: - return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl, - Op.getValueType(), Op.getOperand(2), Op.getOperand(3), - Op.getOperand(1)); - case Intrinsic::aarch64_sve_fcvtzu: - return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl, - Op.getValueType(), Op.getOperand(2), Op.getOperand(3), - Op.getOperand(1)); - case Intrinsic::aarch64_sve_fcvtzs: - return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl, - Op.getValueType(), Op.getOperand(2), Op.getOperand(3), - Op.getOperand(1)); - case Intrinsic::aarch64_sve_fsqrt: - return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_frecpx: - return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_fabs: - return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), 
Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_abs: - return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_neg: - return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_fneg: + return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintp: + return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintm: + return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frinti: + return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintx: + return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frinta: + return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintn: + return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frintz: + return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_ucvtf: + return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl, + Op.getValueType(), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_scvtf: + return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl, + Op.getValueType(), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_fcvtzu: + return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl, + Op.getValueType(), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_fcvtzs: + return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl, + Op.getValueType(), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_fsqrt: + return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_frecpx: + return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_fabs: + return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_abs: + return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_neg: + return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case Intrinsic::aarch64_sve_convert_to_svbool: { EVT OutVT = Op.getValueType(); EVT InVT = Op.getOperand(1).getValueType(); @@ -3719,49 +3719,49 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return 
DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(), Op.getOperand(1), Scalar); } - case Intrinsic::aarch64_sve_rbit: - return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl, - Op.getValueType(), Op.getOperand(2), Op.getOperand(3), - Op.getOperand(1)); - case Intrinsic::aarch64_sve_revb: - return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); - case Intrinsic::aarch64_sve_sxtb: - return DAG.getNode( - AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), - DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)), - Op.getOperand(1)); - case Intrinsic::aarch64_sve_sxth: - return DAG.getNode( - AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), - DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)), - Op.getOperand(1)); - case Intrinsic::aarch64_sve_sxtw: - return DAG.getNode( - AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), - DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)), - Op.getOperand(1)); - case Intrinsic::aarch64_sve_uxtb: - return DAG.getNode( - AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), - DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)), - Op.getOperand(1)); - case Intrinsic::aarch64_sve_uxth: - return DAG.getNode( - AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), - DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)), - Op.getOperand(1)); - case Intrinsic::aarch64_sve_uxtw: - return DAG.getNode( - AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), - Op.getOperand(2), Op.getOperand(3), - DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)), - Op.getOperand(1)); + case Intrinsic::aarch64_sve_rbit: + return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl, + Op.getValueType(), Op.getOperand(2), Op.getOperand(3), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_revb: + return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sve_sxtb: + return DAG.getNode( + AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), + DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_sxth: + return DAG.getNode( + AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), + DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_sxtw: + return DAG.getNode( + AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), + DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_uxtb: + return DAG.getNode( + AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), + DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_uxth: + return DAG.getNode( + AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), + 
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_uxtw: + return DAG.getNode( + AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), + DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)), + Op.getOperand(1)); case Intrinsic::localaddress: { const auto &MF = DAG.getMachineFunction(); @@ -3801,291 +3801,291 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } case Intrinsic::aarch64_neon_srhadd: - case Intrinsic::aarch64_neon_urhadd: - case Intrinsic::aarch64_neon_shadd: - case Intrinsic::aarch64_neon_uhadd: { - bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd || - IntNo == Intrinsic::aarch64_neon_shadd); - bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd || - IntNo == Intrinsic::aarch64_neon_urhadd); - unsigned Opcode = - IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD) - : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD); + case Intrinsic::aarch64_neon_urhadd: + case Intrinsic::aarch64_neon_shadd: + case Intrinsic::aarch64_neon_uhadd: { + bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd || + IntNo == Intrinsic::aarch64_neon_shadd); + bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd || + IntNo == Intrinsic::aarch64_neon_urhadd); + unsigned Opcode = + IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD) + : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD); return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } - - case Intrinsic::aarch64_neon_uabd: { - return DAG.getNode(AArch64ISD::UABD, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); - } - case Intrinsic::aarch64_neon_sabd: { - return DAG.getNode(AArch64ISD::SABD, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); - } - } -} - -bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { - if (VT.getVectorElementType() == MVT::i32 && - VT.getVectorElementCount().getKnownMinValue() >= 4) - return true; - - return false; -} - + + case Intrinsic::aarch64_neon_uabd: { + return DAG.getNode(AArch64ISD::UABD, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + case Intrinsic::aarch64_neon_sabd: { + return DAG.getNode(AArch64ISD::SABD, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + } +} + +bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { + if (VT.getVectorElementType() == MVT::i32 && + VT.getVectorElementCount().getKnownMinValue() >= 4) + return true; + + return false; +} + bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { return ExtVal.getValueType().isScalableVector(); } -unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) { - std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = { - {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false), - AArch64ISD::GLD1_MERGE_ZERO}, - {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true), - AArch64ISD::GLD1_UXTW_MERGE_ZERO}, - {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false), - AArch64ISD::GLD1_MERGE_ZERO}, - {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true), - AArch64ISD::GLD1_SXTW_MERGE_ZERO}, - {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false), - AArch64ISD::GLD1_SCALED_MERGE_ZERO}, - {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true), - 
AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO}, - {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false), - AArch64ISD::GLD1_SCALED_MERGE_ZERO}, - {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true), - AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO}, - }; - auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend); - return AddrModes.find(Key)->second; -} - -unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) { - std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = { - {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false), - AArch64ISD::SST1_PRED}, - {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true), - AArch64ISD::SST1_UXTW_PRED}, - {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false), - AArch64ISD::SST1_PRED}, - {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true), - AArch64ISD::SST1_SXTW_PRED}, - {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false), - AArch64ISD::SST1_SCALED_PRED}, - {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true), - AArch64ISD::SST1_UXTW_SCALED_PRED}, - {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false), - AArch64ISD::SST1_SCALED_PRED}, - {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true), - AArch64ISD::SST1_SXTW_SCALED_PRED}, - }; - auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend); - return AddrModes.find(Key)->second; -} - -unsigned getSignExtendedGatherOpcode(unsigned Opcode) { - switch (Opcode) { - default: - llvm_unreachable("unimplemented opcode"); - return Opcode; - case AArch64ISD::GLD1_MERGE_ZERO: - return AArch64ISD::GLD1S_MERGE_ZERO; - case AArch64ISD::GLD1_IMM_MERGE_ZERO: - return AArch64ISD::GLD1S_IMM_MERGE_ZERO; - case AArch64ISD::GLD1_UXTW_MERGE_ZERO: - return AArch64ISD::GLD1S_UXTW_MERGE_ZERO; - case AArch64ISD::GLD1_SXTW_MERGE_ZERO: - return AArch64ISD::GLD1S_SXTW_MERGE_ZERO; - case AArch64ISD::GLD1_SCALED_MERGE_ZERO: - return AArch64ISD::GLD1S_SCALED_MERGE_ZERO; - case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO: - return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO; - case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO: - return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO; - } -} - -bool getGatherScatterIndexIsExtended(SDValue Index) { - unsigned Opcode = Index.getOpcode(); - if (Opcode == ISD::SIGN_EXTEND_INREG) - return true; - - if (Opcode == ISD::AND) { - SDValue Splat = Index.getOperand(1); - if (Splat.getOpcode() != ISD::SPLAT_VECTOR) - return false; - ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0)); - if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF) - return false; - return true; - } - - return false; -} - -// If the base pointer of a masked gather or scatter is null, we -// may be able to swap BasePtr & Index and use the vector + register -// or vector + immediate addressing mode, e.g. 
-// VECTOR + REGISTER: -// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices) -// -> getelementptr %offset, <vscale x N x T> %indices -// VECTOR + IMMEDIATE: -// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices) -// -> getelementptr #x, <vscale x N x T> %indices -void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT, - unsigned &Opcode, bool IsGather, - SelectionDAG &DAG) { - if (!isNullConstant(BasePtr)) - return; - - ConstantSDNode *Offset = nullptr; - if (Index.getOpcode() == ISD::ADD) - if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) { - if (isa<ConstantSDNode>(SplatVal)) - Offset = cast<ConstantSDNode>(SplatVal); - else { - BasePtr = SplatVal; - Index = Index->getOperand(0); - return; - } - } - - unsigned NewOp = - IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED; - - if (!Offset) { - std::swap(BasePtr, Index); - Opcode = NewOp; - return; - } - - uint64_t OffsetVal = Offset->getZExtValue(); - unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8; - auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64); - - if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) { - // Index is out of range for the immediate addressing mode - BasePtr = ConstOffset; - Index = Index->getOperand(0); - return; - } - - // Immediate is in range - Opcode = NewOp; - BasePtr = Index->getOperand(0); - Index = ConstOffset; -} - -SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op); - assert(MGT && "Can only custom lower gather load nodes"); - - SDValue Index = MGT->getIndex(); - SDValue Chain = MGT->getChain(); - SDValue PassThru = MGT->getPassThru(); - SDValue Mask = MGT->getMask(); - SDValue BasePtr = MGT->getBasePtr(); - ISD::LoadExtType ExtTy = MGT->getExtensionType(); - - ISD::MemIndexType IndexType = MGT->getIndexType(); - bool IsScaled = - IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED; - bool IsSigned = - IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED; - bool IdxNeedsExtend = - getGatherScatterIndexIsExtended(Index) || - Index.getSimpleValueType().getVectorElementType() == MVT::i32; - bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD; - - EVT VT = PassThru.getSimpleValueType(); - EVT MemVT = MGT->getMemoryVT(); - SDValue InputVT = DAG.getValueType(MemVT); - - if (VT.getVectorElementType() == MVT::bf16 && - !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16()) - return SDValue(); - - // Handle FP data by using an integer gather and casting the result. 
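// Illustrative sketch, not from the patch: the scalar shape of the FP handling
// that follows. The data is gathered in the integer domain and the loaded bits
// are then reinterpreted back to the FP type, leaving the value unchanged
// (assumes C++20 for std::bit_cast).
#include <bit>
#include <cstddef>
#include <cstdint>

static double gatherOneLaneAsFP(const std::uint64_t *IntData, std::size_t Idx) {
  const std::uint64_t Raw = IntData[Idx]; // "integer gather" of a single lane
  return std::bit_cast<double>(Raw);      // cast the result back to the FP type
}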
- if (VT.isFloatingPoint()) { - EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount()); - PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG); - InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger()); - } - - SDVTList VTs = DAG.getVTList(PassThru.getSimpleValueType(), MVT::Other); - - if (getGatherScatterIndexIsExtended(Index)) - Index = Index.getOperand(0); - - unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend); - selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode, - /*isGather=*/true, DAG); - - if (ResNeedsSignExtend) - Opcode = getSignExtendedGatherOpcode(Opcode); - - SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT, PassThru}; - SDValue Gather = DAG.getNode(Opcode, DL, VTs, Ops); - - if (VT.isFloatingPoint()) { - SDValue Cast = getSVESafeBitCast(VT, Gather, DAG); - return DAG.getMergeValues({Cast, Gather}, DL); - } - - return Gather; -} - -SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op); - assert(MSC && "Can only custom lower scatter store nodes"); - - SDValue Index = MSC->getIndex(); - SDValue Chain = MSC->getChain(); - SDValue StoreVal = MSC->getValue(); - SDValue Mask = MSC->getMask(); - SDValue BasePtr = MSC->getBasePtr(); - - ISD::MemIndexType IndexType = MSC->getIndexType(); - bool IsScaled = - IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED; - bool IsSigned = - IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED; - bool NeedsExtend = - getGatherScatterIndexIsExtended(Index) || - Index.getSimpleValueType().getVectorElementType() == MVT::i32; - - EVT VT = StoreVal.getSimpleValueType(); - SDVTList VTs = DAG.getVTList(MVT::Other); - EVT MemVT = MSC->getMemoryVT(); - SDValue InputVT = DAG.getValueType(MemVT); - - if (VT.getVectorElementType() == MVT::bf16 && - !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16()) - return SDValue(); - - // Handle FP data by casting the data so an integer scatter can be used. 
- if (VT.isFloatingPoint()) { - EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount()); - StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG); - InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger()); - } - - if (getGatherScatterIndexIsExtended(Index)) - Index = Index.getOperand(0); - - unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend); - selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode, - /*isGather=*/false, DAG); - - SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT}; - return DAG.getNode(Opcode, DL, VTs, Ops); -} - +unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) { + std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = { + {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false), + AArch64ISD::GLD1_MERGE_ZERO}, + {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true), + AArch64ISD::GLD1_UXTW_MERGE_ZERO}, + {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false), + AArch64ISD::GLD1_MERGE_ZERO}, + {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true), + AArch64ISD::GLD1_SXTW_MERGE_ZERO}, + {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false), + AArch64ISD::GLD1_SCALED_MERGE_ZERO}, + {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true), + AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO}, + {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false), + AArch64ISD::GLD1_SCALED_MERGE_ZERO}, + {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true), + AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO}, + }; + auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend); + return AddrModes.find(Key)->second; +} + +unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) { + std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = { + {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false), + AArch64ISD::SST1_PRED}, + {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true), + AArch64ISD::SST1_UXTW_PRED}, + {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false), + AArch64ISD::SST1_PRED}, + {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true), + AArch64ISD::SST1_SXTW_PRED}, + {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false), + AArch64ISD::SST1_SCALED_PRED}, + {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true), + AArch64ISD::SST1_UXTW_SCALED_PRED}, + {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false), + AArch64ISD::SST1_SCALED_PRED}, + {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true), + AArch64ISD::SST1_SXTW_SCALED_PRED}, + }; + auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend); + return AddrModes.find(Key)->second; +} + +unsigned getSignExtendedGatherOpcode(unsigned Opcode) { + switch (Opcode) { + default: + llvm_unreachable("unimplemented opcode"); + return Opcode; + case AArch64ISD::GLD1_MERGE_ZERO: + return AArch64ISD::GLD1S_MERGE_ZERO; + case AArch64ISD::GLD1_IMM_MERGE_ZERO: + return AArch64ISD::GLD1S_IMM_MERGE_ZERO; + case AArch64ISD::GLD1_UXTW_MERGE_ZERO: + return AArch64ISD::GLD1S_UXTW_MERGE_ZERO; + case AArch64ISD::GLD1_SXTW_MERGE_ZERO: + return AArch64ISD::GLD1S_SXTW_MERGE_ZERO; + case AArch64ISD::GLD1_SCALED_MERGE_ZERO: + return AArch64ISD::GLD1S_SCALED_MERGE_ZERO; + case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO: + return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO; + case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO: + return 
AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO; + } +} + +bool getGatherScatterIndexIsExtended(SDValue Index) { + unsigned Opcode = Index.getOpcode(); + if (Opcode == ISD::SIGN_EXTEND_INREG) + return true; + + if (Opcode == ISD::AND) { + SDValue Splat = Index.getOperand(1); + if (Splat.getOpcode() != ISD::SPLAT_VECTOR) + return false; + ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0)); + if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF) + return false; + return true; + } + + return false; +} + +// If the base pointer of a masked gather or scatter is null, we +// may be able to swap BasePtr & Index and use the vector + register +// or vector + immediate addressing mode, e.g. +// VECTOR + REGISTER: +// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices) +// -> getelementptr %offset, <vscale x N x T> %indices +// VECTOR + IMMEDIATE: +// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices) +// -> getelementptr #x, <vscale x N x T> %indices +void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT, + unsigned &Opcode, bool IsGather, + SelectionDAG &DAG) { + if (!isNullConstant(BasePtr)) + return; + + ConstantSDNode *Offset = nullptr; + if (Index.getOpcode() == ISD::ADD) + if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) { + if (isa<ConstantSDNode>(SplatVal)) + Offset = cast<ConstantSDNode>(SplatVal); + else { + BasePtr = SplatVal; + Index = Index->getOperand(0); + return; + } + } + + unsigned NewOp = + IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED; + + if (!Offset) { + std::swap(BasePtr, Index); + Opcode = NewOp; + return; + } + + uint64_t OffsetVal = Offset->getZExtValue(); + unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8; + auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64); + + if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) { + // Index is out of range for the immediate addressing mode + BasePtr = ConstOffset; + Index = Index->getOperand(0); + return; + } + + // Immediate is in range + Opcode = NewOp; + BasePtr = Index->getOperand(0); + Index = ConstOffset; +} + +SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op); + assert(MGT && "Can only custom lower gather load nodes"); + + SDValue Index = MGT->getIndex(); + SDValue Chain = MGT->getChain(); + SDValue PassThru = MGT->getPassThru(); + SDValue Mask = MGT->getMask(); + SDValue BasePtr = MGT->getBasePtr(); + ISD::LoadExtType ExtTy = MGT->getExtensionType(); + + ISD::MemIndexType IndexType = MGT->getIndexType(); + bool IsScaled = + IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED; + bool IsSigned = + IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED; + bool IdxNeedsExtend = + getGatherScatterIndexIsExtended(Index) || + Index.getSimpleValueType().getVectorElementType() == MVT::i32; + bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD; + + EVT VT = PassThru.getSimpleValueType(); + EVT MemVT = MGT->getMemoryVT(); + SDValue InputVT = DAG.getValueType(MemVT); + + if (VT.getVectorElementType() == MVT::bf16 && + !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16()) + return SDValue(); + + // Handle FP data by using an integer gather and casting the result. 
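// Illustrative sketch, not from the patch: the immediate-range test used by
// selectGatherScatterAddrMode above, restated as a stand-alone predicate. A
// splatted byte offset can use the vector-plus-immediate form only if it is a
// whole number of elements and the element index fits the 0..31 immediate.
#include <cstdint>

static constexpr bool fitsImmediateGatherOffset(std::uint64_t OffsetBytes,
                                                unsigned ScalarSizeInBytes) {
  return OffsetBytes % ScalarSizeInBytes == 0 &&
         OffsetBytes / ScalarSizeInBytes <= 31;
}

static_assert(fitsImmediateGatherOffset(124, 4));  // element index 31, in range
static_assert(!fitsImmediateGatherOffset(128, 4)); // element index 32, out of range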
+ if (VT.isFloatingPoint()) { + EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount()); + PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG); + InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger()); + } + + SDVTList VTs = DAG.getVTList(PassThru.getSimpleValueType(), MVT::Other); + + if (getGatherScatterIndexIsExtended(Index)) + Index = Index.getOperand(0); + + unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend); + selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode, + /*isGather=*/true, DAG); + + if (ResNeedsSignExtend) + Opcode = getSignExtendedGatherOpcode(Opcode); + + SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT, PassThru}; + SDValue Gather = DAG.getNode(Opcode, DL, VTs, Ops); + + if (VT.isFloatingPoint()) { + SDValue Cast = getSVESafeBitCast(VT, Gather, DAG); + return DAG.getMergeValues({Cast, Gather}, DL); + } + + return Gather; +} + +SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op); + assert(MSC && "Can only custom lower scatter store nodes"); + + SDValue Index = MSC->getIndex(); + SDValue Chain = MSC->getChain(); + SDValue StoreVal = MSC->getValue(); + SDValue Mask = MSC->getMask(); + SDValue BasePtr = MSC->getBasePtr(); + + ISD::MemIndexType IndexType = MSC->getIndexType(); + bool IsScaled = + IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED; + bool IsSigned = + IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED; + bool NeedsExtend = + getGatherScatterIndexIsExtended(Index) || + Index.getSimpleValueType().getVectorElementType() == MVT::i32; + + EVT VT = StoreVal.getSimpleValueType(); + SDVTList VTs = DAG.getVTList(MVT::Other); + EVT MemVT = MSC->getMemoryVT(); + SDValue InputVT = DAG.getValueType(MemVT); + + if (VT.getVectorElementType() == MVT::bf16 && + !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16()) + return SDValue(); + + // Handle FP data by casting the data so an integer scatter can be used. + if (VT.isFloatingPoint()) { + EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount()); + StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG); + InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger()); + } + + if (getGatherScatterIndexIsExtended(Index)) + Index = Index.getOperand(0); + + unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend); + selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode, + /*isGather=*/false, DAG); + + SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT}; + return DAG.getNode(Opcode, DL, VTs, Ops); +} + // Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16. static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT, EVT MemVT, @@ -4151,9 +4151,9 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, // 256 bit non-temporal stores can be lowered to STNP. Do this as part of // the custom lowering, as there are no un-paired non-temporal stores and // legalization will break up 256 bit inputs. 
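// Illustrative sketch, not from the patch: the eligibility test that follows,
// restated as a stand-alone predicate. A 256-bit non-temporal vector store is
// split into two halves (lowered to STNP) only when the element count is even
// and the element width is one of 8, 16, 32 or 64 bits.
static constexpr bool canSplitNonTemporalStore(unsigned TotalBits,
                                               unsigned ElemBits,
                                               unsigned NumElems) {
  const bool ElemOK =
      ElemBits == 8 || ElemBits == 16 || ElemBits == 32 || ElemBits == 64;
  return TotalBits == 256 && NumElems % 2 == 0 && ElemOK;
}

static_assert(canSplitNonTemporalStore(256, 64, 4)); // e.g. a 4 x i64 store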
- ElementCount EC = MemVT.getVectorElementCount(); + ElementCount EC = MemVT.getVectorElementCount(); if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u && - EC.isKnownEven() && + EC.isKnownEven() && ((MemVT.getScalarSizeInBits() == 8u || MemVT.getScalarSizeInBits() == 16u || MemVT.getScalarSizeInBits() == 32u || @@ -4162,11 +4162,11 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl, MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64)); - SDValue Hi = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl, - MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), - StoreNode->getValue(), - DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64)); + SDValue Hi = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl, + MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), + StoreNode->getValue(), + DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64)); SDValue Result = DAG.getMemIntrinsicNode( AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other), {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()}, @@ -4191,25 +4191,25 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, return SDValue(); } -// Generate SUBS and CSEL for integer abs. -SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { - MVT VT = Op.getSimpleValueType(); - - if (VT.isVector()) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU); - - SDLoc DL(Op); - SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), - Op.getOperand(0)); - // Generate SUBS & CSEL. - SDValue Cmp = - DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), - Op.getOperand(0), DAG.getConstant(0, DL, VT)); - return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg, - DAG.getConstant(AArch64CC::PL, DL, MVT::i32), - Cmp.getValue(1)); -} - +// Generate SUBS and CSEL for integer abs. +SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { + MVT VT = Op.getSimpleValueType(); + + if (VT.isVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU); + + SDLoc DL(Op); + SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + Op.getOperand(0)); + // Generate SUBS & CSEL. 
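// Illustrative sketch, not from the patch: what the SUBS + CSEL pair computes,
// written as scalar C++. The value is negated, compared against zero, and the
// original is selected when it is non-negative (the PL condition), otherwise
// the negation. (INT64_MIN is left aside: signed overflow is UB in C++.)
#include <cstdint>

static constexpr std::int64_t absViaSelect(std::int64_t X) {
  const std::int64_t Neg = 0 - X; // the ISD::SUB (0 - x) computed above
  return X >= 0 ? X : Neg;        // CSEL on the PL (plus or zero) condition
}

static_assert(absViaSelect(-7) == 7 && absViaSelect(5) == 5);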
+ SDValue Cmp = + DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), + Op.getOperand(0), DAG.getConstant(0, DL, VT)); + return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg, + DAG.getConstant(AArch64CC::PL, DL, MVT::i32), + Cmp.getValue(1)); +} + SDValue AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { LLVM_DEBUG(dbgs() << "Custom lowering: "); @@ -4262,35 +4262,35 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::FADD: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED); case ISD::FSUB: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED); case ISD::FMUL: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED); case ISD::FMA: return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED); case ISD::FDIV: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED); - case ISD::FNEG: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU); - case ISD::FCEIL: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU); - case ISD::FFLOOR: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU); - case ISD::FNEARBYINT: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU); - case ISD::FRINT: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU); - case ISD::FROUND: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU); - case ISD::FROUNDEVEN: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU); - case ISD::FTRUNC: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU); - case ISD::FSQRT: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU); - case ISD::FABS: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED); + case ISD::FNEG: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU); + case ISD::FCEIL: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU); + case ISD::FFLOOR: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU); + case ISD::FNEARBYINT: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU); + case ISD::FRINT: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU); + case ISD::FROUND: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU); + case ISD::FROUNDEVEN: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU); + case ISD::FTRUNC: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU); + case ISD::FSQRT: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU); + case ISD::FABS: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU); case ISD::FP_ROUND: case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG); @@ -4304,8 +4304,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerRETURNADDR(Op, DAG); case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG); - case ISD::CONCAT_VECTORS: - return LowerCONCAT_VECTORS(Op, DAG); + case ISD::CONCAT_VECTORS: + return LowerCONCAT_VECTORS(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: @@ -4322,19 
+4322,19 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerINSERT_SUBVECTOR(Op, DAG); case ISD::SDIV: case ISD::UDIV: - return LowerDIV(Op, DAG); + return LowerDIV(Op, DAG); case ISD::SMIN: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED, - /*OverrideNEON=*/true); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED, + /*OverrideNEON=*/true); case ISD::UMIN: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED, - /*OverrideNEON=*/true); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED, + /*OverrideNEON=*/true); case ISD::SMAX: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED, - /*OverrideNEON=*/true); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED, + /*OverrideNEON=*/true); case ISD::UMAX: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED, - /*OverrideNEON=*/true); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED, + /*OverrideNEON=*/true); case ISD::SRA: case ISD::SRL: case ISD::SHL: @@ -4374,21 +4374,21 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); - case ISD::MGATHER: - return LowerMGATHER(Op, DAG); - case ISD::MSCATTER: - return LowerMSCATTER(Op, DAG); - case ISD::VECREDUCE_SEQ_FADD: - return LowerVECREDUCE_SEQ_FADD(Op, DAG); + case ISD::MGATHER: + return LowerMGATHER(Op, DAG); + case ISD::MSCATTER: + return LowerMSCATTER(Op, DAG); + case ISD::VECREDUCE_SEQ_FADD: + return LowerVECREDUCE_SEQ_FADD(Op, DAG); case ISD::VECREDUCE_ADD: - case ISD::VECREDUCE_AND: - case ISD::VECREDUCE_OR: - case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: case ISD::VECREDUCE_SMAX: case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_UMIN: - case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: return LowerVECREDUCE(Op, DAG); @@ -4400,21 +4400,21 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VSCALE: return LowerVSCALE(Op, DAG); - case ISD::ANY_EXTEND: - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - return LowerFixedLengthVectorIntExtendToSVE(Op, DAG); - case ISD::SIGN_EXTEND_INREG: { - // Only custom lower when ExtraVT has a legal byte based element type. - EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - EVT ExtraEltVT = ExtraVT.getVectorElementType(); - if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) && - (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64)) - return SDValue(); - - return LowerToPredicatedOp(Op, DAG, - AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU); - } + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + return LowerFixedLengthVectorIntExtendToSVE(Op, DAG); + case ISD::SIGN_EXTEND_INREG: { + // Only custom lower when ExtraVT has a legal byte based element type. 
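// Illustrative sketch, not from the patch: what SIGN_EXTEND_INREG with an i8
// ExtraVT means for a single lane. The low 8 bits already sit in a wider
// register and are sign-extended in place to the full width (assumes C++20
// two's-complement conversion rules).
#include <cstdint>

static constexpr std::int32_t signExtendInRegI8(std::int32_t X) {
  return static_cast<std::int32_t>(static_cast<std::int8_t>(X & 0xFF));
}

static_assert(signExtendInRegI8(0x000000F0) == -16);
static_assert(signExtendInRegI8(0x0000007F) == 127);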
+ EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT ExtraEltVT = ExtraVT.getVectorElementType(); + if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) && + (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64)) + return SDValue(); + + return LowerToPredicatedOp(Op, DAG, + AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU); + } case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::LOAD: @@ -4422,49 +4422,49 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerFixedLengthVectorLoadToSVE(Op, DAG); llvm_unreachable("Unexpected request to lower ISD::LOAD"); case ISD::ADD: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED); - case ISD::AND: - return LowerToScalableOp(Op, DAG); - case ISD::SUB: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED); - case ISD::FMAXNUM: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED); - case ISD::FMINNUM: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED); - case ISD::VSELECT: - return LowerFixedLengthVectorSelectToSVE(Op, DAG); - case ISD::ABS: - return LowerABS(Op, DAG); - case ISD::BITREVERSE: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU, - /*OverrideNEON=*/true); - case ISD::BSWAP: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU); - case ISD::CTLZ: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU, - /*OverrideNEON=*/true); - case ISD::CTTZ: - return LowerCTTZ(Op, DAG); - } -} - -bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const { - return !Subtarget->useSVEForFixedLengthVectors(); -} - -bool AArch64TargetLowering::useSVEForFixedLengthVectorVT( - EVT VT, bool OverrideNEON) const { - if (!Subtarget->useSVEForFixedLengthVectors()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED); + case ISD::AND: + return LowerToScalableOp(Op, DAG); + case ISD::SUB: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED); + case ISD::FMAXNUM: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED); + case ISD::FMINNUM: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED); + case ISD::VSELECT: + return LowerFixedLengthVectorSelectToSVE(Op, DAG); + case ISD::ABS: + return LowerABS(Op, DAG); + case ISD::BITREVERSE: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU, + /*OverrideNEON=*/true); + case ISD::BSWAP: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU); + case ISD::CTLZ: + return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU, + /*OverrideNEON=*/true); + case ISD::CTTZ: + return LowerCTTZ(Op, DAG); + } +} + +bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const { + return !Subtarget->useSVEForFixedLengthVectors(); +} + +bool AArch64TargetLowering::useSVEForFixedLengthVectorVT( + EVT VT, bool OverrideNEON) const { + if (!Subtarget->useSVEForFixedLengthVectors()) return false; if (!VT.isFixedLengthVector()) return false; - // Don't use SVE for vectors we cannot scalarize if required. - switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { + // Don't use SVE for vectors we cannot scalarize if required. + switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { // Fixed length predicates should be promoted to i8. // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work. 
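// Illustrative sketch, not from the patch: the size-based part of the decision
// that follows, as a simplified stand-alone predicate. NEON-sized (64/128-bit)
// fixed vectors keep their NEON lowering unless OverrideNEON is set, and
// anything wider than the guaranteed minimum SVE register width is rejected.
static constexpr bool useSVEForFixedVector(unsigned VTBits, unsigned MinSVEBits,
                                           bool OverrideNEON) {
  if (OverrideNEON && (VTBits == 128 || VTBits == 64))
    return true;               // all SVE implementations cover NEON sizes
  if (VTBits <= 128)
    return false;              // keep NEON MVTs in a single register class
  return VTBits <= MinSVEBits; // don't use SVE for types that don't fit
}

static_assert(!useSVEForFixedVector(128, 512, false));
static_assert(useSVEForFixedVector(256, 512, false));
static_assert(!useSVEForFixedVector(1024, 512, false));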
- case MVT::i1: + case MVT::i1: default: return false; case MVT::i8: @@ -4477,16 +4477,16 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT( break; } - // All SVE implementations support NEON sized vectors. - if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector())) - return true; - + // All SVE implementations support NEON sized vectors. + if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector())) + return true; + // Ensure NEON MVTs only belong to a single register class. - if (VT.getFixedSizeInBits() <= 128) + if (VT.getFixedSizeInBits() <= 128) return false; // Don't use SVE for types that don't fit. - if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits()) + if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits()) return false; // TODO: Perhaps an artificial restriction, but worth having whilst getting @@ -4586,9 +4586,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments( (void)Res; } SmallVector<SDValue, 16> ArgValues; - unsigned ExtraArgLocs = 0; - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i - ExtraArgLocs]; + unsigned ExtraArgLocs = 0; + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i - ExtraArgLocs]; if (Ins[i].Flags.isByVal()) { // Byval is used for HFAs in the PCS, but the system should work in a @@ -4716,44 +4716,44 @@ SDValue AArch64TargetLowering::LowerFormalArguments( if (VA.getLocInfo() == CCValAssign::Indirect) { assert(VA.getValVT().isScalableVector() && "Only scalable vectors can be passed indirectly"); - - uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize(); - unsigned NumParts = 1; - if (Ins[i].Flags.isInConsecutiveRegs()) { - assert(!Ins[i].Flags.isInConsecutiveRegsLast()); - while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast()) - ++NumParts; - } - - MVT PartLoad = VA.getValVT(); - SDValue Ptr = ArgValue; - - // Ensure we generate all loads for each tuple part, whilst updating the - // pointer after each load correctly using vscale. - while (NumParts > 0) { - ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo()); - InVals.push_back(ArgValue); - NumParts--; - if (NumParts > 0) { - SDValue BytesIncrement = DAG.getVScale( - DL, Ptr.getValueType(), - APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize)); - SDNodeFlags Flags; - Flags.setNoUnsignedWrap(true); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - BytesIncrement, Flags); - ExtraArgLocs++; - i++; - } - } - } else { - if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer()) - ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(), - ArgValue, DAG.getValueType(MVT::i32)); - InVals.push_back(ArgValue); + + uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize(); + unsigned NumParts = 1; + if (Ins[i].Flags.isInConsecutiveRegs()) { + assert(!Ins[i].Flags.isInConsecutiveRegsLast()); + while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast()) + ++NumParts; + } + + MVT PartLoad = VA.getValVT(); + SDValue Ptr = ArgValue; + + // Ensure we generate all loads for each tuple part, whilst updating the + // pointer after each load correctly using vscale. 
+ while (NumParts > 0) { + ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo()); + InVals.push_back(ArgValue); + NumParts--; + if (NumParts > 0) { + SDValue BytesIncrement = DAG.getVScale( + DL, Ptr.getValueType(), + APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize)); + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + BytesIncrement, Flags); + ExtraArgLocs++; + i++; + } + } + } else { + if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer()) + ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(), + ArgValue, DAG.getValueType(MVT::i32)); + InVals.push_back(ArgValue); } } - assert((ArgLocs.size() + ExtraArgLocs) == Ins.size()); + assert((ArgLocs.size() + ExtraArgLocs) == Ins.size()); // varargs AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); @@ -4928,7 +4928,7 @@ SDValue AArch64TargetLowering::LowerCallResult( const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, SDValue ThisVal) const { - CCAssignFn *RetCC = CCAssignFnForReturn(CallConv); + CCAssignFn *RetCC = CCAssignFnForReturn(CallConv); // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; DenseMap<unsigned, SDValue> CopiedRegs; @@ -5351,9 +5351,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } // Walk the register/memloc assignments, inserting copies/loads. - unsigned ExtraArgLocs = 0; - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i - ExtraArgLocs]; + unsigned ExtraArgLocs = 0; + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i - ExtraArgLocs]; SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; @@ -5395,49 +5395,49 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, case CCValAssign::Indirect: assert(VA.getValVT().isScalableVector() && "Only scalable vectors can be passed indirectly"); - - uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize(); - uint64_t PartSize = StoreSize; - unsigned NumParts = 1; - if (Outs[i].Flags.isInConsecutiveRegs()) { - assert(!Outs[i].Flags.isInConsecutiveRegsLast()); - while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast()) - ++NumParts; - StoreSize *= NumParts; - } - + + uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize(); + uint64_t PartSize = StoreSize; + unsigned NumParts = 1; + if (Outs[i].Flags.isInConsecutiveRegs()) { + assert(!Outs[i].Flags.isInConsecutiveRegsLast()); + while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast()) + ++NumParts; + StoreSize *= NumParts; + } + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext()); Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty); - int FI = MFI.CreateStackObject(StoreSize, Alignment, false); - MFI.setStackID(FI, TargetStackID::ScalableVector); + int FI = MFI.CreateStackObject(StoreSize, Alignment, false); + MFI.setStackID(FI, TargetStackID::ScalableVector); - MachinePointerInfo MPI = - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); - SDValue Ptr = DAG.getFrameIndex( + MachinePointerInfo MPI = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); + SDValue Ptr = DAG.getFrameIndex( FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout())); - SDValue SpillSlot = Ptr; - - // Ensure we generate all stores for each tuple part, whilst updating the - 
// pointer after each store correctly using vscale. - while (NumParts) { - Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI); - NumParts--; - if (NumParts > 0) { - SDValue BytesIncrement = DAG.getVScale( - DL, Ptr.getValueType(), - APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize)); - SDNodeFlags Flags; - Flags.setNoUnsignedWrap(true); - - MPI = MachinePointerInfo(MPI.getAddrSpace()); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - BytesIncrement, Flags); - ExtraArgLocs++; - i++; - } - } - + SDValue SpillSlot = Ptr; + + // Ensure we generate all stores for each tuple part, whilst updating the + // pointer after each store correctly using vscale. + while (NumParts) { + Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI); + NumParts--; + if (NumParts > 0) { + SDValue BytesIncrement = DAG.getVScale( + DL, Ptr.getValueType(), + APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize)); + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + + MPI = MachinePointerInfo(MPI.getAddrSpace()); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + BytesIncrement, Flags); + ExtraArgLocs++; + i++; + } + } + Arg = SpillSlot; break; } @@ -5457,18 +5457,18 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // take care of putting the two halves in the right place but we have to // combine them. SDValue &Bits = - llvm::find_if(RegsToPass, - [=](const std::pair<unsigned, SDValue> &Elt) { - return Elt.first == VA.getLocReg(); - }) + llvm::find_if(RegsToPass, + [=](const std::pair<unsigned, SDValue> &Elt) { + return Elt.first == VA.getLocReg(); + }) ->second; Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); // Call site info is used for function's parameter entry value // tracking. For now we track only simple cases when parameter // is transferred through whole register. - llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) { - return ArgReg.Reg == VA.getLocReg(); - }); + llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) { + return ArgReg.Reg == VA.getLocReg(); + }); } else { RegsToPass.emplace_back(VA.getLocReg(), Arg); RegsUsed.insert(VA.getLocReg()); @@ -5487,7 +5487,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, uint32_t BEAlign = 0; unsigned OpSize; if (VA.getLocInfo() == CCValAssign::Indirect) - OpSize = VA.getLocVT().getFixedSizeInBits(); + OpSize = VA.getLocVT().getFixedSizeInBits(); else OpSize = Flags.isByVal() ? Flags.getByValSize() * 8 : VA.getValVT().getSizeInBits(); @@ -5647,17 +5647,17 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, return Ret; } - unsigned CallOpc = AArch64ISD::CALL; - // Calls marked with "rv_marker" are special. They should be expanded to the - // call, directly followed by a special marker sequence. Use the CALL_RVMARKER - // to do that. - if (CLI.CB && CLI.CB->hasRetAttr("rv_marker")) { - assert(!IsTailCall && "tail calls cannot be marked with rv_marker"); - CallOpc = AArch64ISD::CALL_RVMARKER; - } - + unsigned CallOpc = AArch64ISD::CALL; + // Calls marked with "rv_marker" are special. They should be expanded to the + // call, directly followed by a special marker sequence. Use the CALL_RVMARKER + // to do that. + if (CLI.CB && CLI.CB->hasRetAttr("rv_marker")) { + assert(!IsTailCall && "tail calls cannot be marked with rv_marker"); + CallOpc = AArch64ISD::CALL_RVMARKER; + } + // Returns a chain and a flag for retval copy to use. 
- Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops); + Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops); DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); InFlag = Chain.getValue(1); DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); @@ -5681,7 +5681,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, bool AArch64TargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { - CCAssignFn *RetCC = CCAssignFnForReturn(CallConv); + CCAssignFn *RetCC = CCAssignFnForReturn(CallConv); SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC); @@ -5696,7 +5696,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, auto &MF = DAG.getMachineFunction(); auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); - CCAssignFn *RetCC = CCAssignFnForReturn(CallConv); + CCAssignFn *RetCC = CCAssignFnForReturn(CallConv); SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); @@ -5741,9 +5741,9 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (RegsUsed.count(VA.getLocReg())) { SDValue &Bits = - llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) { - return Elt.first == VA.getLocReg(); - })->second; + llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) { + return Elt.first == VA.getLocReg(); + })->second; Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); } else { RetVals.emplace_back(VA.getLocReg(), Arg); @@ -5963,7 +5963,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, SDValue FuncTLVGet = DAG.getLoad( PtrMemVT, DL, Chain, DescAddr, MachinePointerInfo::getGOT(DAG.getMachineFunction()), - Align(PtrMemVT.getSizeInBits() / 8), + Align(PtrMemVT.getSizeInBits() / 8), MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); Chain = FuncTLVGet.getValue(1); @@ -6278,22 +6278,22 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, llvm_unreachable("Unexpected platform trying to use TLS"); } -// Looks through \param Val to determine the bit that can be used to -// check the sign of the value. It returns the unextended value and -// the sign bit position. -std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) { - if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG) - return {Val.getOperand(0), - cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() - - 1}; - - if (Val.getOpcode() == ISD::SIGN_EXTEND) - return {Val.getOperand(0), - Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1}; - - return {Val, Val.getValueSizeInBits() - 1}; -} - +// Looks through \param Val to determine the bit that can be used to +// check the sign of the value. It returns the unextended value and +// the sign bit position. 
+std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) { + if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG) + return {Val.getOperand(0), + cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() - + 1}; + + if (Val.getOpcode() == ISD::SIGN_EXTEND) + return {Val.getOperand(0), + Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1}; + + return {Val, Val.getValueSizeInBits() - 1}; +} + SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); @@ -6388,10 +6388,10 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // Don't combine AND since emitComparison converts the AND to an ANDS // (a.k.a. TST) and the test in the test bit and branch instruction // becomes redundant. This would also increase register pressure. - uint64_t SignBitPos; - std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS); + uint64_t SignBitPos; + std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS); return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS, - DAG.getConstant(SignBitPos, dl, MVT::i64), Dest); + DAG.getConstant(SignBitPos, dl, MVT::i64), Dest); } } if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT && @@ -6399,10 +6399,10 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // Don't combine AND since emitComparison converts the AND to an ANDS // (a.k.a. TST) and the test in the test bit and branch instruction // becomes redundant. This would also increase register pressure. - uint64_t SignBitPos; - std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS); + uint64_t SignBitPos; + std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS); return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS, - DAG.getConstant(SignBitPos, dl, MVT::i64), Dest); + DAG.getConstant(SignBitPos, dl, MVT::i64), Dest); } SDValue CCVal; @@ -6549,9 +6549,9 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV); } - if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU); - + if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU); + assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 || VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) && "Unexpected type for custom ctpop lowering"); @@ -6575,16 +6575,16 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { return Val; } -SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - assert(VT.isScalableVector() || - useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)); - - SDLoc DL(Op); - SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0)); - return DAG.getNode(ISD::CTLZ, DL, VT, RBIT); -} - +SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + assert(VT.isScalableVector() || + useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)); + + SDLoc DL(Op); + SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0)); + return DAG.getNode(ISD::CTLZ, DL, VT, RBIT); +} + SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType().isVector()) @@ -6742,8 +6742,8 @@ SDValue 
AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, // instead of a CSEL in that case. if (TrueVal == ~FalseVal) { Opcode = AArch64ISD::CSINV; - } else if (FalseVal > std::numeric_limits<int64_t>::min() && - TrueVal == -FalseVal) { + } else if (FalseVal > std::numeric_limits<int64_t>::min() && + TrueVal == -FalseVal) { Opcode = AArch64ISD::CSNEG; } else if (TVal.getValueType() == MVT::i32) { // If our operands are only 32-bit wide, make sure we use 32-bit @@ -6943,9 +6943,9 @@ SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op, SDValue Entry = Op.getOperand(2); int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex(); - auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>(); - AFI->setJumpTableEntryInfo(JTI, 4, nullptr); - + auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>(); + AFI->setJumpTableEntryInfo(JTI, 4, nullptr); + SDNode *Dest = DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT, Entry, DAG.getTargetJumpTable(JTI, MVT::i32)); @@ -7012,13 +7012,13 @@ SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op, } SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { // The layout of the va_list struct is specified in the AArch64 Procedure Call // Standard, section B.3. MachineFunction &MF = DAG.getMachineFunction(); AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); - unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8; - auto PtrMemVT = getPointerMemTy(DAG.getDataLayout()); + unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8; + auto PtrMemVT = getPointerMemTy(DAG.getDataLayout()); auto PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); @@ -7028,64 +7028,64 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SmallVector<SDValue, 4> MemOps; // void *__stack at offset 0 - unsigned Offset = 0; + unsigned Offset = 0; SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT); - Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT); + Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT); MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, - MachinePointerInfo(SV), Align(PtrSize))); + MachinePointerInfo(SV), Align(PtrSize))); - // void *__gr_top at offset 8 (4 on ILP32) - Offset += PtrSize; + // void *__gr_top at offset 8 (4 on ILP32) + Offset += PtrSize; int GPRSize = FuncInfo->getVarArgsGPRSize(); if (GPRSize > 0) { SDValue GRTop, GRTopAddr; - GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, - DAG.getConstant(Offset, DL, PtrVT)); + GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(Offset, DL, PtrVT)); GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT); GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop, DAG.getConstant(GPRSize, DL, PtrVT)); - GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT); + GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT); MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, - MachinePointerInfo(SV, Offset), - Align(PtrSize))); + MachinePointerInfo(SV, Offset), + Align(PtrSize))); } - // void *__vr_top at offset 16 (8 on ILP32) - Offset += PtrSize; + // void *__vr_top at offset 16 (8 on ILP32) + Offset += PtrSize; int FPRSize = FuncInfo->getVarArgsFPRSize(); if (FPRSize > 0) { SDValue VRTop, VRTopAddr; VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, - DAG.getConstant(Offset, DL, PtrVT)); + DAG.getConstant(Offset, DL, PtrVT)); VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT); VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop, 
DAG.getConstant(FPRSize, DL, PtrVT)); - VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT); + VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT); MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, - MachinePointerInfo(SV, Offset), - Align(PtrSize))); - } - - // int __gr_offs at offset 24 (12 on ILP32) - Offset += PtrSize; - SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, - DAG.getConstant(Offset, DL, PtrVT)); - MemOps.push_back( - DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), - GROffsAddr, MachinePointerInfo(SV, Offset), Align(4))); - - // int __vr_offs at offset 28 (16 on ILP32) - Offset += 4; - SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, - DAG.getConstant(Offset, DL, PtrVT)); - MemOps.push_back( - DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), - VROffsAddr, MachinePointerInfo(SV, Offset), Align(4))); + MachinePointerInfo(SV, Offset), + Align(PtrSize))); + } + + // int __gr_offs at offset 24 (12 on ILP32) + Offset += PtrSize; + SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(Offset, DL, PtrVT)); + MemOps.push_back( + DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), + GROffsAddr, MachinePointerInfo(SV, Offset), Align(4))); + + // int __vr_offs at offset 28 (16 on ILP32) + Offset += 4; + SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(Offset, DL, PtrVT)); + MemOps.push_back( + DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), + VROffsAddr, MachinePointerInfo(SV, Offset), Align(4))); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } @@ -7108,10 +7108,10 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, // pointer. SDLoc DL(Op); unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8; - unsigned VaListSize = - (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) - ? PtrSize - : Subtarget->isTargetILP32() ? 20 : 32; + unsigned VaListSize = + (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) + ? PtrSize + : Subtarget->isTargetILP32() ? 20 : 32; const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); @@ -7264,34 +7264,34 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - SDValue ReturnAddress; + SDValue ReturnAddress; if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout())); - ReturnAddress = DAG.getLoad( - VT, DL, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo()); - } else { - // Return LR, which contains the return address. Mark it an implicit - // live-in. - unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass); - ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT); - } - - // The XPACLRI instruction assembles to a hint-space instruction before - // Armv8.3-A therefore this instruction can be safely used for any pre - // Armv8.3-A architectures. On Armv8.3-A and onwards XPACI is available so use - // that instead. - SDNode *St; - if (Subtarget->hasPAuth()) { - St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress); - } else { - // XPACLRI operates on LR therefore we must move the operand accordingly. 
- SDValue Chain = - DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress); - St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain); - } - return SDValue(St, 0); + ReturnAddress = DAG.getLoad( + VT, DL, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo()); + } else { + // Return LR, which contains the return address. Mark it an implicit + // live-in. + unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass); + ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT); + } + + // The XPACLRI instruction assembles to a hint-space instruction before + // Armv8.3-A therefore this instruction can be safely used for any pre + // Armv8.3-A architectures. On Armv8.3-A and onwards XPACI is available so use + // that instead. + SDNode *St; + if (Subtarget->hasPAuth()) { + St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress); + } else { + // XPACLRI operates on LR therefore we must move the operand accordingly. + SDValue Chain = + DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress); + St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain); + } + return SDValue(St, 0); } /// LowerShiftRightParts - Lower SRA_PARTS, which returns two @@ -7472,22 +7472,22 @@ static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode, return SDValue(); } -SDValue -AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, - const DenormalMode &Mode) const { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); - SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); - return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); -} - -SDValue -AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op, - SelectionDAG &DAG) const { - return Op; -} - +SDValue +AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, + const DenormalMode &Mode) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); +} + +SDValue +AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op, + SelectionDAG &DAG) const { + return Op; +} + SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &ExtraSteps, @@ -7511,7 +7511,7 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand, Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags); Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags); } - if (!Reciprocal) + if (!Reciprocal) Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags); ExtraSteps = 0; @@ -7688,30 +7688,30 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': - if (VT.isScalableVector()) - return std::make_pair(0U, nullptr); - if (VT.getFixedSizeInBits() == 64) + if (VT.isScalableVector()) + return std::make_pair(0U, nullptr); + if (VT.getFixedSizeInBits() == 64) return std::make_pair(0U, &AArch64::GPR64commonRegClass); return std::make_pair(0U, &AArch64::GPR32commonRegClass); - case 'w': { + case 'w': { if (!Subtarget->hasFPARMv8()) break; - if (VT.isScalableVector()) { - if (VT.getVectorElementType() != MVT::i1) - return std::make_pair(0U, &AArch64::ZPRRegClass); - return std::make_pair(0U, nullptr); - } - uint64_t VTSize = VT.getFixedSizeInBits(); - if (VTSize == 16) + if 
(VT.isScalableVector()) { + if (VT.getVectorElementType() != MVT::i1) + return std::make_pair(0U, &AArch64::ZPRRegClass); + return std::make_pair(0U, nullptr); + } + uint64_t VTSize = VT.getFixedSizeInBits(); + if (VTSize == 16) return std::make_pair(0U, &AArch64::FPR16RegClass); - if (VTSize == 32) + if (VTSize == 32) return std::make_pair(0U, &AArch64::FPR32RegClass); - if (VTSize == 64) + if (VTSize == 64) return std::make_pair(0U, &AArch64::FPR64RegClass); - if (VTSize == 128) + if (VTSize == 128) return std::make_pair(0U, &AArch64::FPR128RegClass); break; - } + } // The instructions that this constraint is designed for can // only take 128-bit registers so just use that regclass. case 'x': @@ -7732,11 +7732,11 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( } else { PredicateConstraint PC = parsePredicateConstraint(Constraint); if (PC != PredicateConstraint::Invalid) { - if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1) - return std::make_pair(0U, nullptr); + if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1) + return std::make_pair(0U, nullptr); bool restricted = (PC == PredicateConstraint::Upl); return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass) - : std::make_pair(0U, &AArch64::PPRRegClass); + : std::make_pair(0U, &AArch64::PPRRegClass); } } if (StringRef("{cc}").equals_lower(Constraint)) @@ -7975,8 +7975,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n"); SDLoc dl(Op); EVT VT = Op.getValueType(); - assert(!VT.isScalableVector() && - "Scalable vectors cannot be used with ISD::BUILD_VECTOR"); + assert(!VT.isScalableVector() && + "Scalable vectors cannot be used with ISD::BUILD_VECTOR"); unsigned NumElts = VT.getVectorNumElements(); struct ShuffleSourceInfo { @@ -8047,9 +8047,9 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, } } unsigned ResMultiplier = - VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits(); - uint64_t VTSize = VT.getFixedSizeInBits(); - NumElts = VTSize / SmallestEltTy.getFixedSizeInBits(); + VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits(); + uint64_t VTSize = VT.getFixedSizeInBits(); + NumElts = VTSize / SmallestEltTy.getFixedSizeInBits(); EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts); // If the source vector is too wide or too narrow, we may nevertheless be able @@ -8058,18 +8058,18 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, for (auto &Src : Sources) { EVT SrcVT = Src.ShuffleVec.getValueType(); - uint64_t SrcVTSize = SrcVT.getFixedSizeInBits(); - if (SrcVTSize == VTSize) + uint64_t SrcVTSize = SrcVT.getFixedSizeInBits(); + if (SrcVTSize == VTSize) continue; // This stage of the search produces a source with the same element type as // the original, but with a total width matching the BUILD_VECTOR output. EVT EltVT = SrcVT.getVectorElementType(); - unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits(); + unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits(); EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts); - if (SrcVTSize < VTSize) { - assert(2 * SrcVTSize == VTSize); + if (SrcVTSize < VTSize) { + assert(2 * SrcVTSize == VTSize); // We can pad out the smaller vector for free, so if it's part of a // shuffle... 
Src.ShuffleVec = @@ -8078,11 +8078,11 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, continue; } - if (SrcVTSize != 2 * VTSize) { - LLVM_DEBUG( - dbgs() << "Reshuffle failed: result vector too small to extract\n"); - return SDValue(); - } + if (SrcVTSize != 2 * VTSize) { + LLVM_DEBUG( + dbgs() << "Reshuffle failed: result vector too small to extract\n"); + return SDValue(); + } if (Src.MaxElt - Src.MinElt >= NumSrcElts) { LLVM_DEBUG( @@ -8111,13 +8111,13 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, DAG.getConstant(NumSrcElts, dl, MVT::i64)); unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1); - if (!SrcVT.is64BitVector()) { - LLVM_DEBUG( - dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT " - "for SVE vectors."); - return SDValue(); - } - + if (!SrcVT.is64BitVector()) { + LLVM_DEBUG( + dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT " + "for SVE vectors."); + return SDValue(); + } + Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1, VEXTSrc2, DAG.getConstant(Imm, dl, MVT::i32)); @@ -8134,8 +8134,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, continue; assert(ShuffleVT.getVectorElementType() == SmallestEltTy); Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec); - Src.WindowScale = - SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits(); + Src.WindowScale = + SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits(); Src.WindowBase *= Src.WindowScale; } @@ -8159,8 +8159,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, // trunc. So only std::min(SrcBits, DestBits) actually get defined in this // segment. EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType(); - int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(), - VT.getScalarSizeInBits()); + int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(), + VT.getScalarSizeInBits()); int LanesDefined = BitsDefined / BitsPerShuffleLane; // This source is expected to fill ResMultiplier lanes of the final shuffle, @@ -8224,81 +8224,81 @@ static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { return true; } -/// Check if a vector shuffle corresponds to a DUP instructions with a larger -/// element width than the vector lane type. If that is the case the function -/// returns true and writes the value of the DUP instruction lane operand into -/// DupLaneOp -static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize, - unsigned &DupLaneOp) { - assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && - "Only possible block sizes for wide DUP are: 16, 32, 64"); - - if (BlockSize <= VT.getScalarSizeInBits()) - return false; - if (BlockSize % VT.getScalarSizeInBits() != 0) - return false; - if (VT.getSizeInBits() % BlockSize != 0) - return false; - - size_t SingleVecNumElements = VT.getVectorNumElements(); - size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits(); - size_t NumBlocks = VT.getSizeInBits() / BlockSize; - - // We are looking for masks like - // [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element - // might be replaced by 'undefined'. BlockIndices will eventually contain - // lane indices of the duplicated block (i.e. 
[0, 1], [2, 3] and [4, 5, 6, 7] - // for the above examples) - SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1); - for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++) - for (size_t I = 0; I < NumEltsPerBlock; I++) { - int Elt = M[BlockIndex * NumEltsPerBlock + I]; - if (Elt < 0) - continue; - // For now we don't support shuffles that use the second operand - if ((unsigned)Elt >= SingleVecNumElements) - return false; - if (BlockElts[I] < 0) - BlockElts[I] = Elt; - else if (BlockElts[I] != Elt) - return false; - } - - // We found a candidate block (possibly with some undefs). It must be a - // sequence of consecutive integers starting with a value divisible by - // NumEltsPerBlock with some values possibly replaced by undef-s. - - // Find first non-undef element - auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; }); - assert(FirstRealEltIter != BlockElts.end() && - "Shuffle with all-undefs must have been caught by previous cases, " - "e.g. isSplat()"); - if (FirstRealEltIter == BlockElts.end()) { - DupLaneOp = 0; - return true; - } - - // Index of FirstRealElt in BlockElts - size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin(); - - if ((unsigned)*FirstRealEltIter < FirstRealIndex) - return false; - // BlockElts[0] must have the following value if it isn't undef: - size_t Elt0 = *FirstRealEltIter - FirstRealIndex; - - // Check the first element - if (Elt0 % NumEltsPerBlock != 0) - return false; - // Check that the sequence indeed consists of consecutive integers (modulo - // undefs) - for (size_t I = 0; I < NumEltsPerBlock; I++) - if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I) - return false; - - DupLaneOp = Elt0 / NumEltsPerBlock; - return true; -} - +/// Check if a vector shuffle corresponds to a DUP instructions with a larger +/// element width than the vector lane type. If that is the case the function +/// returns true and writes the value of the DUP instruction lane operand into +/// DupLaneOp +static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize, + unsigned &DupLaneOp) { + assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && + "Only possible block sizes for wide DUP are: 16, 32, 64"); + + if (BlockSize <= VT.getScalarSizeInBits()) + return false; + if (BlockSize % VT.getScalarSizeInBits() != 0) + return false; + if (VT.getSizeInBits() % BlockSize != 0) + return false; + + size_t SingleVecNumElements = VT.getVectorNumElements(); + size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits(); + size_t NumBlocks = VT.getSizeInBits() / BlockSize; + + // We are looking for masks like + // [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element + // might be replaced by 'undefined'. BlockIndices will eventually contain + // lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7] + // for the above examples) + SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1); + for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++) + for (size_t I = 0; I < NumEltsPerBlock; I++) { + int Elt = M[BlockIndex * NumEltsPerBlock + I]; + if (Elt < 0) + continue; + // For now we don't support shuffles that use the second operand + if ((unsigned)Elt >= SingleVecNumElements) + return false; + if (BlockElts[I] < 0) + BlockElts[I] = Elt; + else if (BlockElts[I] != Elt) + return false; + } + + // We found a candidate block (possibly with some undefs). 
It must be a + // sequence of consecutive integers starting with a value divisible by + // NumEltsPerBlock with some values possibly replaced by undef-s. + + // Find first non-undef element + auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; }); + assert(FirstRealEltIter != BlockElts.end() && + "Shuffle with all-undefs must have been caught by previous cases, " + "e.g. isSplat()"); + if (FirstRealEltIter == BlockElts.end()) { + DupLaneOp = 0; + return true; + } + + // Index of FirstRealElt in BlockElts + size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin(); + + if ((unsigned)*FirstRealEltIter < FirstRealIndex) + return false; + // BlockElts[0] must have the following value if it isn't undef: + size_t Elt0 = *FirstRealEltIter - FirstRealIndex; + + // Check the first element + if (Elt0 % NumEltsPerBlock != 0) + return false; + // Check that the sequence indeed consists of consecutive integers (modulo + // undefs) + for (size_t I = 0; I < NumEltsPerBlock; I++) + if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I) + return false; + + DupLaneOp = Elt0 / NumEltsPerBlock; + return true; +} + // check if an EXT instruction can handle the shuffle mask when the // vector sources of the shuffle are different. static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT, @@ -8732,60 +8732,60 @@ static unsigned getDUPLANEOp(EVT EltType) { llvm_unreachable("Invalid vector element type?"); } -static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT, - unsigned Opcode, SelectionDAG &DAG) { - // Try to eliminate a bitcasted extract subvector before a DUPLANE. - auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) { - // Match: dup (bitcast (extract_subv X, C)), LaneC - if (BitCast.getOpcode() != ISD::BITCAST || - BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR) - return false; - - // The extract index must align in the destination type. That may not - // happen if the bitcast is from narrow to wide type. - SDValue Extract = BitCast.getOperand(0); - unsigned ExtIdx = Extract.getConstantOperandVal(1); - unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits(); - unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth; - unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits(); - if (ExtIdxInBits % CastedEltBitWidth != 0) - return false; - - // Update the lane value by offsetting with the scaled extract index. - LaneC += ExtIdxInBits / CastedEltBitWidth; - - // Determine the casted vector type of the wide vector input. - // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC' - // Examples: - // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3 - // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5 - unsigned SrcVecNumElts = - Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth; - CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(), - SrcVecNumElts); - return true; - }; - MVT CastVT; - if (getScaledOffsetDup(V, Lane, CastVT)) { - V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0)); - } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) { - // The lane is incremented by the index of the extract. - // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3 - Lane += V.getConstantOperandVal(1); - V = V.getOperand(0); - } else if (V.getOpcode() == ISD::CONCAT_VECTORS) { - // The lane is decremented if we are splatting from the 2nd operand. 
- // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1 - unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2; - Lane -= Idx * VT.getVectorNumElements() / 2; - V = WidenVector(V.getOperand(Idx), DAG); - } else if (VT.getSizeInBits() == 64) { - // Widen the operand to 128-bit register with undef. - V = WidenVector(V, DAG); - } - return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64)); -} - +static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT, + unsigned Opcode, SelectionDAG &DAG) { + // Try to eliminate a bitcasted extract subvector before a DUPLANE. + auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) { + // Match: dup (bitcast (extract_subv X, C)), LaneC + if (BitCast.getOpcode() != ISD::BITCAST || + BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR) + return false; + + // The extract index must align in the destination type. That may not + // happen if the bitcast is from narrow to wide type. + SDValue Extract = BitCast.getOperand(0); + unsigned ExtIdx = Extract.getConstantOperandVal(1); + unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits(); + unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth; + unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits(); + if (ExtIdxInBits % CastedEltBitWidth != 0) + return false; + + // Update the lane value by offsetting with the scaled extract index. + LaneC += ExtIdxInBits / CastedEltBitWidth; + + // Determine the casted vector type of the wide vector input. + // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC' + // Examples: + // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3 + // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5 + unsigned SrcVecNumElts = + Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth; + CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(), + SrcVecNumElts); + return true; + }; + MVT CastVT; + if (getScaledOffsetDup(V, Lane, CastVT)) { + V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0)); + } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) { + // The lane is incremented by the index of the extract. + // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3 + Lane += V.getConstantOperandVal(1); + V = V.getOperand(0); + } else if (V.getOpcode() == ISD::CONCAT_VECTORS) { + // The lane is decremented if we are splatting from the 2nd operand. + // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1 + unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2; + Lane -= Idx * VT.getVectorNumElements() / 2; + V = WidenVector(V.getOperand(Idx), DAG); + } else if (VT.getSizeInBits() == 64) { + // Widen the operand to 128-bit register with undef. + V = WidenVector(V, DAG); + } + return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64)); +} + SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -8819,25 +8819,25 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // Otherwise, duplicate from the lane of the input vector. unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType()); - return constructDup(V1, Lane, dl, VT, Opcode, DAG); - } - - // Check if the mask matches a DUP for a wider element - for (unsigned LaneSize : {64U, 32U, 16U}) { - unsigned Lane = 0; - if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) { - unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64 - : LaneSize == 32 ? 
AArch64ISD::DUPLANE32 - : AArch64ISD::DUPLANE16; - // Cast V1 to an integer vector with required lane size - MVT NewEltTy = MVT::getIntegerVT(LaneSize); - unsigned NewEltCount = VT.getSizeInBits() / LaneSize; - MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount); - V1 = DAG.getBitcast(NewVecTy, V1); - // Constuct the DUP instruction - V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG); - // Cast back to the original type - return DAG.getBitcast(VT, V1); + return constructDup(V1, Lane, dl, VT, Opcode, DAG); + } + + // Check if the mask matches a DUP for a wider element + for (unsigned LaneSize : {64U, 32U, 16U}) { + unsigned Lane = 0; + if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) { + unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64 + : LaneSize == 32 ? AArch64ISD::DUPLANE32 + : AArch64ISD::DUPLANE16; + // Cast V1 to an integer vector with required lane size + MVT NewEltTy = MVT::getIntegerVT(LaneSize); + unsigned NewEltCount = VT.getSizeInBits() / LaneSize; + MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount); + V1 = DAG.getBitcast(NewVecTy, V1); + // Constuct the DUP instruction + V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG); + // Cast back to the original type + return DAG.getBitcast(VT, V1); } } @@ -8909,7 +8909,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, EVT ScalarVT = VT.getVectorElementType(); - if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger()) + if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger()) ScalarVT = MVT::i32; return DAG.getNode( @@ -8950,9 +8950,9 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op, EVT ElemVT = VT.getScalarType(); SDValue SplatVal = Op.getOperand(0); - if (useSVEForFixedLengthVectorVT(VT)) - return LowerToScalableOp(Op, DAG); - + if (useSVEForFixedLengthVectorVT(VT)) + return LowerToScalableOp(Op, DAG); + // Extend input splat value where needed to fit into a GPR (32b or 64b only) // FPRs don't have this restriction. 
switch (ElemVT.getSimpleVT().SimpleTy) { @@ -9382,9 +9382,9 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, SelectionDAG &DAG) const { - if (useSVEForFixedLengthVectorVT(Op.getValueType())) - return LowerToScalableOp(Op, DAG); - + if (useSVEForFixedLengthVectorVT(Op.getValueType())) + return LowerToScalableOp(Op, DAG); + // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2)) if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG)) return Res; @@ -9543,18 +9543,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, bool isConstant = true; bool AllLanesExtractElt = true; unsigned NumConstantLanes = 0; - unsigned NumDifferentLanes = 0; - unsigned NumUndefLanes = 0; + unsigned NumDifferentLanes = 0; + unsigned NumUndefLanes = 0; SDValue Value; SDValue ConstantValue; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) AllLanesExtractElt = false; - if (V.isUndef()) { - ++NumUndefLanes; + if (V.isUndef()) { + ++NumUndefLanes; continue; - } + } if (i > 0) isOnlyLowElement = false; if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) @@ -9570,10 +9570,10 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, if (!Value.getNode()) Value = V; - else if (V != Value) { + else if (V != Value) { usesOnlyOneValue = false; - ++NumDifferentLanes; - } + ++NumDifferentLanes; + } } if (!Value.getNode()) { @@ -9699,20 +9699,20 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, } } - // If we need to insert a small number of different non-constant elements and - // the vector width is sufficiently large, prefer using DUP with the common - // value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred, - // skip the constant lane handling below. - bool PreferDUPAndInsert = - !isConstant && NumDifferentLanes >= 1 && - NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) && - NumDifferentLanes >= NumConstantLanes; - + // If we need to insert a small number of different non-constant elements and + // the vector width is sufficiently large, prefer using DUP with the common + // value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred, + // skip the constant lane handling below. + bool PreferDUPAndInsert = + !isConstant && NumDifferentLanes >= 1 && + NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) && + NumDifferentLanes >= NumConstantLanes; + // If there was only one constant value used and for more than one lane, // start by splatting that value, then replace the non-constant lanes. This // is better than the default, which will perform a separate initialization // for each lane. - if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) { + if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) { // Firstly, try to materialize the splat constant. SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue), Val = ConstantBuildVector(Vec, DAG); @@ -9748,22 +9748,22 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, return shuffle; } - if (PreferDUPAndInsert) { - // First, build a constant vector with the common element. - SmallVector<SDValue, 8> Ops; - for (unsigned I = 0; I < NumElts; ++I) - Ops.push_back(Value); - SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG); - // Next, insert the elements that do not match the common value. 
- for (unsigned I = 0; I < NumElts; ++I) - if (Op.getOperand(I) != Value) - NewVector = - DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector, - Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64)); - - return NewVector; - } - + if (PreferDUPAndInsert) { + // First, build a constant vector with the common element. + SmallVector<SDValue, 8> Ops; + for (unsigned I = 0; I < NumElts; ++I) + Ops.push_back(Value); + SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG); + // Next, insert the elements that do not match the common value. + for (unsigned I = 0; I < NumElts; ++I) + if (Op.getOperand(I) != Value) + NewVector = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector, + Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64)); + + return NewVector; + } + // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we // know the default expansion would otherwise fall back on something even // worse. For a vector with one or two non-undef values, that's @@ -9812,18 +9812,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, return SDValue(); } -SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getValueType().isScalableVector() && - isTypeLegal(Op.getValueType()) && - "Expected legal scalable vector type!"); - - if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2) - return Op; - - return SDValue(); -} - +SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getValueType().isScalableVector() && + isTypeLegal(Op.getValueType()) && + "Expected legal scalable vector type!"); + + if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2) + return Op; + + return SDValue(); +} + SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!"); @@ -9919,8 +9919,8 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, // If this is extracting the upper 64-bits of a 128-bit vector, we match // that directly. - if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 && - InVT.getSizeInBits() == 128) + if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 && + InVT.getSizeInBits() == 128) return Op; return SDValue(); @@ -9934,34 +9934,34 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, EVT InVT = Op.getOperand(1).getValueType(); unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); - if (InVT.isScalableVector()) { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - - if (!isTypeLegal(VT) || !VT.isInteger()) - return SDValue(); - - SDValue Vec0 = Op.getOperand(0); - SDValue Vec1 = Op.getOperand(1); - - // Ensure the subvector is half the size of the main vector. - if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2)) - return SDValue(); - - // Extend elements of smaller vector... 
- EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext())); - SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1); - - if (Idx == 0) { - SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0); - return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0); - } else if (Idx == InVT.getVectorMinNumElements()) { - SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0); - return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec); - } - + if (InVT.isScalableVector()) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + if (!isTypeLegal(VT) || !VT.isInteger()) + return SDValue(); + + SDValue Vec0 = Op.getOperand(0); + SDValue Vec1 = Op.getOperand(1); + + // Ensure the subvector is half the size of the main vector. + if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2)) + return SDValue(); + + // Extend elements of smaller vector... + EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext())); + SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1); + + if (Idx == 0) { + SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0); + return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0); + } else if (Idx == InVT.getVectorMinNumElements()) { + SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0); + return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec); + } + return SDValue(); - } + } // This will be matched by custom code during ISelDAGToDAG. if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef()) @@ -9970,42 +9970,42 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, return SDValue(); } -SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - - if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) - return LowerFixedLengthVectorIntDivideToSVE(Op, DAG); - - assert(VT.isScalableVector() && "Expected a scalable vector."); - - bool Signed = Op.getOpcode() == ISD::SDIV; - unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED; - - if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64) - return LowerToPredicatedOp(Op, DAG, PredOpcode); - - // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit - // operations, and truncate the result. - EVT WidenedVT; - if (VT == MVT::nxv16i8) - WidenedVT = MVT::nxv8i16; - else if (VT == MVT::nxv8i16) - WidenedVT = MVT::nxv4i32; - else - llvm_unreachable("Unexpected Custom DIV operation"); - - SDLoc dl(Op); - unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO; - unsigned UnpkHi = Signed ? 
AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI; - SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0)); - SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1)); - SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0)); - SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1)); - SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo); - SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi); - return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi); -} - +SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + + if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) + return LowerFixedLengthVectorIntDivideToSVE(Op, DAG); + + assert(VT.isScalableVector() && "Expected a scalable vector."); + + bool Signed = Op.getOpcode() == ISD::SDIV; + unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED; + + if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64) + return LowerToPredicatedOp(Op, DAG, PredOpcode); + + // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit + // operations, and truncate the result. + EVT WidenedVT; + if (VT == MVT::nxv16i8) + WidenedVT = MVT::nxv8i16; + else if (VT == MVT::nxv8i16) + WidenedVT = MVT::nxv4i32; + else + llvm_unreachable("Unexpected Custom DIV operation"); + + SDLoc dl(Op); + unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO; + unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI; + SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0)); + SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1)); + SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0)); + SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1)); + SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo); + SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi); + return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi); +} + bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { // Currently no fixed length shuffles that require SVE are legal. if (useSVEForFixedLengthVectorVT(VT)) @@ -10105,12 +10105,12 @@ SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op, } if (!VT.isVector() || VT.isScalableVector()) - return SDValue(); + return SDValue(); if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType())) return LowerFixedLengthVectorTruncateToSVE(Op, DAG); - return SDValue(); + return SDValue(); } SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, @@ -10128,8 +10128,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, llvm_unreachable("unexpected shift opcode"); case ISD::SHL: - if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED); + if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED); if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0), @@ -10140,9 +10140,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, Op.getOperand(0), Op.getOperand(1)); case ISD::SRA: case ISD::SRL: - if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) { - unsigned Opc = Op.getOpcode() == ISD::SRA ? 
AArch64ISD::SRA_PRED - : AArch64ISD::SRL_PRED; + if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) { + unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED + : AArch64ISD::SRL_PRED; return LowerToPredicatedOp(Op, DAG, Opc); } @@ -10194,7 +10194,7 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS); else Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS); - return DAG.getNOT(dl, Fcmeq, VT); + return DAG.getNOT(dl, Fcmeq, VT); } case AArch64CC::EQ: if (IsZero) @@ -10233,7 +10233,7 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS); else Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS); - return DAG.getNOT(dl, Cmeq, VT); + return DAG.getNOT(dl, Cmeq, VT); } case AArch64CC::EQ: if (IsZero) @@ -10274,9 +10274,9 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO); } - if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType())) - return LowerFixedLengthVectorSetccToSVE(Op, DAG); - + if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType())) + return LowerFixedLengthVectorSetccToSVE(Op, DAG); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -10349,51 +10349,51 @@ static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp, SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const { - SDValue Src = Op.getOperand(0); - - // Try to lower fixed length reductions to SVE. - EVT SrcVT = Src.getValueType(); - bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND || - Op.getOpcode() == ISD::VECREDUCE_OR || - Op.getOpcode() == ISD::VECREDUCE_XOR || - Op.getOpcode() == ISD::VECREDUCE_FADD || - (Op.getOpcode() != ISD::VECREDUCE_ADD && - SrcVT.getVectorElementType() == MVT::i64); - if (SrcVT.isScalableVector() || - useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) { - - if (SrcVT.getVectorElementType() == MVT::i1) - return LowerPredReductionToSVE(Op, DAG); - - switch (Op.getOpcode()) { - case ISD::VECREDUCE_ADD: - return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG); - case ISD::VECREDUCE_AND: - return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG); - case ISD::VECREDUCE_OR: - return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG); - case ISD::VECREDUCE_SMAX: - return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG); - case ISD::VECREDUCE_SMIN: - return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG); - case ISD::VECREDUCE_UMAX: - return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG); - case ISD::VECREDUCE_UMIN: - return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG); - case ISD::VECREDUCE_XOR: - return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG); - case ISD::VECREDUCE_FADD: - return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG); - case ISD::VECREDUCE_FMAX: - return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG); - case ISD::VECREDUCE_FMIN: - return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG); - default: - llvm_unreachable("Unhandled fixed length reduction"); - } - } - - // Lower NEON reductions. + SDValue Src = Op.getOperand(0); + + // Try to lower fixed length reductions to SVE. 
+ EVT SrcVT = Src.getValueType(); + bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND || + Op.getOpcode() == ISD::VECREDUCE_OR || + Op.getOpcode() == ISD::VECREDUCE_XOR || + Op.getOpcode() == ISD::VECREDUCE_FADD || + (Op.getOpcode() != ISD::VECREDUCE_ADD && + SrcVT.getVectorElementType() == MVT::i64); + if (SrcVT.isScalableVector() || + useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) { + + if (SrcVT.getVectorElementType() == MVT::i1) + return LowerPredReductionToSVE(Op, DAG); + + switch (Op.getOpcode()) { + case ISD::VECREDUCE_ADD: + return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG); + case ISD::VECREDUCE_AND: + return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG); + case ISD::VECREDUCE_OR: + return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG); + case ISD::VECREDUCE_SMAX: + return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG); + case ISD::VECREDUCE_SMIN: + return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG); + case ISD::VECREDUCE_UMAX: + return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG); + case ISD::VECREDUCE_UMIN: + return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG); + case ISD::VECREDUCE_XOR: + return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG); + case ISD::VECREDUCE_FADD: + return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG); + case ISD::VECREDUCE_FMAX: + return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG); + case ISD::VECREDUCE_FMIN: + return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG); + default: + llvm_unreachable("Unhandled fixed length reduction"); + } + } + + // Lower NEON reductions. SDLoc dl(Op); switch (Op.getOpcode()) { case ISD::VECREDUCE_ADD: @@ -10410,13 +10410,13 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, return DAG.getNode( ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32), - Src); + Src); } case ISD::VECREDUCE_FMIN: { return DAG.getNode( ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32), - Src); + Src); } default: llvm_unreachable("Unhandled reduction"); @@ -10426,7 +10426,7 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const { auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget()); - if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics()) + if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics()) return SDValue(); // LSE has an atomic load-add instruction, but not a load-sub. @@ -10443,7 +10443,7 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op, SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const { auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget()); - if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics()) + if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics()) return SDValue(); // LSE has an atomic load-clear instruction, but not a load-and. @@ -10544,17 +10544,17 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, /// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics. 
template <unsigned NumVecs> -static bool -setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, - AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) { +static bool +setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, + AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) { Info.opc = ISD::INTRINSIC_VOID; // Retrieve EC from first vector argument. - const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType()); + const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType()); ElementCount EC = VT.getVectorElementCount(); #ifndef NDEBUG // Check the assumption that all input vectors are the same type. for (unsigned I = 0; I < NumVecs; ++I) - assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) && + assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) && "Invalid type."); #endif // memVT is `NumVecs * VT`. @@ -10577,11 +10577,11 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, auto &DL = I.getModule()->getDataLayout(); switch (Intrinsic) { case Intrinsic::aarch64_sve_st2: - return setInfoSVEStN<2>(*this, DL, Info, I); + return setInfoSVEStN<2>(*this, DL, Info, I); case Intrinsic::aarch64_sve_st3: - return setInfoSVEStN<3>(*this, DL, Info, I); + return setInfoSVEStN<3>(*this, DL, Info, I); case Intrinsic::aarch64_sve_st4: - return setInfoSVEStN<4>(*this, DL, Info, I); + return setInfoSVEStN<4>(*this, DL, Info, I); case Intrinsic::aarch64_neon_ld2: case Intrinsic::aarch64_neon_ld3: case Intrinsic::aarch64_neon_ld4: @@ -10737,15 +10737,15 @@ bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load, bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; - uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize(); - uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize(); + uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize(); + uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize(); return NumBits1 > NumBits2; } bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger()) return false; - uint64_t NumBits1 = VT1.getFixedSizeInBits(); - uint64_t NumBits2 = VT2.getFixedSizeInBits(); + uint64_t NumBits1 = VT1.getFixedSizeInBits(); + uint64_t NumBits2 = VT2.getFixedSizeInBits(); return NumBits1 > NumBits2; } @@ -10987,43 +10987,43 @@ bool AArch64TargetLowering::shouldSinkOperands( return true; } - case Instruction::Mul: { - bool IsProfitable = false; - for (auto &Op : I->operands()) { - // Make sure we are not already sinking this operand - if (any_of(Ops, [&](Use *U) { return U->get() == Op; })) - continue; - - ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op); - if (!Shuffle || !Shuffle->isZeroEltSplat()) - continue; - - Value *ShuffleOperand = Shuffle->getOperand(0); - InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand); - if (!Insert) - continue; - - Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1)); - if (!OperandInstr) - continue; - - ConstantInt *ElementConstant = - dyn_cast<ConstantInt>(Insert->getOperand(2)); - // Check that the insertelement is inserting into element 0 - if (!ElementConstant || ElementConstant->getZExtValue() != 0) - continue; - - unsigned Opcode = OperandInstr->getOpcode(); - if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt) - continue; - - 
Ops.push_back(&Shuffle->getOperandUse(0)); - Ops.push_back(&Op); - IsProfitable = true; - } - - return IsProfitable; - } + case Instruction::Mul: { + bool IsProfitable = false; + for (auto &Op : I->operands()) { + // Make sure we are not already sinking this operand + if (any_of(Ops, [&](Use *U) { return U->get() == Op; })) + continue; + + ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op); + if (!Shuffle || !Shuffle->isZeroEltSplat()) + continue; + + Value *ShuffleOperand = Shuffle->getOperand(0); + InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand); + if (!Insert) + continue; + + Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1)); + if (!OperandInstr) + continue; + + ConstantInt *ElementConstant = + dyn_cast<ConstantInt>(Insert->getOperand(2)); + // Check that the insertelement is inserting into element 0 + if (!ElementConstant || ElementConstant->getZExtValue() != 0) + continue; + + unsigned Opcode = OperandInstr->getOpcode(); + if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt) + continue; + + Ops.push_back(&Shuffle->getOperandUse(0)); + Ops.push_back(&Op); + IsProfitable = true; + } + + return IsProfitable; + } default: return false; } @@ -11359,12 +11359,12 @@ SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic, {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}}; std::tie(N, Opcode) = IntrinsicMap[Intrinsic]; - assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 && + assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 && "invalid tuple vector type!"); - EVT SplitVT = - EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - VT.getVectorElementCount().divideCoefficientBy(N)); + EVT SplitVT = + EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + VT.getVectorElementCount().divideCoefficientBy(N)); assert(isTypeLegal(SplitVT)); SmallVector<EVT, 5> VTs(N, SplitVT); @@ -11655,86 +11655,86 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0)); } -// VECREDUCE_ADD( EXTEND(v16i8_type) ) to -// VECREDUCE_ADD( DOTv16i8(v16i8_type) ) -static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG, - const AArch64Subtarget *ST) { - SDValue Op0 = N->getOperand(0); - if (!ST->hasDotProd() || N->getValueType(0) != MVT::i32) - return SDValue(); - - if (Op0.getValueType().getVectorElementType() != MVT::i32) - return SDValue(); - - unsigned ExtOpcode = Op0.getOpcode(); - if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND) - return SDValue(); - - EVT Op0VT = Op0.getOperand(0).getValueType(); - if (Op0VT != MVT::v16i8) - return SDValue(); - - SDLoc DL(Op0); - SDValue Ones = DAG.getConstant(1, DL, Op0VT); - SDValue Zeros = DAG.getConstant(0, DL, MVT::v4i32); - auto DotIntrisic = (ExtOpcode == ISD::ZERO_EXTEND) - ? Intrinsic::aarch64_neon_udot - : Intrinsic::aarch64_neon_sdot; - SDValue Dot = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Zeros.getValueType(), - DAG.getConstant(DotIntrisic, DL, MVT::i32), Zeros, - Ones, Op0.getOperand(0)); - return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot); -} - -// Given a ABS node, detect the following pattern: -// (ABS (SUB (EXTEND a), (EXTEND b))). -// Generates UABD/SABD instruction. 
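[Editorial note, not part of the diff above: a minimal standalone sketch of the scalar identity the performABSCombine hunk relies on. For zero-extended i8 operands, abs(zext(a) - zext(b)) is exactly the unsigned absolute difference that UABD computes per lane, which is why the wider ABS/SUB pair can be replaced by UABD followed by a zero extend. File name and loop bounds are illustrative only.]

// uabd_identity.cpp -- standalone illustration, not LLVM code.
#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  for (unsigned a = 0; a < 256; ++a) {
    for (unsigned b = 0; b < 256; ++b) {
      int32_t WideAbs = std::abs(int32_t(a) - int32_t(b));    // ABS(SUB(zext(a), zext(b)))
      uint8_t Uabd = a > b ? uint8_t(a - b) : uint8_t(b - a);  // per-lane UABD result
      assert(WideAbs == int32_t(Uabd));                        // so ZERO_EXTEND(UABD) matches
    }
  }
  return 0;
}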
-static SDValue performABSCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *Subtarget) { - SDValue AbsOp1 = N->getOperand(0); - SDValue Op0, Op1; - - if (AbsOp1.getOpcode() != ISD::SUB) - return SDValue(); - - Op0 = AbsOp1.getOperand(0); - Op1 = AbsOp1.getOperand(1); - - unsigned Opc0 = Op0.getOpcode(); - // Check if the operands of the sub are (zero|sign)-extended. - if (Opc0 != Op1.getOpcode() || - (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) - return SDValue(); - - EVT VectorT1 = Op0.getOperand(0).getValueType(); - EVT VectorT2 = Op1.getOperand(0).getValueType(); - // Check if vectors are of same type and valid size. - uint64_t Size = VectorT1.getFixedSizeInBits(); - if (VectorT1 != VectorT2 || (Size != 64 && Size != 128)) - return SDValue(); - - // Check if vector element types are valid. - EVT VT1 = VectorT1.getVectorElementType(); - if (VT1 != MVT::i8 && VT1 != MVT::i16 && VT1 != MVT::i32) - return SDValue(); - - Op0 = Op0.getOperand(0); - Op1 = Op1.getOperand(0); - unsigned ABDOpcode = - (Opc0 == ISD::SIGN_EXTEND) ? AArch64ISD::SABD : AArch64ISD::UABD; - SDValue ABD = - DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD); -} - +// VECREDUCE_ADD( EXTEND(v16i8_type) ) to +// VECREDUCE_ADD( DOTv16i8(v16i8_type) ) +static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG, + const AArch64Subtarget *ST) { + SDValue Op0 = N->getOperand(0); + if (!ST->hasDotProd() || N->getValueType(0) != MVT::i32) + return SDValue(); + + if (Op0.getValueType().getVectorElementType() != MVT::i32) + return SDValue(); + + unsigned ExtOpcode = Op0.getOpcode(); + if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND) + return SDValue(); + + EVT Op0VT = Op0.getOperand(0).getValueType(); + if (Op0VT != MVT::v16i8) + return SDValue(); + + SDLoc DL(Op0); + SDValue Ones = DAG.getConstant(1, DL, Op0VT); + SDValue Zeros = DAG.getConstant(0, DL, MVT::v4i32); + auto DotIntrisic = (ExtOpcode == ISD::ZERO_EXTEND) + ? Intrinsic::aarch64_neon_udot + : Intrinsic::aarch64_neon_sdot; + SDValue Dot = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Zeros.getValueType(), + DAG.getConstant(DotIntrisic, DL, MVT::i32), Zeros, + Ones, Op0.getOperand(0)); + return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot); +} + +// Given a ABS node, detect the following pattern: +// (ABS (SUB (EXTEND a), (EXTEND b))). +// Generates UABD/SABD instruction. +static SDValue performABSCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + SDValue AbsOp1 = N->getOperand(0); + SDValue Op0, Op1; + + if (AbsOp1.getOpcode() != ISD::SUB) + return SDValue(); + + Op0 = AbsOp1.getOperand(0); + Op1 = AbsOp1.getOperand(1); + + unsigned Opc0 = Op0.getOpcode(); + // Check if the operands of the sub are (zero|sign)-extended. + if (Opc0 != Op1.getOpcode() || + (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) + return SDValue(); + + EVT VectorT1 = Op0.getOperand(0).getValueType(); + EVT VectorT2 = Op1.getOperand(0).getValueType(); + // Check if vectors are of same type and valid size. + uint64_t Size = VectorT1.getFixedSizeInBits(); + if (VectorT1 != VectorT2 || (Size != 64 && Size != 128)) + return SDValue(); + + // Check if vector element types are valid. 
+ EVT VT1 = VectorT1.getVectorElementType(); + if (VT1 != MVT::i8 && VT1 != MVT::i16 && VT1 != MVT::i32) + return SDValue(); + + Op0 = Op0.getOperand(0); + Op1 = Op1.getOperand(0); + unsigned ABDOpcode = + (Opc0 == ISD::SIGN_EXTEND) ? AArch64ISD::SABD : AArch64ISD::UABD; + SDValue ABD = + DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD); +} + static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); - return foldVectorXorShiftIntoCmp(N, DAG, Subtarget); + return foldVectorXorShiftIntoCmp(N, DAG, Subtarget); } SDValue @@ -11793,157 +11793,157 @@ static bool IsSVECntIntrinsic(SDValue S) { return false; } -/// Calculates what the pre-extend type is, based on the extension -/// operation node provided by \p Extend. -/// -/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the -/// pre-extend type is pulled directly from the operand, while other extend -/// operations need a bit more inspection to get this information. -/// -/// \param Extend The SDNode from the DAG that represents the extend operation -/// \param DAG The SelectionDAG hosting the \p Extend node -/// -/// \returns The type representing the \p Extend source type, or \p MVT::Other -/// if no valid type can be determined -static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) { - switch (Extend.getOpcode()) { - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - return Extend.getOperand(0).getValueType(); - case ISD::AssertSext: - case ISD::AssertZext: - case ISD::SIGN_EXTEND_INREG: { - VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1)); - if (!TypeNode) - return MVT::Other; - return TypeNode->getVT(); - } - case ISD::AND: { - ConstantSDNode *Constant = - dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode()); - if (!Constant) - return MVT::Other; - - uint32_t Mask = Constant->getZExtValue(); - - if (Mask == UCHAR_MAX) - return MVT::i8; - else if (Mask == USHRT_MAX) - return MVT::i16; - else if (Mask == UINT_MAX) - return MVT::i32; - - return MVT::Other; - } - default: - return MVT::Other; - } - - llvm_unreachable("Code path unhandled in calculatePreExtendType!"); -} - -/// Combines a dup(sext/zext) node pattern into sext/zext(dup) -/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt -static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle, - SelectionDAG &DAG) { - - ShuffleVectorSDNode *ShuffleNode = - dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode()); - if (!ShuffleNode) - return SDValue(); - - // Ensuring the mask is zero before continuing - if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0) - return SDValue(); - - SDValue InsertVectorElt = VectorShuffle.getOperand(0); - - if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT) - return SDValue(); - - SDValue InsertLane = InsertVectorElt.getOperand(2); - ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode()); - // Ensures the insert is inserting into lane 0 - if (!Constant || Constant->getZExtValue() != 0) - return SDValue(); - - SDValue Extend = InsertVectorElt.getOperand(1); - unsigned ExtendOpcode = Extend.getOpcode(); - - bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND || - ExtendOpcode == ISD::SIGN_EXTEND_INREG || - ExtendOpcode == ISD::AssertSext; - if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND && - ExtendOpcode != ISD::AssertZext && 
ExtendOpcode != ISD::AND) - return SDValue(); - - EVT TargetType = VectorShuffle.getValueType(); - EVT PreExtendType = calculatePreExtendType(Extend, DAG); - - if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 && - TargetType != MVT::v2i64) || - (PreExtendType == MVT::Other)) - return SDValue(); - - // Restrict valid pre-extend data type - if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 && - PreExtendType != MVT::i32) - return SDValue(); - - EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType); - - if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount()) - return SDValue(); - - if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2) - return SDValue(); - - SDLoc DL(VectorShuffle); - - SDValue InsertVectorNode = DAG.getNode( - InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT), - DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType), - DAG.getConstant(0, DL, MVT::i64)); - - std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue()); - - SDValue VectorShuffleNode = - DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode, - DAG.getUNDEF(PreExtendVT), ShuffleMask); - - SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, - DL, TargetType, VectorShuffleNode); - - return ExtendNode; -} - -/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup)) -/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt -static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) { - // If the value type isn't a vector, none of the operands are going to be dups - if (!Mul->getValueType(0).isVector()) - return SDValue(); - - SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG); - SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG); - - // Neither operands have been changed, don't make any further changes - if (!Op0 && !Op1) - return SDValue(); - - SDLoc DL(Mul); - return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0), - Op0 ? Op0 : Mul->getOperand(0), - Op1 ? Op1 : Mul->getOperand(1)); -} - +/// Calculates what the pre-extend type is, based on the extension +/// operation node provided by \p Extend. +/// +/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the +/// pre-extend type is pulled directly from the operand, while other extend +/// operations need a bit more inspection to get this information. 
+/// +/// \param Extend The SDNode from the DAG that represents the extend operation +/// \param DAG The SelectionDAG hosting the \p Extend node +/// +/// \returns The type representing the \p Extend source type, or \p MVT::Other +/// if no valid type can be determined +static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) { + switch (Extend.getOpcode()) { + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + return Extend.getOperand(0).getValueType(); + case ISD::AssertSext: + case ISD::AssertZext: + case ISD::SIGN_EXTEND_INREG: { + VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1)); + if (!TypeNode) + return MVT::Other; + return TypeNode->getVT(); + } + case ISD::AND: { + ConstantSDNode *Constant = + dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode()); + if (!Constant) + return MVT::Other; + + uint32_t Mask = Constant->getZExtValue(); + + if (Mask == UCHAR_MAX) + return MVT::i8; + else if (Mask == USHRT_MAX) + return MVT::i16; + else if (Mask == UINT_MAX) + return MVT::i32; + + return MVT::Other; + } + default: + return MVT::Other; + } + + llvm_unreachable("Code path unhandled in calculatePreExtendType!"); +} + +/// Combines a dup(sext/zext) node pattern into sext/zext(dup) +/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt +static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle, + SelectionDAG &DAG) { + + ShuffleVectorSDNode *ShuffleNode = + dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode()); + if (!ShuffleNode) + return SDValue(); + + // Ensuring the mask is zero before continuing + if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0) + return SDValue(); + + SDValue InsertVectorElt = VectorShuffle.getOperand(0); + + if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT) + return SDValue(); + + SDValue InsertLane = InsertVectorElt.getOperand(2); + ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode()); + // Ensures the insert is inserting into lane 0 + if (!Constant || Constant->getZExtValue() != 0) + return SDValue(); + + SDValue Extend = InsertVectorElt.getOperand(1); + unsigned ExtendOpcode = Extend.getOpcode(); + + bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND || + ExtendOpcode == ISD::SIGN_EXTEND_INREG || + ExtendOpcode == ISD::AssertSext; + if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND && + ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND) + return SDValue(); + + EVT TargetType = VectorShuffle.getValueType(); + EVT PreExtendType = calculatePreExtendType(Extend, DAG); + + if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 && + TargetType != MVT::v2i64) || + (PreExtendType == MVT::Other)) + return SDValue(); + + // Restrict valid pre-extend data type + if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 && + PreExtendType != MVT::i32) + return SDValue(); + + EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType); + + if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount()) + return SDValue(); + + if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2) + return SDValue(); + + SDLoc DL(VectorShuffle); + + SDValue InsertVectorNode = DAG.getNode( + InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT), + DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType), + DAG.getConstant(0, DL, MVT::i64)); + + std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue()); + + SDValue VectorShuffleNode = + DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode, + 
DAG.getUNDEF(PreExtendVT), ShuffleMask); + + SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + DL, TargetType, VectorShuffleNode); + + return ExtendNode; +} + +/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup)) +/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt +static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) { + // If the value type isn't a vector, none of the operands are going to be dups + if (!Mul->getValueType(0).isVector()) + return SDValue(); + + SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG); + SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG); + + // Neither operands have been changed, don't make any further changes + if (!Op0 && !Op1) + return SDValue(); + + SDLoc DL(Mul); + return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0), + Op0 ? Op0 : Mul->getOperand(0), + Op1 ? Op1 : Mul->getOperand(1)); +} + static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { - - if (SDValue Ext = performMulVectorExtendCombine(N, DAG)) - return Ext; - + + if (SDValue Ext = performMulVectorExtendCombine(N, DAG)) + return Ext; + if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -12478,9 +12478,9 @@ static SDValue performSVEAndCombine(SDNode *N, return DAG.getNode(Opc, DL, N->getValueType(0), And); } - if (!EnableCombineMGatherIntrinsics) - return SDValue(); - + if (!EnableCombineMGatherIntrinsics) + return SDValue(); + SDValue Mask = N->getOperand(1); if (!Src.hasOneUse()) @@ -12534,11 +12534,11 @@ static SDValue performANDCombine(SDNode *N, if (VT.isScalableVector()) return performSVEAndCombine(N, DCI); - // The combining code below works only for NEON vectors. In particular, it - // does not work for SVE when dealing with vectors wider than 128 bits. - if (!(VT.is64BitVector() || VT.is128BitVector())) - return SDValue(); - + // The combining code below works only for NEON vectors. In particular, it + // does not work for SVE when dealing with vectors wider than 128 bits. + if (!(VT.is64BitVector() || VT.is128BitVector())) + return SDValue(); + BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode()); if (!BVN) @@ -12599,143 +12599,143 @@ static SDValue performSRLCombine(SDNode *N, return SDValue(); } -// Attempt to form urhadd(OpA, OpB) from -// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1)) -// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)). -// The original form of the first expression is -// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the -// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)). -// Before this function is called the srl will have been lowered to -// AArch64ISD::VLSHR. -// This pass can also recognize signed variants of the patterns that use sign -// extension instead of zero extension and form a srhadd(OpA, OpB) or a -// shadd(OpA, OpB) from them. 
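[Editorial note, not part of the diff above: a scalar sketch of the rewrite described in the comment block ending here, assuming 8-bit lanes zero-extended to 16 bits. In 16-bit two's-complement arithmetic, b - ~a equals a + b + 1, so the SUB/XOR-with-all-ones form, shifted right by one and truncated, is the rounding halving add that URHADD performs per lane.]

// urhadd_identity.cpp -- standalone illustration, not LLVM code.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a) {
    for (unsigned b = 0; b < 256; ++b) {
      uint16_t Wa = uint16_t(a), Wb = uint16_t(b);             // zext i8 -> i16
      // sub(zext(b), xor(zext(a), all-ones)) == a + b + 1 (mod 2^16)
      uint16_t SubXor = uint16_t(Wb - uint16_t(Wa ^ 0xFFFFu));
      assert(SubXor == uint16_t(a + b + 1));
      // truncate(vlshr(..., 1)) is the rounding halving add URHADD computes.
      assert(uint8_t(SubXor >> 1) == uint8_t((a + b + 1) >> 1));
    }
  }
  return 0;
}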
-static SDValue -performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - - // Since we are looking for a right shift by a constant value of 1 and we are - // operating on types at least 16 bits in length (sign/zero extended OpA and - // OpB, which are at least 8 bits), it follows that the truncate will always - // discard the shifted-in bit and therefore the right shift will be logical - // regardless of the signedness of OpA and OpB. - SDValue Shift = N->getOperand(0); - if (Shift.getOpcode() != AArch64ISD::VLSHR) - return SDValue(); - - // Is the right shift using an immediate value of 1? - uint64_t ShiftAmount = Shift.getConstantOperandVal(1); - if (ShiftAmount != 1) - return SDValue(); - - SDValue ExtendOpA, ExtendOpB; - SDValue ShiftOp0 = Shift.getOperand(0); - unsigned ShiftOp0Opc = ShiftOp0.getOpcode(); - if (ShiftOp0Opc == ISD::SUB) { - - SDValue Xor = ShiftOp0.getOperand(1); - if (Xor.getOpcode() != ISD::XOR) - return SDValue(); - - // Is the XOR using a constant amount of all ones in the right hand side? - uint64_t C; - if (!isAllConstantBuildVector(Xor.getOperand(1), C)) - return SDValue(); - - unsigned ElemSizeInBits = VT.getScalarSizeInBits(); - APInt CAsAPInt(ElemSizeInBits, C); - if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits)) - return SDValue(); - - ExtendOpA = Xor.getOperand(0); - ExtendOpB = ShiftOp0.getOperand(0); - } else if (ShiftOp0Opc == ISD::ADD) { - ExtendOpA = ShiftOp0.getOperand(0); - ExtendOpB = ShiftOp0.getOperand(1); - } else - return SDValue(); - - unsigned ExtendOpAOpc = ExtendOpA.getOpcode(); - unsigned ExtendOpBOpc = ExtendOpB.getOpcode(); - if (!(ExtendOpAOpc == ExtendOpBOpc && - (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND))) - return SDValue(); - - // Is the result of the right shift being truncated to the same value type as - // the original operands, OpA and OpB? - SDValue OpA = ExtendOpA.getOperand(0); - SDValue OpB = ExtendOpB.getOperand(0); - EVT OpAVT = OpA.getValueType(); - assert(ExtendOpA.getValueType() == ExtendOpB.getValueType()); - if (!(VT == OpAVT && OpAVT == OpB.getValueType())) - return SDValue(); - - SDLoc DL(N); - bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND; - bool IsRHADD = ShiftOp0Opc == ISD::SUB; - unsigned HADDOpc = IsSignExtend - ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD) - : (IsRHADD ? 
AArch64ISD::URHADD : AArch64ISD::UHADD); - SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB); - - return ResultHADD; -} - -static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) { - switch (Opcode) { - case ISD::FADD: - return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64; - case ISD::ADD: - return VT == MVT::i64; - default: - return false; - } -} - -static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) { - SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1); - - EVT VT = N->getValueType(0); - const bool FullFP16 = - static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16(); - - // Rewrite for pairwise fadd pattern - // (f32 (extract_vector_elt - // (fadd (vXf32 Other) - // (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0)) - // -> - // (f32 (fadd (extract_vector_elt (vXf32 Other) 0) - // (extract_vector_elt (vXf32 Other) 1)) - if (ConstantN1 && ConstantN1->getZExtValue() == 0 && - hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) { - SDLoc DL(N0); - SDValue N00 = N0->getOperand(0); - SDValue N01 = N0->getOperand(1); - - ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01); - SDValue Other = N00; - - // And handle the commutative case. - if (!Shuffle) { - Shuffle = dyn_cast<ShuffleVectorSDNode>(N00); - Other = N01; - } - - if (Shuffle && Shuffle->getMaskElt(0) == 1 && - Other == Shuffle->getOperand(0)) { - return DAG.getNode(N0->getOpcode(), DL, VT, - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other, - DAG.getConstant(0, DL, MVT::i64)), - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other, - DAG.getConstant(1, DL, MVT::i64))); - } - } - - return SDValue(); -} - +// Attempt to form urhadd(OpA, OpB) from +// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1)) +// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)). +// The original form of the first expression is +// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the +// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)). +// Before this function is called the srl will have been lowered to +// AArch64ISD::VLSHR. +// This pass can also recognize signed variants of the patterns that use sign +// extension instead of zero extension and form a srhadd(OpA, OpB) or a +// shadd(OpA, OpB) from them. +static SDValue +performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + + // Since we are looking for a right shift by a constant value of 1 and we are + // operating on types at least 16 bits in length (sign/zero extended OpA and + // OpB, which are at least 8 bits), it follows that the truncate will always + // discard the shifted-in bit and therefore the right shift will be logical + // regardless of the signedness of OpA and OpB. + SDValue Shift = N->getOperand(0); + if (Shift.getOpcode() != AArch64ISD::VLSHR) + return SDValue(); + + // Is the right shift using an immediate value of 1? + uint64_t ShiftAmount = Shift.getConstantOperandVal(1); + if (ShiftAmount != 1) + return SDValue(); + + SDValue ExtendOpA, ExtendOpB; + SDValue ShiftOp0 = Shift.getOperand(0); + unsigned ShiftOp0Opc = ShiftOp0.getOpcode(); + if (ShiftOp0Opc == ISD::SUB) { + + SDValue Xor = ShiftOp0.getOperand(1); + if (Xor.getOpcode() != ISD::XOR) + return SDValue(); + + // Is the XOR using a constant amount of all ones in the right hand side? 
+ uint64_t C; + if (!isAllConstantBuildVector(Xor.getOperand(1), C)) + return SDValue(); + + unsigned ElemSizeInBits = VT.getScalarSizeInBits(); + APInt CAsAPInt(ElemSizeInBits, C); + if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits)) + return SDValue(); + + ExtendOpA = Xor.getOperand(0); + ExtendOpB = ShiftOp0.getOperand(0); + } else if (ShiftOp0Opc == ISD::ADD) { + ExtendOpA = ShiftOp0.getOperand(0); + ExtendOpB = ShiftOp0.getOperand(1); + } else + return SDValue(); + + unsigned ExtendOpAOpc = ExtendOpA.getOpcode(); + unsigned ExtendOpBOpc = ExtendOpB.getOpcode(); + if (!(ExtendOpAOpc == ExtendOpBOpc && + (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND))) + return SDValue(); + + // Is the result of the right shift being truncated to the same value type as + // the original operands, OpA and OpB? + SDValue OpA = ExtendOpA.getOperand(0); + SDValue OpB = ExtendOpB.getOperand(0); + EVT OpAVT = OpA.getValueType(); + assert(ExtendOpA.getValueType() == ExtendOpB.getValueType()); + if (!(VT == OpAVT && OpAVT == OpB.getValueType())) + return SDValue(); + + SDLoc DL(N); + bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND; + bool IsRHADD = ShiftOp0Opc == ISD::SUB; + unsigned HADDOpc = IsSignExtend + ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD) + : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD); + SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB); + + return ResultHADD; +} + +static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) { + switch (Opcode) { + case ISD::FADD: + return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64; + case ISD::ADD: + return VT == MVT::i64; + default: + return false; + } +} + +static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1); + + EVT VT = N->getValueType(0); + const bool FullFP16 = + static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16(); + + // Rewrite for pairwise fadd pattern + // (f32 (extract_vector_elt + // (fadd (vXf32 Other) + // (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0)) + // -> + // (f32 (fadd (extract_vector_elt (vXf32 Other) 0) + // (extract_vector_elt (vXf32 Other) 1)) + if (ConstantN1 && ConstantN1->getZExtValue() == 0 && + hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) { + SDLoc DL(N0); + SDValue N00 = N0->getOperand(0); + SDValue N01 = N0->getOperand(1); + + ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01); + SDValue Other = N00; + + // And handle the commutative case. + if (!Shuffle) { + Shuffle = dyn_cast<ShuffleVectorSDNode>(N00); + Other = N01; + } + + if (Shuffle && Shuffle->getMaskElt(0) == 1 && + Other == Shuffle->getOperand(0)) { + return DAG.getNode(N0->getOpcode(), DL, VT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other, + DAG.getConstant(0, DL, MVT::i64)), + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other, + DAG.getConstant(1, DL, MVT::i64))); + } + } + + return SDValue(); +} + static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { @@ -12781,9 +12781,9 @@ static SDValue performConcatVectorsCombine(SDNode *N, if (DCI.isBeforeLegalizeOps()) return SDValue(); - // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted - // subvectors from the same original vectors. Combine these into a single - // [us]rhadd or [us]hadd that operates on the two original vectors. 
Example: + // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted + // subvectors from the same original vectors. Combine these into a single + // [us]rhadd or [us]hadd that operates on the two original vectors. Example: // (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>), // extract_subvector (v16i8 OpB, // <0>))), @@ -12793,8 +12793,8 @@ static SDValue performConcatVectorsCombine(SDNode *N, // -> // (v16i8(urhadd(v16i8 OpA, v16i8 OpB))) if (N->getNumOperands() == 2 && N0Opc == N1Opc && - (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD || - N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) { + (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD || + N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) { SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); SDValue N10 = N1->getOperand(0); @@ -13099,43 +13099,43 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) { return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp); } -// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b) -static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - // Only scalar integer and vector types. - if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger()) - return SDValue(); - - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT) - return SDValue(); - - auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); - auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1)); - if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue()) - return SDValue(); - - SDValue Op1 = LHS->getOperand(0); - SDValue Op2 = RHS->getOperand(0); - EVT OpVT1 = Op1.getValueType(); - EVT OpVT2 = Op2.getValueType(); - if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 || - Op2.getOpcode() != AArch64ISD::UADDV || - OpVT1.getVectorElementType() != VT) - return SDValue(); - - SDValue Val1 = Op1.getOperand(0); - SDValue Val2 = Op2.getOperand(0); - EVT ValVT = Val1->getValueType(0); - SDLoc DL(N); - SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, - DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal), - DAG.getConstant(0, DL, MVT::i64)); -} - +// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b) +static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + // Only scalar integer and vector types. 
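[Editorial note, not part of the diff above: performUADDVCombine, whose re-added body begins just above, relies on the fact that two independent lane-wise sums can be folded into a single reduction of the lane-wise ADD, because wraparound integer addition is associative and commutative. Array sizes and values below are arbitrary.]

// uaddv_fold.cpp -- standalone illustration, not LLVM code.
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

int main() {
  std::array<uint32_t, 4> A = {1, 2, 3, 0xFFFFFFFFu};
  std::array<uint32_t, 4> B = {10, 20, 30, 40};
  uint32_t SumA = 0, SumB = 0, SumAdd = 0;
  for (std::size_t I = 0; I < A.size(); ++I) {
    SumA += A[I];            // UADDV(a)
    SumB += B[I];            // UADDV(b)
    SumAdd += A[I] + B[I];   // UADDV(ADD(a, b))
  }
  // ADD(UADDV a, UADDV b) == UADDV(ADD a, b), even with wraparound.
  assert(uint32_t(SumA + SumB) == SumAdd);
  return 0;
}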
+ if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger()) + return SDValue(); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT) + return SDValue(); + + auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); + auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1)); + if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue()) + return SDValue(); + + SDValue Op1 = LHS->getOperand(0); + SDValue Op2 = RHS->getOperand(0); + EVT OpVT1 = Op1.getValueType(); + EVT OpVT2 = Op2.getValueType(); + if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 || + Op2.getOpcode() != AArch64ISD::UADDV || + OpVT1.getVectorElementType() != VT) + return SDValue(); + + SDValue Val1 = Op1.getOperand(0); + SDValue Val2 = Op2.getOperand(0); + EVT ValVT = Val1->getValueType(0); + SDLoc DL(N); + SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, + DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal), + DAG.getConstant(0, DL, MVT::i64)); +} + // The basic add/sub long vector instructions have variants with "2" on the end // which act on the high-half of their inputs. They are normally matched by // patterns like: @@ -13189,16 +13189,16 @@ static SDValue performAddSubLongCombine(SDNode *N, return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS); } -static SDValue performAddSubCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - // Try to change sum of two reductions. - if (SDValue Val = performUADDVCombine(N, DAG)) - return Val; - - return performAddSubLongCombine(N, DCI, DAG); -} - +static SDValue performAddSubCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + // Try to change sum of two reductions. + if (SDValue Val = performUADDVCombine(N, DAG)) + return Val; + + return performAddSubLongCombine(N, DCI, DAG); +} + // Massage DAGs which we can use the high-half "long" operations on into // something isel will recognize better. E.g. // @@ -13212,8 +13212,8 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, if (DCI.isBeforeLegalizeOps()) return SDValue(); - SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1); - SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2); + SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1); + SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 
1 : 2); assert(LHS.getValueType().is64BitVector() && RHS.getValueType().is64BitVector() && "unexpected shape for long operation"); @@ -13231,9 +13231,9 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, return SDValue(); } - if (IID == Intrinsic::not_intrinsic) - return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS); - + if (IID == Intrinsic::not_intrinsic) + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0), N->getOperand(0), LHS, RHS); } @@ -13374,8 +13374,8 @@ static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) { unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8; unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8; - EVT ByteVT = - EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize)); + EVT ByteVT = + EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize)); // Convert everything to the domain of EXT (i.e bytes). SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1)); @@ -13475,25 +13475,25 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, return DAG.getZExtOrTrunc(Res, DL, VT); } -static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc, - SelectionDAG &DAG) { - SDLoc DL(N); - - SDValue Pred = N->getOperand(1); - SDValue VecToReduce = N->getOperand(2); - - // NOTE: The integer reduction's result type is not always linked to the - // operand's element type so we construct it from the intrinsic's result type. - EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0)); - SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce); - - // SVE reductions set the whole vector register with the first element - // containing the reduction result, which we'll now extract. - SDValue Zero = DAG.getConstant(0, DL, MVT::i64); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce, - Zero); -} - +static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc, + SelectionDAG &DAG) { + SDLoc DL(N); + + SDValue Pred = N->getOperand(1); + SDValue VecToReduce = N->getOperand(2); + + // NOTE: The integer reduction's result type is not always linked to the + // operand's element type so we construct it from the intrinsic's result type. + EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0)); + SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce); + + // SVE reductions set the whole vector register with the first element + // containing the reduction result, which we'll now extract. + SDValue Zero = DAG.getConstant(0, DL, MVT::i64); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce, + Zero); +} + static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc, SelectionDAG &DAG) { SDLoc DL(N); @@ -13534,25 +13534,25 @@ static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc, Zero); } -// If a merged operation has no inactive lanes we can relax it to a predicated -// or unpredicated operation, which potentially allows better isel (perhaps -// using immediate forms) or relaxing register reuse requirements. -static SDValue convertMergedOpToPredOp(SDNode *N, unsigned PredOpc, - SelectionDAG &DAG) { - assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!"); - assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!"); - SDValue Pg = N->getOperand(1); - - // ISD way to specify an all active predicate. 
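[Editorial note, not part of the diff above: a lane-by-lane model of why convertMergedOpToPredOp is sound when the governing predicate is all-active. With no inactive lanes there is nothing to merge, so the merged form and the plain operation agree. The shift operation and vector length are arbitrary choices for illustration.]

// merge_vs_unpred.cpp -- standalone illustration, not LLVM code.
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

int main() {
  std::array<uint32_t, 4> A = {1, 2, 0x80000000u, 7};
  std::array<uint32_t, 4> B = {0, 1, 3, 31};
  std::array<bool, 4> Pg;
  Pg.fill(true);                                        // ptrue: every lane active
  for (std::size_t I = 0; I < A.size(); ++I) {
    uint32_t Unpred = A[I] << B[I];                     // unpredicated shift
    uint32_t Merged = Pg[I] ? (A[I] << B[I]) : A[I];    // merged form keeps A on inactive lanes
    assert(Merged == Unpred);                           // identical when no lane is inactive
  }
  return 0;
}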
- if ((Pg.getOpcode() == AArch64ISD::PTRUE) && - (Pg.getConstantOperandVal(0) == AArch64SVEPredPattern::all)) - return DAG.getNode(PredOpc, SDLoc(N), N->getValueType(0), Pg, - N->getOperand(2), N->getOperand(3)); - - // FUTURE: SplatVector(true) - return SDValue(); -} - +// If a merged operation has no inactive lanes we can relax it to a predicated +// or unpredicated operation, which potentially allows better isel (perhaps +// using immediate forms) or relaxing register reuse requirements. +static SDValue convertMergedOpToPredOp(SDNode *N, unsigned PredOpc, + SelectionDAG &DAG) { + assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!"); + assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!"); + SDValue Pg = N->getOperand(1); + + // ISD way to specify an all active predicate. + if ((Pg.getOpcode() == AArch64ISD::PTRUE) && + (Pg.getConstantOperandVal(0) == AArch64SVEPredPattern::all)) + return DAG.getNode(PredOpc, SDLoc(N), N->getValueType(0), Pg, + N->getOperand(2), N->getOperand(3)); + + // FUTURE: SplatVector(true) + return SDValue(); +} + static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -13607,28 +13607,28 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_crc32h: case Intrinsic::aarch64_crc32ch: return tryCombineCRC32(0xffff, N, DAG); - case Intrinsic::aarch64_sve_saddv: - // There is no i64 version of SADDV because the sign is irrelevant. - if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64) - return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG); - else - return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG); - case Intrinsic::aarch64_sve_uaddv: - return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG); + case Intrinsic::aarch64_sve_saddv: + // There is no i64 version of SADDV because the sign is irrelevant. 
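[Editorial note, not part of the diff above: the comment just above observes that for i64 elements a signed add reduction is interchangeable with the unsigned one, since two's-complement addition yields the same bit pattern either way. A quick sketch of that claim; the element values are arbitrary, and the signed accumulation is done through unsigned arithmetic to make the wraparound explicit.]

// saddv_vs_uaddv_i64.cpp -- standalone illustration, not LLVM code.
#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<int64_t, 3> V = {-5, 7, INT64_MIN};
  uint64_t UnsignedSum = 0;
  int64_t SignedSum = 0;
  for (int64_t X : V) {
    UnsignedSum += uint64_t(X);                               // UADDV-style accumulation
    SignedSum = int64_t(uint64_t(SignedSum) + uint64_t(X));   // SADDV-style, wraparound explicit
  }
  // Same 64-bit pattern, so the i64 case can always use the unsigned reduction.
  assert(uint64_t(SignedSum) == UnsignedSum);
  return 0;
}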
+ if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64) + return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG); + else + return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG); + case Intrinsic::aarch64_sve_uaddv: + return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG); case Intrinsic::aarch64_sve_smaxv: - return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG); + return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG); case Intrinsic::aarch64_sve_umaxv: - return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG); + return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG); case Intrinsic::aarch64_sve_sminv: - return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG); + return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG); case Intrinsic::aarch64_sve_uminv: - return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG); + return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG); case Intrinsic::aarch64_sve_orv: - return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG); + return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG); case Intrinsic::aarch64_sve_eorv: - return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG); + return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG); case Intrinsic::aarch64_sve_andv: - return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG); + return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG); case Intrinsic::aarch64_sve_index: return LowerSVEIntrinsicIndex(N, DAG); case Intrinsic::aarch64_sve_dup: @@ -13639,19 +13639,19 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_sve_ext: return LowerSVEIntrinsicEXT(N, DAG); case Intrinsic::aarch64_sve_smin: - return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG); + return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG); case Intrinsic::aarch64_sve_umin: - return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG); + return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG); case Intrinsic::aarch64_sve_smax: - return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG); + return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG); case Intrinsic::aarch64_sve_umax: - return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG); + return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG); case Intrinsic::aarch64_sve_lsl: - return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG); + return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG); case Intrinsic::aarch64_sve_lsr: - return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG); + return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG); case Intrinsic::aarch64_sve_asr: - return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG); + return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG); case Intrinsic::aarch64_sve_cmphs: if (!N->getOperand(2).getValueType().isFloatingPoint()) return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), @@ -13744,15 +13744,15 @@ static SDValue performExtendCombine(SDNode *N, // helps the backend to decide that an sabdl2 would be useful, saving a real // extract_high operation. 
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND && - (N->getOperand(0).getOpcode() == AArch64ISD::UABD || - N->getOperand(0).getOpcode() == AArch64ISD::SABD)) { + (N->getOperand(0).getOpcode() == AArch64ISD::UABD || + N->getOperand(0).getOpcode() == AArch64ISD::SABD)) { SDNode *ABDNode = N->getOperand(0).getNode(); - SDValue NewABD = - tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG); - if (!NewABD.getNode()) - return SDValue(); + SDValue NewABD = + tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG); + if (!NewABD.getNode()) + return SDValue(); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD); } // This is effectively a custom type legalization for AArch64. @@ -14235,31 +14235,31 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, S->getMemOperand()->getFlags()); } -static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) { - SDLoc DL(N); - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - EVT ResVT = N->getValueType(0); - - // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z) - if (Op0.getOpcode() == AArch64ISD::UUNPKLO) { - if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) { - SDValue X = Op0.getOperand(0).getOperand(0); - return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1); - } - } - - // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z) - if (Op1.getOpcode() == AArch64ISD::UUNPKHI) { - if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) { - SDValue Z = Op1.getOperand(0).getOperand(1); - return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z); - } - } - - return SDValue(); -} - +static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + EVT ResVT = N->getValueType(0); + + // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z) + if (Op0.getOpcode() == AArch64ISD::UUNPKLO) { + if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) { + SDValue X = Op0.getOperand(0).getOperand(0); + return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1); + } + } + + // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z) + if (Op1.getOpcode() == AArch64ISD::UUNPKHI) { + if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) { + SDValue Z = Op1.getOperand(0).getOperand(1); + return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z); + } + } + + return SDValue(); +} + /// Target-specific DAG combine function for post-increment LD1 (lane) and /// post-increment LD1R. static SDValue performPostLD1Combine(SDNode *N, @@ -14398,55 +14398,55 @@ static SDValue performSTORECombine(SDNode *N, return SDValue(); } -static SDValue performMaskedGatherScatterCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - MaskedGatherScatterSDNode *MGS = cast<MaskedGatherScatterSDNode>(N); - assert(MGS && "Can only combine gather load or scatter store nodes"); - - SDLoc DL(MGS); - SDValue Chain = MGS->getChain(); - SDValue Scale = MGS->getScale(); - SDValue Index = MGS->getIndex(); - SDValue Mask = MGS->getMask(); - SDValue BasePtr = MGS->getBasePtr(); - ISD::MemIndexType IndexType = MGS->getIndexType(); - - EVT IdxVT = Index.getValueType(); - - if (DCI.isBeforeLegalize()) { - // SVE gather/scatter requires indices of i32/i64. Promote anything smaller - // prior to legalisation so the result can be split if required. 
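[Editorial note, not part of the diff above: the index promotion described in the comment just above widens i8/i16 gather/scatter indices to i32, picking SIGN_EXTEND or ZERO_EXTEND from the index's signedness so the byte offset each lane addresses is unchanged. A scalar sketch of that invariant; the scale factor is made up for illustration.]

// index_promote.cpp -- standalone illustration, not LLVM code.
#include <cassert>
#include <cstdint>

int main() {
  const int64_t Scale = 4;                                     // e.g. gathering 32-bit elements
  for (int I = -128; I < 128; ++I) {
    int8_t NarrowSigned = int8_t(I);
    int32_t WideSigned = int32_t(NarrowSigned);                // SIGN_EXTEND keeps negative offsets
    assert(int64_t(WideSigned) * Scale == int64_t(NarrowSigned) * Scale);

    uint8_t NarrowUnsigned = uint8_t(I);
    int32_t WideUnsigned = int32_t(uint32_t(NarrowUnsigned));  // ZERO_EXTEND keeps 0..255
    assert(int64_t(WideUnsigned) * Scale == int64_t(NarrowUnsigned) * Scale);
  }
  return 0;
}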
- if ((IdxVT.getVectorElementType() == MVT::i8) || - (IdxVT.getVectorElementType() == MVT::i16)) { - EVT NewIdxVT = IdxVT.changeVectorElementType(MVT::i32); - if (MGS->isIndexSigned()) - Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); - else - Index = DAG.getNode(ISD::ZERO_EXTEND, DL, NewIdxVT, Index); - - if (auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) { - SDValue PassThru = MGT->getPassThru(); - SDValue Ops[] = { Chain, PassThru, Mask, BasePtr, Index, Scale }; - return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other), - PassThru.getValueType(), DL, Ops, - MGT->getMemOperand(), - MGT->getIndexType(), MGT->getExtensionType()); - } else { - auto *MSC = cast<MaskedScatterSDNode>(MGS); - SDValue Data = MSC->getValue(); - SDValue Ops[] = { Chain, Data, Mask, BasePtr, Index, Scale }; - return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), - MSC->getMemoryVT(), DL, Ops, - MSC->getMemOperand(), IndexType, - MSC->isTruncatingStore()); - } - } - } - - return SDValue(); -} - +static SDValue performMaskedGatherScatterCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + MaskedGatherScatterSDNode *MGS = cast<MaskedGatherScatterSDNode>(N); + assert(MGS && "Can only combine gather load or scatter store nodes"); + + SDLoc DL(MGS); + SDValue Chain = MGS->getChain(); + SDValue Scale = MGS->getScale(); + SDValue Index = MGS->getIndex(); + SDValue Mask = MGS->getMask(); + SDValue BasePtr = MGS->getBasePtr(); + ISD::MemIndexType IndexType = MGS->getIndexType(); + + EVT IdxVT = Index.getValueType(); + + if (DCI.isBeforeLegalize()) { + // SVE gather/scatter requires indices of i32/i64. Promote anything smaller + // prior to legalisation so the result can be split if required. + if ((IdxVT.getVectorElementType() == MVT::i8) || + (IdxVT.getVectorElementType() == MVT::i16)) { + EVT NewIdxVT = IdxVT.changeVectorElementType(MVT::i32); + if (MGS->isIndexSigned()) + Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); + else + Index = DAG.getNode(ISD::ZERO_EXTEND, DL, NewIdxVT, Index); + + if (auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) { + SDValue PassThru = MGT->getPassThru(); + SDValue Ops[] = { Chain, PassThru, Mask, BasePtr, Index, Scale }; + return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other), + PassThru.getValueType(), DL, Ops, + MGT->getMemOperand(), + MGT->getIndexType(), MGT->getExtensionType()); + } else { + auto *MSC = cast<MaskedScatterSDNode>(MGS); + SDValue Data = MSC->getValue(); + SDValue Ops[] = { Chain, Data, Mask, BasePtr, Index, Scale }; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), + MSC->getMemoryVT(), DL, Ops, + MSC->getMemOperand(), IndexType, + MSC->isTruncatingStore()); + } + } + } + + return SDValue(); +} + /// Target-specific DAG combine function for NEON load/store intrinsics /// to merge base address updates. 
static SDValue performNEONPostLDSTCombine(SDNode *N, @@ -15443,7 +15443,7 @@ performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) && "Sign extending from an invalid type"); - EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext()); + EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext()); SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(), ExtOp, DAG.getValueType(ExtVT)); @@ -15451,12 +15451,12 @@ performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return DAG.getNode(SOpc, DL, N->getValueType(0), Ext); } - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - if (!EnableCombineMGatherIntrinsics) - return SDValue(); - + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + if (!EnableCombineMGatherIntrinsics) + return SDValue(); + // SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates // for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes. unsigned NewOpc; @@ -15596,11 +15596,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, default: LLVM_DEBUG(dbgs() << "Custom combining: skipping\n"); break; - case ISD::ABS: - return performABSCombine(N, DAG, DCI, Subtarget); + case ISD::ABS: + return performABSCombine(N, DAG, DCI, Subtarget); case ISD::ADD: case ISD::SUB: - return performAddSubCombine(N, DCI, DAG); + return performAddSubCombine(N, DCI, DAG); case ISD::XOR: return performXorCombine(N, DAG, DCI, Subtarget); case ISD::MUL: @@ -15627,8 +15627,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performExtendCombine(N, DCI, DAG); case ISD::SIGN_EXTEND_INREG: return performSignExtendInRegCombine(N, DCI, DAG); - case ISD::TRUNCATE: - return performVectorTruncateCombine(N, DCI, DAG); + case ISD::TRUNCATE: + return performVectorTruncateCombine(N, DCI, DAG); case ISD::CONCAT_VECTORS: return performConcatVectorsCombine(N, DCI, DAG); case ISD::SELECT: @@ -15641,9 +15641,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::STORE: return performSTORECombine(N, DCI, DAG, Subtarget); - case ISD::MGATHER: - case ISD::MSCATTER: - return performMaskedGatherScatterCombine(N, DCI, DAG); + case ISD::MGATHER: + case ISD::MSCATTER: + return performMaskedGatherScatterCombine(N, DCI, DAG); case AArch64ISD::BRCOND: return performBRCONDCombine(N, DCI, DAG); case AArch64ISD::TBNZ: @@ -15655,14 +15655,14 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performPostLD1Combine(N, DCI, false); case AArch64ISD::NVCAST: return performNVCASTCombine(N); - case AArch64ISD::UZP1: - return performUzpCombine(N, DAG); + case AArch64ISD::UZP1: + return performUzpCombine(N, DAG); case ISD::INSERT_VECTOR_ELT: return performPostLD1Combine(N, DCI, true); - case ISD::EXTRACT_VECTOR_ELT: - return performExtractVectorEltCombine(N, DAG); - case ISD::VECREDUCE_ADD: - return performVecReduceAddCombine(N, DCI.DAG, Subtarget); + case ISD::EXTRACT_VECTOR_ELT: + return performExtractVectorEltCombine(N, DAG); + case ISD::VECREDUCE_ADD: + return performVecReduceAddCombine(N, DCI.DAG, Subtarget); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { @@ -15811,10 +15811,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue(); EVT ResVT = N->getValueType(0); - uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue(); - 
SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL); + uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue(); + SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL); SDValue Val = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx); + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx); return DAG.getMergeValues({Val, Chain}, DL); } case Intrinsic::aarch64_sve_tuple_set: { @@ -15825,11 +15825,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, SDValue Vec = N->getOperand(4); EVT TupleVT = Tuple.getValueType(); - uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue(); + uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue(); uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue(); - uint64_t NumLanes = - Vec.getValueType().getVectorElementCount().getKnownMinValue(); + uint64_t NumLanes = + Vec.getValueType().getVectorElementCount().getKnownMinValue(); if ((TupleLanes % NumLanes) != 0) report_fatal_error("invalid tuple vector!"); @@ -15841,9 +15841,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, if (I == IdxConst) Opnds.push_back(Vec); else { - SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL); - Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, - Vec.getValueType(), Tuple, ExtIdx)); + SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL); + Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, + Vec.getValueType(), Tuple, ExtIdx)); } } SDValue Concat = @@ -16065,7 +16065,7 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults( ElementCount ResEC = VT.getVectorElementCount(); - if (InVT.getVectorElementCount() != (ResEC * 2)) + if (InVT.getVectorElementCount() != (ResEC * 2)) return; auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1)); @@ -16073,7 +16073,7 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults( return; unsigned Index = CIndex->getZExtValue(); - if ((Index != 0) && (Index != ResEC.getKnownMinValue())) + if ((Index != 0) && (Index != ResEC.getKnownMinValue())) return; unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI; @@ -16108,7 +16108,7 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N, assert(N->getValueType(0) == MVT::i128 && "AtomicCmpSwap on types less than 128 should be legal"); - if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) { + if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) { // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type, // so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG. SDValue Ops[] = { @@ -16189,8 +16189,8 @@ void AArch64TargetLowering::ReplaceNodeResults( return; case ISD::CTPOP: - if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG)) - Results.push_back(Result); + if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG)) + Results.push_back(Result); return; case AArch64ISD::SADDV: ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV); @@ -16335,44 +16335,44 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size > 128) return AtomicExpansionKind::None; - - // Nand is not supported in LSE. - // Leave 128 bits to LLSC or CmpXChg. - if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { - if (Subtarget->hasLSE()) - return AtomicExpansionKind::None; - if (Subtarget->outlineAtomics()) { - // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. 
- // Don't outline them unless - // (1) high level <atomic> support approved: - // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf - // (2) low level libgcc and compiler-rt support implemented by: - // min/max outline atomics helpers - if (AI->getOperation() != AtomicRMWInst::Min && - AI->getOperation() != AtomicRMWInst::Max && - AI->getOperation() != AtomicRMWInst::UMin && - AI->getOperation() != AtomicRMWInst::UMax) { - return AtomicExpansionKind::None; - } - } - } - - // At -O0, fast-regalloc cannot cope with the live vregs necessary to - // implement atomicrmw without spilling. If the target address is also on the - // stack and close enough to the spill slot, this can lead to a situation - // where the monitor always gets cleared and the atomic operation can never - // succeed. So at -O0 lower this operation to a CAS loop. - if (getTargetMachine().getOptLevel() == CodeGenOpt::None) - return AtomicExpansionKind::CmpXChg; - - return AtomicExpansionKind::LLSC; + + // Nand is not supported in LSE. + // Leave 128 bits to LLSC or CmpXChg. + if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { + if (Subtarget->hasLSE()) + return AtomicExpansionKind::None; + if (Subtarget->outlineAtomics()) { + // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. + // Don't outline them unless + // (1) high level <atomic> support approved: + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf + // (2) low level libgcc and compiler-rt support implemented by: + // min/max outline atomics helpers + if (AI->getOperation() != AtomicRMWInst::Min && + AI->getOperation() != AtomicRMWInst::Max && + AI->getOperation() != AtomicRMWInst::UMin && + AI->getOperation() != AtomicRMWInst::UMax) { + return AtomicExpansionKind::None; + } + } + } + + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on the + // stack and close enough to the spill slot, this can lead to a situation + // where the monitor always gets cleared and the atomic operation can never + // succeed. So at -O0 lower this operation to a CAS loop. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + + return AtomicExpansionKind::LLSC; } TargetLowering::AtomicExpansionKind AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR( AtomicCmpXchgInst *AI) const { // If subtarget has LSE, leave cmpxchg intact for codegen. - if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) + if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) return AtomicExpansionKind::None; // At -O0, fast-regalloc cannot cope with the live vregs necessary to // implement cmpxchg without spilling. If the address being exchanged is also @@ -16883,92 +16883,92 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE( Store->isTruncatingStore()); } -SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE( - SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - EVT VT = Op.getValueType(); - EVT EltVT = VT.getVectorElementType(); - - bool Signed = Op.getOpcode() == ISD::SDIV; - unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED; - - // Scalable vector i32/i64 DIV is supported. - if (EltVT == MVT::i32 || EltVT == MVT::i64) - return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true); - - // Scalable vector i8/i16 DIV is not supported. Promote it to i32. 
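The shouldExpandAtomicRMWInIR hunk above encodes a small decision table. A compact restatement of that policy as standalone C++ (a sketch only; the enum names mirror the LLVM ones but nothing here is the real API):

#include <cstdint>

// Sketch of the atomicrmw expansion policy in the hunk above.
enum class ExpansionKind { None, LLSC, CmpXChg };
enum class RMWOp { Nand, Min, Max, UMin, UMax, Other };

static ExpansionKind expandAtomicRMW(RMWOp Op, unsigned SizeInBits, bool HasLSE,
                                     bool OutlineAtomics, bool OptNone) {
  if (SizeInBits > 128)
    return ExpansionKind::None;                // too wide to expand here

  // Nand has no LSE encoding; 128-bit operations fall through to LL/SC or CAS.
  if (Op != RMWOp::Nand && SizeInBits < 128) {
    if (HasLSE)
      return ExpansionKind::None;              // native LSE instruction
    if (OutlineAtomics &&
        Op != RMWOp::Min && Op != RMWOp::Max &&
        Op != RMWOp::UMin && Op != RMWOp::UMax)
      return ExpansionKind::None;              // outlined runtime helper
  }

  // At -O0 a CAS loop avoids the live-register pressure that breaks LL/SC.
  return OptNone ? ExpansionKind::CmpXChg : ExpansionKind::LLSC;
}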
- EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); - EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext()); - EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext()); - EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT); - - // Convert the operands to scalable vectors. - SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0)); - SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1)); - - // Extend the scalable operands. - unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO; - unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI; - SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0); - SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1); - SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0); - SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1); - - // Convert back to fixed vectors so the DIV can be further lowered. - Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo); - Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo); - Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi); - Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi); - SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT, - Op0Lo, Op1Lo); - SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT, - Op0Hi, Op1Hi); - - // Convert again to scalable vectors to truncate. - ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo); - ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi); - SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT, - ResultLo, ResultHi); - - return convertFromScalableVector(DAG, VT, ScalableResult); -} - -SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE( - SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - assert(VT.isFixedLengthVector() && "Expected fixed length vector type!"); - - SDLoc DL(Op); - SDValue Val = Op.getOperand(0); - EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType()); - Val = convertToScalableVector(DAG, ContainerVT, Val); - - bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND; - unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO; - - // Repeatedly unpack Val until the result is of the desired element type. - switch (ContainerVT.getSimpleVT().SimpleTy) { - default: - llvm_unreachable("unimplemented container type"); - case MVT::nxv16i8: - Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val); - if (VT.getVectorElementType() == MVT::i16) - break; - LLVM_FALLTHROUGH; - case MVT::nxv8i16: - Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val); - if (VT.getVectorElementType() == MVT::i32) - break; - LLVM_FALLTHROUGH; - case MVT::nxv4i32: - Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val); - assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!"); - break; - } - - return convertFromScalableVector(DAG, VT, Val); -} - +SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE( + SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + EVT EltVT = VT.getVectorElementType(); + + bool Signed = Op.getOpcode() == ISD::SDIV; + unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED; + + // Scalable vector i32/i64 DIV is supported. 
+ if (EltVT == MVT::i32 || EltVT == MVT::i64) + return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true); + + // Scalable vector i8/i16 DIV is not supported. Promote it to i32. + EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); + EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext()); + EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext()); + EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT); + + // Convert the operands to scalable vectors. + SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0)); + SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1)); + + // Extend the scalable operands. + unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO; + unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI; + SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0); + SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1); + SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0); + SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1); + + // Convert back to fixed vectors so the DIV can be further lowered. + Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo); + Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo); + Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi); + Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi); + SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT, + Op0Lo, Op1Lo); + SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT, + Op0Hi, Op1Hi); + + // Convert again to scalable vectors to truncate. + ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo); + ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi); + SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT, + ResultLo, ResultHi); + + return convertFromScalableVector(DAG, VT, ScalableResult); +} + +SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE( + SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + assert(VT.isFixedLengthVector() && "Expected fixed length vector type!"); + + SDLoc DL(Op); + SDValue Val = Op.getOperand(0); + EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType()); + Val = convertToScalableVector(DAG, ContainerVT, Val); + + bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND; + unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO; + + // Repeatedly unpack Val until the result is of the desired element type. 
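The divide lowering above widens i8/i16 element divisions to i32 (UNPKLO/UNPKHI to extend, UZP1 to narrow the results back). Per lane, the arithmetic it implements is just this, shown as a scalar sketch in plain C++ (illustrative only):

#include <cstdint>

// One lane of the promoted i8 SDIV: extend both operands to 32 bits, divide,
// then truncate the quotient back to the original element width.
// Assumes B != 0.
static int8_t sdiv_i8_via_i32(int8_t A, int8_t B) {
  int32_t Wide = static_cast<int32_t>(A) / static_cast<int32_t>(B);
  return static_cast<int8_t>(Wide);
}

// The unsigned path (UUNPKLO/UUNPKHI) has the same shape with zero extension.
static uint8_t udiv_i8_via_i32(uint8_t A, uint8_t B) {
  return static_cast<uint8_t>(static_cast<uint32_t>(A) / static_cast<uint32_t>(B));
}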
+ switch (ContainerVT.getSimpleVT().SimpleTy) { + default: + llvm_unreachable("unimplemented container type"); + case MVT::nxv16i8: + Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val); + if (VT.getVectorElementType() == MVT::i16) + break; + LLVM_FALLTHROUGH; + case MVT::nxv8i16: + Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val); + if (VT.getVectorElementType() == MVT::i32) + break; + LLVM_FALLTHROUGH; + case MVT::nxv4i32: + Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val); + assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!"); + break; + } + + return convertFromScalableVector(DAG, VT, Val); +} + SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE( SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); @@ -17005,21 +17005,21 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE( return convertFromScalableVector(DAG, VT, Val); } -// Convert vector operation 'Op' to an equivalent predicated operation whereby -// the original operation's type is used to construct a suitable predicate. -// NOTE: The results for inactive lanes are undefined. +// Convert vector operation 'Op' to an equivalent predicated operation whereby +// the original operation's type is used to construct a suitable predicate. +// NOTE: The results for inactive lanes are undefined. SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, - unsigned NewOp, - bool OverrideNEON) const { + unsigned NewOp, + bool OverrideNEON) const { EVT VT = Op.getValueType(); SDLoc DL(Op); auto Pg = getPredicateForVector(DAG, DL, VT); - if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) { + if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) { EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); - // Create list of operands by converting existing ones to scalable types. + // Create list of operands by converting existing ones to scalable types. 
SmallVector<SDValue, 4> Operands = {Pg}; for (const SDValue &V : Op->op_values()) { if (isa<CondCodeSDNode>(V)) { @@ -17027,21 +17027,21 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, continue; } - if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) { - EVT VTArg = VTNode->getVT().getVectorElementType(); - EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg); - Operands.push_back(DAG.getValueType(NewVTArg)); - continue; - } - - assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) && + if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) { + EVT VTArg = VTNode->getVT().getVectorElementType(); + EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg); + Operands.push_back(DAG.getValueType(NewVTArg)); + continue; + } + + assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) && "Only fixed length vectors are supported!"); Operands.push_back(convertToScalableVector(DAG, ContainerVT, V)); } - if (isMergePassthruOpcode(NewOp)) - Operands.push_back(DAG.getUNDEF(ContainerVT)); - + if (isMergePassthruOpcode(NewOp)) + Operands.push_back(DAG.getUNDEF(ContainerVT)); + auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands); return convertFromScalableVector(DAG, VT, ScalableRes); } @@ -17050,228 +17050,228 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, SmallVector<SDValue, 4> Operands = {Pg}; for (const SDValue &V : Op->op_values()) { - assert((!V.getValueType().isVector() || - V.getValueType().isScalableVector()) && + assert((!V.getValueType().isVector() || + V.getValueType().isScalableVector()) && "Only scalable vectors are supported!"); Operands.push_back(V); } - if (isMergePassthruOpcode(NewOp)) - Operands.push_back(DAG.getUNDEF(VT)); - + if (isMergePassthruOpcode(NewOp)) + Operands.push_back(DAG.getUNDEF(VT)); + return DAG.getNode(NewOp, DL, VT, Operands); } - -// If a fixed length vector operation has no side effects when applied to -// undefined elements, we can safely use scalable vectors to perform the same -// operation without needing to worry about predication. -SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - assert(useSVEForFixedLengthVectorVT(VT) && - "Only expected to lower fixed length vector operation!"); - EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); - - // Create list of operands by converting existing ones to scalable types. - SmallVector<SDValue, 4> Ops; - for (const SDValue &V : Op->op_values()) { - assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); - - // Pass through non-vector operands. - if (!V.getValueType().isVector()) { - Ops.push_back(V); - continue; - } - - // "cast" fixed length vector to a scalable vector. 
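The comment above LowerToScalableOp captures the key idea: when an operation has no side effects on undefined elements, a fixed-length vector can simply be dropped into a wider scalable container, computed on all lanes, and read back without any predication. A conceptual model of that trick (plain C++, lane counts chosen arbitrarily for illustration):

#include <array>
#include <cstddef>

// A 4-lane fixed-length add executed on a wider "container" register. Lanes
// past the original width hold arbitrary data; because addition has no side
// effects, computing them is harmless and only the first FixedLanes lanes are
// read back.
template <size_t FixedLanes, size_t ContainerLanes>
std::array<int, FixedLanes> addViaContainer(const std::array<int, FixedLanes> &A,
                                            const std::array<int, FixedLanes> &B) {
  static_assert(ContainerLanes >= FixedLanes, "container must be at least as wide");
  int WideA[ContainerLanes] = {}, WideB[ContainerLanes] = {}, WideR[ContainerLanes];
  for (size_t I = 0; I < FixedLanes; ++I) { WideA[I] = A[I]; WideB[I] = B[I]; }
  for (size_t I = 0; I < ContainerLanes; ++I)
    WideR[I] = WideA[I] + WideB[I];   // all lanes computed, extras ignored
  std::array<int, FixedLanes> R;
  for (size_t I = 0; I < FixedLanes; ++I) R[I] = WideR[I];
  return R;
}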
- assert(useSVEForFixedLengthVectorVT(V.getValueType()) && - "Only fixed length vectors are supported!"); - Ops.push_back(convertToScalableVector(DAG, ContainerVT, V)); - } - - auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops); - return convertFromScalableVector(DAG, VT, ScalableRes); -} - -SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, - SelectionDAG &DAG) const { - SDLoc DL(ScalarOp); - SDValue AccOp = ScalarOp.getOperand(0); - SDValue VecOp = ScalarOp.getOperand(1); - EVT SrcVT = VecOp.getValueType(); - EVT ResVT = SrcVT.getVectorElementType(); - - EVT ContainerVT = SrcVT; - if (SrcVT.isFixedLengthVector()) { - ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT); - VecOp = convertToScalableVector(DAG, ContainerVT, VecOp); - } - - SDValue Pg = getPredicateForVector(DAG, DL, SrcVT); - SDValue Zero = DAG.getConstant(0, DL, MVT::i64); - - // Convert operands to Scalable. - AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, - DAG.getUNDEF(ContainerVT), AccOp, Zero); - - // Perform reduction. - SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT, - Pg, AccOp, VecOp); - - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero); -} - -SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp, - SelectionDAG &DAG) const { - SDLoc DL(ReduceOp); - SDValue Op = ReduceOp.getOperand(0); - EVT OpVT = Op.getValueType(); - EVT VT = ReduceOp.getValueType(); - - if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1) - return SDValue(); - - SDValue Pg = getPredicateForVector(DAG, DL, OpVT); - - switch (ReduceOp.getOpcode()) { - default: - return SDValue(); - case ISD::VECREDUCE_OR: - return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE); - case ISD::VECREDUCE_AND: { - Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg); - return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE); - } - case ISD::VECREDUCE_XOR: { - SDValue ID = - DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64); - SDValue Cntp = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op); - return DAG.getAnyExtOrTrunc(Cntp, DL, VT); - } - } - - return SDValue(); -} - -SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode, - SDValue ScalarOp, - SelectionDAG &DAG) const { - SDLoc DL(ScalarOp); - SDValue VecOp = ScalarOp.getOperand(0); - EVT SrcVT = VecOp.getValueType(); - - if (useSVEForFixedLengthVectorVT(SrcVT, true)) { - EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT); - VecOp = convertToScalableVector(DAG, ContainerVT, VecOp); - } - - // UADDV always returns an i64 result. - EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 : - SrcVT.getVectorElementType(); - EVT RdxVT = SrcVT; - if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED) - RdxVT = getPackedSVEVectorVT(ResVT); - - SDValue Pg = getPredicateForVector(DAG, DL, SrcVT); - SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp); - SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, - Rdx, DAG.getConstant(0, DL, MVT::i64)); - - // The VEC_REDUCE nodes expect an element size result. 
- if (ResVT != ScalarOp.getValueType()) - Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType()); - - return Res; -} - -SDValue -AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - SDLoc DL(Op); - - EVT InVT = Op.getOperand(1).getValueType(); - EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT); - SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1)); - SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2)); - - // Convert the mask to a predicated (NOTE: We don't need to worry about - // inactive lanes since VSELECT is safe when given undefined elements). - EVT MaskVT = Op.getOperand(0).getValueType(); - EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT); - auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0)); - Mask = DAG.getNode(ISD::TRUNCATE, DL, - MaskContainerVT.changeVectorElementType(MVT::i1), Mask); - - auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT, - Mask, Op1, Op2); - - return convertFromScalableVector(DAG, VT, ScalableRes); -} - -SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE( - SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT InVT = Op.getOperand(0).getValueType(); - EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT); - - assert(useSVEForFixedLengthVectorVT(InVT) && - "Only expected to lower fixed length vector operation!"); - assert(Op.getValueType() == InVT.changeTypeToInteger() && - "Expected integer result of the same bit length as the inputs!"); - - // Expand floating point vector comparisons. - if (InVT.isFloatingPoint()) - return SDValue(); - - auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0)); - auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1)); - auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT); - - EVT CmpVT = Pg.getValueType(); - auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT, - {Pg, Op1, Op2, Op.getOperand(2)}); - - EVT PromoteVT = ContainerVT.changeTypeToInteger(); - auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT); - return convertFromScalableVector(DAG, Op.getValueType(), Promote); -} - -SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT InVT = Op.getValueType(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - (void)TLI; - - assert(VT.isScalableVector() && TLI.isTypeLegal(VT) && - InVT.isScalableVector() && TLI.isTypeLegal(InVT) && - "Only expect to cast between legal scalable vector types!"); - assert((VT.getVectorElementType() == MVT::i1) == - (InVT.getVectorElementType() == MVT::i1) && - "Cannot cast between data and predicate scalable vector types!"); - - if (InVT == VT) - return Op; - - if (VT.getVectorElementType() == MVT::i1) - return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op); - - EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType()); - EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType()); - assert((VT == PackedVT || InVT == PackedInVT) && - "Cannot cast between unpacked scalable vector types!"); - - // Pack input if required. - if (InVT != PackedInVT) - Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op); - - Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op); - - // Unpack result if required. 
- if (VT != PackedVT) - Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op); - - return Op; -} + +// If a fixed length vector operation has no side effects when applied to +// undefined elements, we can safely use scalable vectors to perform the same +// operation without needing to worry about predication. +SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + assert(useSVEForFixedLengthVectorVT(VT) && + "Only expected to lower fixed length vector operation!"); + EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); + + // Create list of operands by converting existing ones to scalable types. + SmallVector<SDValue, 4> Ops; + for (const SDValue &V : Op->op_values()) { + assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); + + // Pass through non-vector operands. + if (!V.getValueType().isVector()) { + Ops.push_back(V); + continue; + } + + // "cast" fixed length vector to a scalable vector. + assert(useSVEForFixedLengthVectorVT(V.getValueType()) && + "Only fixed length vectors are supported!"); + Ops.push_back(convertToScalableVector(DAG, ContainerVT, V)); + } + + auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops); + return convertFromScalableVector(DAG, VT, ScalableRes); +} + +SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, + SelectionDAG &DAG) const { + SDLoc DL(ScalarOp); + SDValue AccOp = ScalarOp.getOperand(0); + SDValue VecOp = ScalarOp.getOperand(1); + EVT SrcVT = VecOp.getValueType(); + EVT ResVT = SrcVT.getVectorElementType(); + + EVT ContainerVT = SrcVT; + if (SrcVT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT); + VecOp = convertToScalableVector(DAG, ContainerVT, VecOp); + } + + SDValue Pg = getPredicateForVector(DAG, DL, SrcVT); + SDValue Zero = DAG.getConstant(0, DL, MVT::i64); + + // Convert operands to Scalable. + AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), AccOp, Zero); + + // Perform reduction. 
+ SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT, + Pg, AccOp, VecOp); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero); +} + +SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp, + SelectionDAG &DAG) const { + SDLoc DL(ReduceOp); + SDValue Op = ReduceOp.getOperand(0); + EVT OpVT = Op.getValueType(); + EVT VT = ReduceOp.getValueType(); + + if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1) + return SDValue(); + + SDValue Pg = getPredicateForVector(DAG, DL, OpVT); + + switch (ReduceOp.getOpcode()) { + default: + return SDValue(); + case ISD::VECREDUCE_OR: + return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE); + case ISD::VECREDUCE_AND: { + Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg); + return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE); + } + case ISD::VECREDUCE_XOR: { + SDValue ID = + DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64); + SDValue Cntp = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op); + return DAG.getAnyExtOrTrunc(Cntp, DL, VT); + } + } + + return SDValue(); +} + +SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode, + SDValue ScalarOp, + SelectionDAG &DAG) const { + SDLoc DL(ScalarOp); + SDValue VecOp = ScalarOp.getOperand(0); + EVT SrcVT = VecOp.getValueType(); + + if (useSVEForFixedLengthVectorVT(SrcVT, true)) { + EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT); + VecOp = convertToScalableVector(DAG, ContainerVT, VecOp); + } + + // UADDV always returns an i64 result. + EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 : + SrcVT.getVectorElementType(); + EVT RdxVT = SrcVT; + if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED) + RdxVT = getPackedSVEVectorVT(ResVT); + + SDValue Pg = getPredicateForVector(DAG, DL, SrcVT); + SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp); + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, + Rdx, DAG.getConstant(0, DL, MVT::i64)); + + // The VEC_REDUCE nodes expect an element size result. + if (ResVT != ScalarOp.getValueType()) + Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType()); + + return Res; +} + +SDValue +AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + EVT InVT = Op.getOperand(1).getValueType(); + EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT); + SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1)); + SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2)); + + // Convert the mask to a predicated (NOTE: We don't need to worry about + // inactive lanes since VSELECT is safe when given undefined elements). 
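For the predicate (i1 vector) reductions lowered in the hunk above, the scalar semantics being matched are simple. A plain-C++ reading, assuming an all-true governing predicate (illustrative only):

#include <cstddef>

// VECREDUCE_OR  -> PTEST ANY_ACTIVE:           true if any lane is set.
// VECREDUCE_AND -> XOR with Pg, PTEST NONE_ACTIVE: true if no lane is clear.
// VECREDUCE_XOR -> CNTP then truncate to i1:   parity of the set-lane count.
static bool reduceOr(const bool *P, size_t N) {
  for (size_t I = 0; I < N; ++I)
    if (P[I]) return true;
  return false;
}
static bool reduceAnd(const bool *P, size_t N) {
  for (size_t I = 0; I < N; ++I)
    if (!P[I]) return false;
  return true;
}
static bool reduceXor(const bool *P, size_t N) {
  size_t SetLanes = 0;
  for (size_t I = 0; I < N; ++I)
    SetLanes += P[I] ? 1 : 0;
  return (SetLanes & 1) != 0;
}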
+ EVT MaskVT = Op.getOperand(0).getValueType(); + EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT); + auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0)); + Mask = DAG.getNode(ISD::TRUNCATE, DL, + MaskContainerVT.changeVectorElementType(MVT::i1), Mask); + + auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT, + Mask, Op1, Op2); + + return convertFromScalableVector(DAG, VT, ScalableRes); +} + +SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE( + SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT InVT = Op.getOperand(0).getValueType(); + EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT); + + assert(useSVEForFixedLengthVectorVT(InVT) && + "Only expected to lower fixed length vector operation!"); + assert(Op.getValueType() == InVT.changeTypeToInteger() && + "Expected integer result of the same bit length as the inputs!"); + + // Expand floating point vector comparisons. + if (InVT.isFloatingPoint()) + return SDValue(); + + auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0)); + auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1)); + auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT); + + EVT CmpVT = Pg.getValueType(); + auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT, + {Pg, Op1, Op2, Op.getOperand(2)}); + + EVT PromoteVT = ContainerVT.changeTypeToInteger(); + auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT); + return convertFromScalableVector(DAG, Op.getValueType(), Promote); +} + +SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT InVT = Op.getValueType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + (void)TLI; + + assert(VT.isScalableVector() && TLI.isTypeLegal(VT) && + InVT.isScalableVector() && TLI.isTypeLegal(InVT) && + "Only expect to cast between legal scalable vector types!"); + assert((VT.getVectorElementType() == MVT::i1) == + (InVT.getVectorElementType() == MVT::i1) && + "Cannot cast between data and predicate scalable vector types!"); + + if (InVT == VT) + return Op; + + if (VT.getVectorElementType() == MVT::i1) + return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op); + + EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType()); + EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType()); + assert((VT == PackedVT || InVT == PackedInVT) && + "Cannot cast between unpacked scalable vector types!"); + + // Pack input if required. + if (InVT != PackedInVT) + Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op); + + Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op); + + // Unpack result if required. + if (VT != PackedVT) + Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op); + + return Op; +} diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.h index 9550197159..535aa519f7 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.h @@ -72,51 +72,51 @@ enum NodeType : unsigned { ADC, SBC, // adc, sbc instructions - // Predicated instructions where inactive lanes produce undefined results. + // Predicated instructions where inactive lanes produce undefined results. 
ADD_PRED, FADD_PRED, - FDIV_PRED, - FMA_PRED, - FMAXNM_PRED, - FMINNM_PRED, - FMUL_PRED, - FSUB_PRED, - MUL_PRED, + FDIV_PRED, + FMA_PRED, + FMAXNM_PRED, + FMINNM_PRED, + FMUL_PRED, + FSUB_PRED, + MUL_PRED, SDIV_PRED, - SHL_PRED, - SMAX_PRED, - SMIN_PRED, - SRA_PRED, - SRL_PRED, - SUB_PRED, + SHL_PRED, + SMAX_PRED, + SMIN_PRED, + SRA_PRED, + SRL_PRED, + SUB_PRED, UDIV_PRED, - UMAX_PRED, - UMIN_PRED, - - // Predicated instructions with the result of inactive lanes provided by the - // last operand. - FABS_MERGE_PASSTHRU, - FCEIL_MERGE_PASSTHRU, - FFLOOR_MERGE_PASSTHRU, - FNEARBYINT_MERGE_PASSTHRU, - FNEG_MERGE_PASSTHRU, - FRECPX_MERGE_PASSTHRU, - FRINT_MERGE_PASSTHRU, - FROUND_MERGE_PASSTHRU, - FROUNDEVEN_MERGE_PASSTHRU, - FSQRT_MERGE_PASSTHRU, - FTRUNC_MERGE_PASSTHRU, - FP_ROUND_MERGE_PASSTHRU, - FP_EXTEND_MERGE_PASSTHRU, - UINT_TO_FP_MERGE_PASSTHRU, - SINT_TO_FP_MERGE_PASSTHRU, - FCVTZU_MERGE_PASSTHRU, - FCVTZS_MERGE_PASSTHRU, - SIGN_EXTEND_INREG_MERGE_PASSTHRU, - ZERO_EXTEND_INREG_MERGE_PASSTHRU, - ABS_MERGE_PASSTHRU, - NEG_MERGE_PASSTHRU, - + UMAX_PRED, + UMIN_PRED, + + // Predicated instructions with the result of inactive lanes provided by the + // last operand. + FABS_MERGE_PASSTHRU, + FCEIL_MERGE_PASSTHRU, + FFLOOR_MERGE_PASSTHRU, + FNEARBYINT_MERGE_PASSTHRU, + FNEG_MERGE_PASSTHRU, + FRECPX_MERGE_PASSTHRU, + FRINT_MERGE_PASSTHRU, + FROUND_MERGE_PASSTHRU, + FROUNDEVEN_MERGE_PASSTHRU, + FSQRT_MERGE_PASSTHRU, + FTRUNC_MERGE_PASSTHRU, + FP_ROUND_MERGE_PASSTHRU, + FP_EXTEND_MERGE_PASSTHRU, + UINT_TO_FP_MERGE_PASSTHRU, + SINT_TO_FP_MERGE_PASSTHRU, + FCVTZU_MERGE_PASSTHRU, + FCVTZS_MERGE_PASSTHRU, + SIGN_EXTEND_INREG_MERGE_PASSTHRU, + ZERO_EXTEND_INREG_MERGE_PASSTHRU, + ABS_MERGE_PASSTHRU, + NEG_MERGE_PASSTHRU, + SETCC_MERGE_ZERO, // Arithmetic instructions which write flags. @@ -219,18 +219,18 @@ enum NodeType : unsigned { SADDV, UADDV, - // Vector halving addition - SHADD, - UHADD, - + // Vector halving addition + SHADD, + UHADD, + // Vector rounding halving addition SRHADD, URHADD, - // Absolute difference - UABD, - SABD, - + // Absolute difference + UABD, + SABD, + // Vector across-lanes min/max // Only the lower result lane is defined. SMINV, @@ -238,8 +238,8 @@ enum NodeType : unsigned { SMAXV, UMAXV, - SADDV_PRED, - UADDV_PRED, + SADDV_PRED, + UADDV_PRED, SMAXV_PRED, UMAXV_PRED, SMINV_PRED, @@ -307,14 +307,14 @@ enum NodeType : unsigned { PTEST, PTRUE, - BITREVERSE_MERGE_PASSTHRU, - BSWAP_MERGE_PASSTHRU, - CTLZ_MERGE_PASSTHRU, - CTPOP_MERGE_PASSTHRU, + BITREVERSE_MERGE_PASSTHRU, + BSWAP_MERGE_PASSTHRU, + CTLZ_MERGE_PASSTHRU, + CTPOP_MERGE_PASSTHRU, DUP_MERGE_PASSTHRU, INDEX_VECTOR, - // Cast between vectors of the same element type but differ in length. + // Cast between vectors of the same element type but differ in length. REINTERPRET_CAST, LD1_MERGE_ZERO, @@ -424,11 +424,11 @@ enum NodeType : unsigned { LDP, STP, - STNP, - - // Pseudo for a OBJC call that gets emitted together with a special `mov - // x29, x29` marker instruction. - CALL_RVMARKER + STNP, + + // Pseudo for a OBJC call that gets emitted together with a special `mov + // x29, x29` marker instruction. + CALL_RVMARKER }; } // end namespace AArch64ISD @@ -438,14 +438,14 @@ namespace { // Any instruction that defines a 32-bit result zeros out the high half of the // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may // be copying from a truncate. But any other 32-bit operation will zero-extend -// up to 64 bits. 
AssertSext/AssertZext aren't saying anything about the upper -// 32 bits, they're probably just qualifying a CopyFromReg. +// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper +// 32 bits, they're probably just qualifying a CopyFromReg. // FIXME: X86 also checks for CMOV here. Do we need something similar? static inline bool isDef32(const SDNode &N) { unsigned Opc = N.getOpcode(); return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG && - Opc != ISD::CopyFromReg && Opc != ISD::AssertSext && - Opc != ISD::AssertZext; + Opc != ISD::CopyFromReg && Opc != ISD::AssertSext && + Opc != ISD::AssertZext; } } // end anonymous namespace @@ -784,7 +784,7 @@ public: /// illegal as the original, thus leading to an infinite legalisation loop. /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal /// vector types this override can be removed. - bool mergeStoresAfterLegalization(EVT VT) const override; + bool mergeStoresAfterLegalization(EVT VT) const override; private: /// Keep a pointer to the AArch64Subtarget around so that we can @@ -815,11 +815,11 @@ private: SDValue ThisVal) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; bool isEligibleForTailCallOptimization( @@ -903,28 +903,28 @@ private: SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp, - bool OverrideNEON = false) const; - SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp, + bool OverrideNEON = false) const; + SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG 
&DAG) const; SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; @@ -939,17 +939,17 @@ private: SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps, EVT VT, SelectionDAG &DAG, const SDLoc &DL) const; - SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op, - SelectionDAG &DAG) const; - SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op, - SelectionDAG &DAG) const; + SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op, + SelectionDAG &DAG) const; + SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op, + SelectionDAG &DAG) const; SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const; - SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const; - SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp, - SelectionDAG &DAG) const; - SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const; + SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const; + SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp, + SelectionDAG &DAG) const; + SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op, SelectionDAG &DAG) const; @@ -961,10 +961,10 @@ private: bool Reciprocal) const override; SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &ExtraSteps) const override; - SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, - const DenormalMode &Mode) const override; - SDValue getSqrtResultForDenormInput(SDValue Operand, - SelectionDAG &DAG) const override; + SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, + const DenormalMode &Mode) const override; + SDValue getSqrtResultForDenormInput(SDValue Operand, + SelectionDAG &DAG) const override; unsigned combineRepeatedFPDivisors() const override; ConstraintType getConstraintType(StringRef Constraint) const override; @@ -996,7 +996,7 @@ private: return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } - bool shouldRemoveExtendFromGSIndex(EVT VT) const override; + bool shouldRemoveExtendFromGSIndex(EVT VT) const override; bool isVectorLoadExtDesirable(SDValue ExtVal) const override; bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; @@ -1023,21 +1023,21 @@ private: bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const override; - // Normally SVE is only used for byte size vectors that do not fit within a - // NEON vector. This changes when OverrideNEON is true, allowing SVE to be - // used for 64bit and 128bit vectors as well. 
- bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const; - - // With the exception of data-predicate transitions, no instructions are - // required to cast between legal scalable vector types. However: - // 1. Packed and unpacked types have different bit lengths, meaning BITCAST - // is not universally useable. - // 2. Most unpacked integer types are not legal and thus integer extends - // cannot be used to convert between unpacked and packed types. - // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used - // to transition between unpacked and packed types of the same element type, - // with BITCAST used otherwise. - SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const; + // Normally SVE is only used for byte size vectors that do not fit within a + // NEON vector. This changes when OverrideNEON is true, allowing SVE to be + // used for 64bit and 128bit vectors as well. + bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const; + + // With the exception of data-predicate transitions, no instructions are + // required to cast between legal scalable vector types. However: + // 1. Packed and unpacked types have different bit lengths, meaning BITCAST + // is not universally useable. + // 2. Most unpacked integer types are not legal and thus integer extends + // cannot be used to convert between unpacked and packed types. + // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used + // to transition between unpacked and packed types of the same element type, + // with BITCAST used otherwise. + SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const; }; namespace AArch64 { diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrFormats.td index cf08f56e5b..eb03fce945 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrFormats.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrFormats.td @@ -60,14 +60,14 @@ class AArch64Inst<Format f, string cstr> : Instruction { bits<2> Form = F.Value; // Defaults - bit isWhile = 0; - bit isPTestLike = 0; + bit isWhile = 0; + bit isPTestLike = 0; FalseLanesEnum FalseLanes = FalseLanesNone; DestructiveInstTypeEnum DestructiveInstType = NotDestructive; ElementSizeEnum ElementSize = ElementSizeNone; - let TSFlags{10} = isPTestLike; - let TSFlags{9} = isWhile; + let TSFlags{10} = isPTestLike; + let TSFlags{9} = isWhile; let TSFlags{8-7} = FalseLanes.Value; let TSFlags{6-3} = DestructiveInstType.Value; let TSFlags{2-0} = ElementSize.Value; @@ -267,7 +267,7 @@ def adrplabel : Operand<i64> { let EncoderMethod = "getAdrLabelOpValue"; let PrintMethod = "printAdrpLabel"; let ParserMatchClass = AdrpOperand; - let OperandType = "OPERAND_PCREL"; + let OperandType = "OPERAND_PCREL"; } def AdrOperand : AsmOperandClass { @@ -330,7 +330,7 @@ def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> { } def SImm8Operand : SImmOperand<8>; -def simm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -128 && Imm < 128; }]> { +def simm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -128 && Imm < 128; }]> { let ParserMatchClass = SImm8Operand; let DecoderMethod = "DecodeSImm<8>"; } @@ -919,13 +919,13 @@ def imm0_1 : Operand<i64>, ImmLeaf<i64, [{ let ParserMatchClass = Imm0_1Operand; } -// timm0_1 - as above, but use TargetConstant (TImmLeaf) -def timm0_1 : Operand<i64>, TImmLeaf<i64, [{ - return ((uint64_t)Imm) < 2; -}]> { - let ParserMatchClass = Imm0_1Operand; 
-} - +// timm0_1 - as above, but use TargetConstant (TImmLeaf) +def timm0_1 : Operand<i64>, TImmLeaf<i64, [{ + return ((uint64_t)Imm) < 2; +}]> { + let ParserMatchClass = Imm0_1Operand; +} + // imm0_15 predicate - True if the immediate is in the range [0,15] def imm0_15 : Operand<i64>, ImmLeaf<i64, [{ return ((uint64_t)Imm) < 16; @@ -1301,9 +1301,9 @@ class SimpleSystemI<bit L, dag iops, string asm, string operands, } // System instructions which have an Rt register. -class RtSystemI<bit L, dag oops, dag iops, string asm, string operands, - list<dag> pattern = []> - : BaseSystemI<L, oops, iops, asm, operands, pattern>, +class RtSystemI<bit L, dag oops, dag iops, string asm, string operands, + list<dag> pattern = []> + : BaseSystemI<L, oops, iops, asm, operands, pattern>, Sched<[WriteSys]> { bits<5> Rt; let Inst{4-0} = Rt; @@ -1331,16 +1331,16 @@ class TMSystemI<bits<4> CRm, string asm, list<dag> pattern> let Inst{4-0} = Rt; } -// System instructions that pass a register argument -// This class assumes the register is for input rather than output. -class RegInputSystemI<bits<4> CRm, bits<3> Op2, string asm, - list<dag> pattern = []> - : RtSystemI<0, (outs), (ins GPR64:$Rt), asm, "\t$Rt", pattern> { - let Inst{20-12} = 0b000110001; - let Inst{11-8} = CRm; - let Inst{7-5} = Op2; -} - +// System instructions that pass a register argument +// This class assumes the register is for input rather than output. +class RegInputSystemI<bits<4> CRm, bits<3> Op2, string asm, + list<dag> pattern = []> + : RtSystemI<0, (outs), (ins GPR64:$Rt), asm, "\t$Rt", pattern> { + let Inst{20-12} = 0b000110001; + let Inst{11-8} = CRm; + let Inst{7-5} = Op2; +} + // System instructions for transactional memory - no operand class TMSystemINoOperand<bits<4> CRm, string asm, list<dag> pattern> : TMBaseSystemI<0b0, CRm, 0b011, (outs), (ins), asm, "", pattern> { @@ -1381,14 +1381,14 @@ def barrier_op : Operand<i32> { let PrintMethod = "printBarrierOption"; let ParserMatchClass = BarrierAsmOperand; } -def BarriernXSAsmOperand : AsmOperandClass { - let Name = "BarriernXS"; - let ParserMethod = "tryParseBarriernXSOperand"; -} -def barrier_nxs_op : Operand<i32> { - let PrintMethod = "printBarriernXSOption"; - let ParserMatchClass = BarriernXSAsmOperand; -} +def BarriernXSAsmOperand : AsmOperandClass { + let Name = "BarriernXS"; + let ParserMethod = "tryParseBarriernXSOperand"; +} +def barrier_nxs_op : Operand<i32> { + let PrintMethod = "printBarriernXSOption"; + let ParserMatchClass = BarriernXSAsmOperand; +} class CRmSystemI<Operand crmtype, bits<3> opc, string asm, list<dag> pattern = []> : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm", pattern>, @@ -1470,7 +1470,7 @@ class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg), "mrs", "\t$Rt, $systemreg"> { bits<16> systemreg; let Inst{20-5} = systemreg; - let DecoderNamespace = "Fallback"; + let DecoderNamespace = "Fallback"; } // FIXME: Some of these def NZCV, others don't. Best way to model that? 
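The TSFlags assignments near the top of this AArch64InstrFormats.td hunk pack several per-instruction properties into one bitfield. A hypothetical C++ decoder for that layout (the struct and field names are illustrative; the bit positions follow the `let TSFlags{...}` lines above):

#include <cstdint>

// Bit layout from the tablegen above:
//   {2-0} ElementSize, {6-3} DestructiveInstType, {8-7} FalseLanes,
//   {9} isWhile, {10} isPTestLike.
struct DecodedTSFlags {
  unsigned ElementSize;         // bits 2-0
  unsigned DestructiveInstType; // bits 6-3
  unsigned FalseLanes;          // bits 8-7
  bool IsWhile;                 // bit 9
  bool IsPTestLike;             // bit 10
};

static DecodedTSFlags decodeTSFlags(uint64_t Flags) {
  DecodedTSFlags D;
  D.ElementSize = Flags & 0x7;
  D.DestructiveInstType = (Flags >> 3) & 0xF;
  D.FalseLanes = (Flags >> 7) & 0x3;
  D.IsWhile = ((Flags >> 9) & 0x1) != 0;
  D.IsPTestLike = ((Flags >> 10) & 0x1) != 0;
  return D;
}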
@@ -1480,7 +1480,7 @@ class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt), "msr", "\t$systemreg, $Rt"> { bits<16> systemreg; let Inst{20-5} = systemreg; - let DecoderNamespace = "Fallback"; + let DecoderNamespace = "Fallback"; } def SystemPStateFieldWithImm0_15Operand : AsmOperandClass { @@ -1970,21 +1970,21 @@ class SignAuthTwoOperand<bits<4> opc, string asm, let Inst{4-0} = Rd; } -class ClearAuth<bits<1> data, string asm> - : I<(outs GPR64:$Rd), (ins GPR64:$Rn), asm, "\t$Rd", "$Rd = $Rn", []>, Sched<[]> { - bits<5> Rd; - let Inst{31-11} = 0b110110101100000101000; - let Inst{10} = data; - let Inst{9-5} = 0b11111; - let Inst{4-0} = Rd; -} - +class ClearAuth<bits<1> data, string asm> + : I<(outs GPR64:$Rd), (ins GPR64:$Rn), asm, "\t$Rd", "$Rd = $Rn", []>, Sched<[]> { + bits<5> Rd; + let Inst{31-11} = 0b110110101100000101000; + let Inst{10} = data; + let Inst{9-5} = 0b11111; + let Inst{4-0} = Rd; +} + // Base class for the Armv8.4-A 8 and 16-bit flag manipulation instructions class BaseFlagManipulation<bit sf, bit sz, dag iops, string asm, string ops> : I<(outs), iops, asm, ops, "", []>, Sched<[WriteI, ReadI, ReadI]> { let Uses = [NZCV]; - let Defs = [NZCV]; + let Defs = [NZCV]; bits<5> Rn; let Inst{31} = sf; let Inst{30-15} = 0b0111010000000000; @@ -3972,7 +3972,7 @@ class LoadPreIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, (outs GPR64sp:$wback, regtype:$Rt), (ins GPR64sp:$Rn, simm9:$offset), asm, "$Rn = $wback,@earlyclobber $wback", []>, - Sched<[WriteAdr, WriteLD]>; + Sched<[WriteAdr, WriteLD]>; let mayStore = 1, mayLoad = 0 in class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, @@ -4018,7 +4018,7 @@ class LoadPostIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, (outs GPR64sp:$wback, regtype:$Rt), (ins GPR64sp:$Rn, simm9:$offset), asm, "$Rn = $wback,@earlyclobber $wback", []>, - Sched<[WriteAdr, WriteLD]>; + Sched<[WriteAdr, WriteLD]>; let mayStore = 1, mayLoad = 0 in class StorePostIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype, @@ -4115,7 +4115,7 @@ class LoadPairPreIdx<bits<2> opc, bit V, RegisterOperand regtype, : BaseLoadStorePairPreIdx<opc, V, 1, (outs GPR64sp:$wback, regtype:$Rt, regtype:$Rt2), (ins GPR64sp:$Rn, indextype:$offset), asm>, - Sched<[WriteAdr, WriteLD, WriteLDHi]>; + Sched<[WriteAdr, WriteLD, WriteLDHi]>; let mayStore = 1, mayLoad = 0 in class StorePairPreIdx<bits<2> opc, bit V, RegisterOperand regtype, @@ -4156,7 +4156,7 @@ class LoadPairPostIdx<bits<2> opc, bit V, RegisterOperand regtype, : BaseLoadStorePairPostIdx<opc, V, 1, (outs GPR64sp:$wback, regtype:$Rt, regtype:$Rt2), (ins GPR64sp:$Rn, idxtype:$offset), asm>, - Sched<[WriteAdr, WriteLD, WriteLDHi]>; + Sched<[WriteAdr, WriteLD, WriteLDHi]>; let mayStore = 1, mayLoad = 0 in class StorePairPostIdx<bits<2> opc, bit V, RegisterOperand regtype, @@ -7874,9 +7874,9 @@ class BaseSIMDThreeSameVectorBFDot<bit Q, bit U, string asm, string kind1, multiclass SIMDThreeSameVectorBFDot<bit U, string asm> { def v4bf16 : BaseSIMDThreeSameVectorBFDot<0, U, asm, ".2s", ".4h", V64, - v2f32, v4bf16>; + v2f32, v4bf16>; def v8bf16 : BaseSIMDThreeSameVectorBFDot<1, U, asm, ".4s", ".8h", V128, - v4f32, v8bf16>; + v4f32, v8bf16>; } class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm, @@ -7894,7 +7894,7 @@ class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm, (InputType RegType:$Rn), (InputType (bitconvert (AccumType (AArch64duplane32 (v4f32 V128:$Rm), - VectorIndexS:$idx)))))))]> { + VectorIndexS:$idx)))))))]> { bits<2> idx; let 
Inst{21} = idx{0}; // L @@ -7904,16 +7904,16 @@ class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm, multiclass SIMDThreeSameVectorBF16DotI<bit U, string asm> { def v4bf16 : BaseSIMDThreeSameVectorBF16DotI<0, U, asm, ".2s", ".4h", - ".2h", V64, v2f32, v4bf16>; + ".2h", V64, v2f32, v4bf16>; def v8bf16 : BaseSIMDThreeSameVectorBF16DotI<1, U, asm, ".4s", ".8h", - ".2h", V128, v4f32, v8bf16>; + ".2h", V128, v4f32, v8bf16>; } class SIMDBF16MLAL<bit Q, string asm, SDPatternOperator OpNode> : BaseSIMDThreeSameVectorTied<Q, 0b1, 0b110, 0b11111, V128, asm, ".4s", [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), - (v8bf16 V128:$Rn), - (v8bf16 V128:$Rm)))]> { + (v8bf16 V128:$Rn), + (v8bf16 V128:$Rm)))]> { let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}"); } @@ -7923,10 +7923,10 @@ class SIMDBF16MLALIndex<bit Q, string asm, SDPatternOperator OpNode> "{\t$Rd.4s, $Rn.8h, $Rm.h$idx}", "$Rd = $dst", [(set (v4f32 V128:$dst), (v4f32 (OpNode (v4f32 V128:$Rd), - (v8bf16 V128:$Rn), - (v8bf16 + (v8bf16 V128:$Rn), + (v8bf16 (AArch64duplane16 (v8bf16 V128_lo:$Rm), - VectorIndexH:$idx)))))]>, + VectorIndexH:$idx)))))]>, Sched<[WriteV]> { bits<5> Rd; bits<5> Rn; @@ -7950,8 +7950,8 @@ class SIMDThreeSameVectorBF16MatrixMul<string asm> V128, asm, ".4s", [(set (v4f32 V128:$dst), (int_aarch64_neon_bfmmla (v4f32 V128:$Rd), - (v8bf16 V128:$Rn), - (v8bf16 V128:$Rm)))]> { + (v8bf16 V128:$Rn), + (v8bf16 V128:$Rm)))]> { let AsmString = !strconcat(asm, "{\t$Rd", ".4s", ", $Rn", ".8h", ", $Rm", ".8h", "}"); } @@ -10629,14 +10629,14 @@ multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype, [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm), - (i32 rottype:$rot)))]>; + (i32 rottype:$rot)))]>; def v8f16 : BaseSIMDThreeSameVectorComplex<1, U, 0b01, opcode, V128, rottype, asm, ".8h", [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm), - (i32 rottype:$rot)))]>; + (i32 rottype:$rot)))]>; } let Predicates = [HasComplxNum, HasNEON] in { @@ -10645,21 +10645,21 @@ multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype, [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm), - (i32 rottype:$rot)))]>; + (i32 rottype:$rot)))]>; def v4f32 : BaseSIMDThreeSameVectorComplex<1, U, 0b10, opcode, V128, rottype, asm, ".4s", [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm), - (i32 rottype:$rot)))]>; + (i32 rottype:$rot)))]>; def v2f64 : BaseSIMDThreeSameVectorComplex<1, U, 0b11, opcode, V128, rottype, asm, ".2d", [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm), - (i32 rottype:$rot)))]>; + (i32 rottype:$rot)))]>; } } @@ -10701,14 +10701,14 @@ multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode, [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm), - (i32 rottype:$rot)))]>; + (i32 rottype:$rot)))]>; def v8f16 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b01, opcode, V128, rottype, asm, ".8h", [(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm), - (i32 rottype:$rot)))]>; + (i32 rottype:$rot)))]>; } let Predicates = [HasComplxNum, HasNEON] in { @@ -10717,21 +10717,21 @@ multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode, [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm), - (i32 rottype:$rot)))]>; + (i32 rottype:$rot)))]>; def v4f32 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b10, opcode, 
V128, rottype, asm, ".4s", [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm), - (i32 rottype:$rot)))]>; + (i32 rottype:$rot)))]>; def v2f64 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b11, opcode, V128, rottype, asm, ".2d", [(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm), - (i32 rottype:$rot)))]>; + (i32 rottype:$rot)))]>; } } @@ -11259,35 +11259,35 @@ multiclass STOPregister<string asm, string instr> { !cast<Instruction>(instr # "X")>; } -class LoadStore64B_base<bits<3> opc, string asm_inst, string asm_ops, - dag iops, dag oops, list<dag> pat> - : I<oops, iops, asm_inst, asm_ops, "", pat>, - Sched<[]> /* FIXME: fill in scheduling details once known */ { - bits<5> Rt; - bits<5> Rn; - let Inst{31-21} = 0b11111000001; - let Inst{15} = 1; - let Inst{14-12} = opc; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rt; - - let Predicates = [HasV8_7a]; -} - -class LoadStore64B<bits<3> opc, string asm_inst, dag iops, dag oops, - list<dag> pat = []> - : LoadStore64B_base<opc, asm_inst, "\t$Rt, [$Rn]", iops, oops, pat> { - let Inst{20-16} = 0b11111; -} - -class Store64BV<bits<3> opc, string asm_inst, list<dag> pat = []> - : LoadStore64B_base<opc, asm_inst, "\t$Rs, $Rt, [$Rn]", - (ins GPR64x8:$Rt, GPR64sp:$Rn), (outs GPR64:$Rs), pat> { - bits<5> Rs; - let Inst{20-16} = Rs; -} - +class LoadStore64B_base<bits<3> opc, string asm_inst, string asm_ops, + dag iops, dag oops, list<dag> pat> + : I<oops, iops, asm_inst, asm_ops, "", pat>, + Sched<[]> /* FIXME: fill in scheduling details once known */ { + bits<5> Rt; + bits<5> Rn; + let Inst{31-21} = 0b11111000001; + let Inst{15} = 1; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let Predicates = [HasV8_7a]; +} + +class LoadStore64B<bits<3> opc, string asm_inst, dag iops, dag oops, + list<dag> pat = []> + : LoadStore64B_base<opc, asm_inst, "\t$Rt, [$Rn]", iops, oops, pat> { + let Inst{20-16} = 0b11111; +} + +class Store64BV<bits<3> opc, string asm_inst, list<dag> pat = []> + : LoadStore64B_base<opc, asm_inst, "\t$Rs, $Rt, [$Rn]", + (ins GPR64x8:$Rt, GPR64sp:$Rn), (outs GPR64:$Rs), pat> { + bits<5> Rs; + let Inst{20-16} = Rs; +} + //---------------------------------------------------------------------------- // Allow the size specifier tokens to be upper case, not just lower. def : TokenAlias<".4B", ".4b">; // Add dot product diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrGISel.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrGISel.td index 25656fac1d..b7d5014166 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrGISel.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrGISel.td @@ -88,29 +88,29 @@ def G_DUP: AArch64GenericInstruction { let InOperandList = (ins type1:$lane); let hasSideEffects = 0; } - -// Represents a lane duplicate operation. 
-def G_DUPLANE8 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src, type1:$lane); - let hasSideEffects = 0; -} -def G_DUPLANE16 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src, type1:$lane); - let hasSideEffects = 0; -} -def G_DUPLANE32 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src, type1:$lane); - let hasSideEffects = 0; -} -def G_DUPLANE64 : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src, type1:$lane); - let hasSideEffects = 0; -} - + +// Represents a lane duplicate operation. +def G_DUPLANE8 : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src, type1:$lane); + let hasSideEffects = 0; +} +def G_DUPLANE16 : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src, type1:$lane); + let hasSideEffects = 0; +} +def G_DUPLANE32 : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src, type1:$lane); + let hasSideEffects = 0; +} +def G_DUPLANE64 : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src, type1:$lane); + let hasSideEffects = 0; +} + // Represents a trn1 instruction. Produced post-legalization from // G_SHUFFLE_VECTORs with appropriate masks. def G_TRN1 : AArch64GenericInstruction { @@ -134,28 +134,28 @@ def G_EXT: AArch64GenericInstruction { let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm); } -// Represents a vector G_ASHR with an immediate. -def G_VASHR : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src1, untyped_imm_0:$imm); -} - -// Represents a vector G_LSHR with an immediate. -def G_VLSHR : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src1, untyped_imm_0:$imm); -} - -// Represents an integer to FP conversion on the FPR bank. -def G_SITOF : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); -} -def G_UITOF : AArch64GenericInstruction { - let OutOperandList = (outs type0:$dst); - let InOperandList = (ins type0:$src); -} - +// Represents a vector G_ASHR with an immediate. +def G_VASHR : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, untyped_imm_0:$imm); +} + +// Represents a vector G_LSHR with an immediate. +def G_VLSHR : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, untyped_imm_0:$imm); +} + +// Represents an integer to FP conversion on the FPR bank. 
+def G_SITOF : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src); +} +def G_UITOF : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src); +} + def : GINodeEquiv<G_REV16, AArch64rev16>; def : GINodeEquiv<G_REV32, AArch64rev32>; def : GINodeEquiv<G_REV64, AArch64rev64>; @@ -164,21 +164,21 @@ def : GINodeEquiv<G_UZP2, AArch64uzp2>; def : GINodeEquiv<G_ZIP1, AArch64zip1>; def : GINodeEquiv<G_ZIP2, AArch64zip2>; def : GINodeEquiv<G_DUP, AArch64dup>; -def : GINodeEquiv<G_DUPLANE8, AArch64duplane8>; -def : GINodeEquiv<G_DUPLANE16, AArch64duplane16>; -def : GINodeEquiv<G_DUPLANE32, AArch64duplane32>; -def : GINodeEquiv<G_DUPLANE64, AArch64duplane64>; +def : GINodeEquiv<G_DUPLANE8, AArch64duplane8>; +def : GINodeEquiv<G_DUPLANE16, AArch64duplane16>; +def : GINodeEquiv<G_DUPLANE32, AArch64duplane32>; +def : GINodeEquiv<G_DUPLANE64, AArch64duplane64>; def : GINodeEquiv<G_TRN1, AArch64trn1>; def : GINodeEquiv<G_TRN2, AArch64trn2>; def : GINodeEquiv<G_EXT, AArch64ext>; -def : GINodeEquiv<G_VASHR, AArch64vashr>; -def : GINodeEquiv<G_VLSHR, AArch64vlshr>; -def : GINodeEquiv<G_SITOF, AArch64sitof>; -def : GINodeEquiv<G_UITOF, AArch64uitof>; - -def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>; - -// These are patterns that we only use for GlobalISel via the importer. -def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)), - (vector_extract (v2f32 FPR64:$Rn), (i64 1)))), - (f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>; +def : GINodeEquiv<G_VASHR, AArch64vashr>; +def : GINodeEquiv<G_VLSHR, AArch64vlshr>; +def : GINodeEquiv<G_SITOF, AArch64sitof>; +def : GINodeEquiv<G_UITOF, AArch64uitof>; + +def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>; + +// These are patterns that we only use for GlobalISel via the importer. +def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)), + (vector_extract (v2f32 FPR64:$Rn), (i64 1)))), + (f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp index 6b38e216a8..fc3e238182 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -107,13 +107,13 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { NumBytes = PatchPointOpers(&MI).getNumPatchBytes(); assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); break; - case TargetOpcode::STATEPOINT: - NumBytes = StatepointOpers(&MI).getNumPatchBytes(); - assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); - // No patch bytes means a normal call inst is emitted - if (NumBytes == 0) - NumBytes = 4; - break; + case TargetOpcode::STATEPOINT: + NumBytes = StatepointOpers(&MI).getNumPatchBytes(); + assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + // No patch bytes means a normal call inst is emitted + if (NumBytes == 0) + NumBytes = 4; + break; case AArch64::TLSDESC_CALLSEQ: // This gets lowered to an instruction sequence which takes 16 bytes NumBytes = 16; @@ -294,31 +294,31 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, } } - // If we're allowed to modify and the block ends in a unconditional branch - // which could simply fallthrough, remove the branch. (Note: This case only - // matters when we can't understand the whole sequence, otherwise it's also - // handled by BranchFolding.cpp.) 
- if (AllowModify && isUncondBranchOpcode(LastOpc) && - MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { - assert(!isUncondBranchOpcode(LastOpc) && - "unreachable unconditional branches removed above"); - - if (isCondBranchOpcode(LastOpc)) { - // Block ends with fall-through condbranch. - parseCondBranch(LastInst, TBB, Cond); - return false; - } - return true; // Can't handle indirect branch. - } else { - SecondLastInst = &*I; - SecondLastOpc = SecondLastInst->getOpcode(); - } - } - + // If we're allowed to modify and the block ends in a unconditional branch + // which could simply fallthrough, remove the branch. (Note: This case only + // matters when we can't understand the whole sequence, otherwise it's also + // handled by BranchFolding.cpp.) + if (AllowModify && isUncondBranchOpcode(LastOpc) && + MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) { + LastInst->eraseFromParent(); + LastInst = SecondLastInst; + LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { + assert(!isUncondBranchOpcode(LastOpc) && + "unreachable unconditional branches removed above"); + + if (isCondBranchOpcode(LastOpc)) { + // Block ends with fall-through condbranch. + parseCondBranch(LastInst, TBB, Cond); + return false; + } + return true; // Can't handle indirect branch. + } else { + SecondLastInst = &*I; + SecondLastOpc = SecondLastInst->getOpcode(); + } + } + // If there are three terminators, we don't know what sort of block this is. if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) return true; @@ -353,56 +353,56 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } -bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB, - MachineBranchPredicate &MBP, - bool AllowModify) const { - // For the moment, handle only a block which ends with a cb(n)zx followed by - // a fallthrough. Why this? Because it is a common form. - // TODO: Should we handle b.cc? - - MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); - if (I == MBB.end()) - return true; - - // Skip over SpeculationBarrierEndBB terminators - if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB || - I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) { - --I; - } - - if (!isUnpredicatedTerminator(*I)) - return true; - - // Get the last instruction in the block. - MachineInstr *LastInst = &*I; - unsigned LastOpc = LastInst->getOpcode(); - if (!isCondBranchOpcode(LastOpc)) - return true; - - switch (LastOpc) { - default: - return true; - case AArch64::CBZW: - case AArch64::CBZX: - case AArch64::CBNZW: - case AArch64::CBNZX: - break; - }; - - MBP.TrueDest = LastInst->getOperand(1).getMBB(); - assert(MBP.TrueDest && "expected!"); - MBP.FalseDest = MBB.getNextNode(); - - MBP.ConditionDef = nullptr; - MBP.SingleUseCondition = false; - - MBP.LHS = LastInst->getOperand(0); - MBP.RHS = MachineOperand::CreateImm(0); - MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE - : MachineBranchPredicate::PRED_EQ; - return false; -} - +bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB, + MachineBranchPredicate &MBP, + bool AllowModify) const { + // For the moment, handle only a block which ends with a cb(n)zx followed by + // a fallthrough. Why this? Because it is a common form. + // TODO: Should we handle b.cc? 
+ + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return true; + + // Skip over SpeculationBarrierEndBB terminators + if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB || + I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) { + --I; + } + + if (!isUnpredicatedTerminator(*I)) + return true; + + // Get the last instruction in the block. + MachineInstr *LastInst = &*I; + unsigned LastOpc = LastInst->getOpcode(); + if (!isCondBranchOpcode(LastOpc)) + return true; + + switch (LastOpc) { + default: + return true; + case AArch64::CBZW: + case AArch64::CBZX: + case AArch64::CBNZW: + case AArch64::CBNZX: + break; + }; + + MBP.TrueDest = LastInst->getOperand(1).getMBB(); + assert(MBP.TrueDest && "expected!"); + MBP.FalseDest = MBB.getNextNode(); + + MBP.ConditionDef = nullptr; + MBP.SingleUseCondition = false; + + MBP.LHS = LastInst->getOperand(0); + MBP.RHS = MachineOperand::CreateImm(0); + MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE + : MachineBranchPredicate::PRED_EQ; + return false; +} + bool AArch64InstrInfo::reverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const { if (Cond[0].getImm() != -1) { @@ -1119,13 +1119,13 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, switch (MI.getOpcode()) { default: break; - case AArch64::PTEST_PP: - SrcReg = MI.getOperand(0).getReg(); - SrcReg2 = MI.getOperand(1).getReg(); - // Not sure about the mask and value for now... - CmpMask = ~0; - CmpValue = 0; - return true; + case AArch64::PTEST_PP: + SrcReg = MI.getOperand(0).getReg(); + SrcReg2 = MI.getOperand(1).getReg(); + // Not sure about the mask and value for now... + CmpMask = ~0; + CmpValue = 0; + return true; case AArch64::SUBSWrr: case AArch64::SUBSWrs: case AArch64::SUBSWrx: @@ -1281,9 +1281,9 @@ static bool areCFlagsAccessedBetweenInstrs( return true; // From must be above To. - assert(std::any_of( - ++To.getReverse(), To->getParent()->rend(), - [From](MachineInstr &MI) { return MI.getIterator() == From; })); + assert(std::any_of( + ++To.getReverse(), To->getParent()->rend(), + [From](MachineInstr &MI) { return MI.getIterator() == From; })); // We iterate backward starting at \p To until we hit \p From. for (const MachineInstr &Instr : @@ -1296,127 +1296,127 @@ static bool areCFlagsAccessedBetweenInstrs( return false; } -/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating -/// operation which could set the flags in an identical manner -bool AArch64InstrInfo::optimizePTestInstr( - MachineInstr *PTest, unsigned MaskReg, unsigned PredReg, - const MachineRegisterInfo *MRI) const { - auto *Mask = MRI->getUniqueVRegDef(MaskReg); - auto *Pred = MRI->getUniqueVRegDef(PredReg); - auto NewOp = Pred->getOpcode(); - bool OpChanged = false; - - unsigned MaskOpcode = Mask->getOpcode(); - unsigned PredOpcode = Pred->getOpcode(); - bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode); - bool PredIsWhileLike = isWhileOpcode(PredOpcode); - - if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) { - // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't - // deactivate any lanes OTHER_INST might set. - uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode); - uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode); - - // Must be an all active predicate of matching element size. 
- if ((PredElementSize != MaskElementSize) || - (Mask->getOperand(1).getImm() != 31)) - return false; - - // Fallthough to simply remove the PTEST. - } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) { - // For PTEST(PG, PG), PTEST is redundant when PG is the result of an - // instruction that sets the flags as PTEST would. - - // Fallthough to simply remove the PTEST. - } else if (PredIsPTestLike) { - // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both - // instructions use the same predicate. - auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); - if (Mask != PTestLikeMask) - return false; - - // Fallthough to simply remove the PTEST. - } else { - switch (Pred->getOpcode()) { - case AArch64::BRKB_PPzP: - case AArch64::BRKPB_PPzPP: { - // Op 0 is chain, 1 is the mask, 2 the previous predicate to - // propagate, 3 the new predicate. - - // Check to see if our mask is the same as the brkpb's. If - // not the resulting flag bits may be different and we - // can't remove the ptest. - auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); - if (Mask != PredMask) - return false; - - // Switch to the new opcode - NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP - : AArch64::BRKPBS_PPzPP; - OpChanged = true; - break; - } - case AArch64::BRKN_PPzP: { - auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); - if (Mask != PredMask) - return false; - - NewOp = AArch64::BRKNS_PPzP; - OpChanged = true; - break; - } - default: - // Bail out if we don't recognize the input - return false; - } - } - - const TargetRegisterInfo *TRI = &getRegisterInfo(); - - // If the predicate is in a different block (possibly because its been - // hoisted out), then assume the flags are set in between statements. - if (Pred->getParent() != PTest->getParent()) - return false; - - // If another instruction between the propagation and test sets the - // flags, don't remove the ptest. - MachineBasicBlock::iterator I = Pred, E = PTest; - ++I; // Skip past the predicate op itself. - for (; I != E; ++I) { - const MachineInstr &Inst = *I; - - // TODO: If the ptest flags are unused, we could still remove it. - if (Inst.modifiesRegister(AArch64::NZCV, TRI)) - return false; - } - - // If we pass all the checks, it's safe to remove the PTEST and use the flags - // as they are prior to PTEST. Sometimes this requires the tested PTEST - // operand to be replaced with an equivalent instruction that also sets the - // flags. - Pred->setDesc(get(NewOp)); - PTest->eraseFromParent(); - if (OpChanged) { - bool succeeded = UpdateOperandRegClass(*Pred); - (void)succeeded; - assert(succeeded && "Operands have incompatible register classes!"); - Pred->addRegisterDefined(AArch64::NZCV, TRI); - } - - // Ensure that the flags def is live. 
- if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) { - unsigned i = 0, e = Pred->getNumOperands(); - for (; i != e; ++i) { - MachineOperand &MO = Pred->getOperand(i); - if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) { - MO.setIsDead(false); - break; - } - } - } - return true; -} - +/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating +/// operation which could set the flags in an identical manner +bool AArch64InstrInfo::optimizePTestInstr( + MachineInstr *PTest, unsigned MaskReg, unsigned PredReg, + const MachineRegisterInfo *MRI) const { + auto *Mask = MRI->getUniqueVRegDef(MaskReg); + auto *Pred = MRI->getUniqueVRegDef(PredReg); + auto NewOp = Pred->getOpcode(); + bool OpChanged = false; + + unsigned MaskOpcode = Mask->getOpcode(); + unsigned PredOpcode = Pred->getOpcode(); + bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode); + bool PredIsWhileLike = isWhileOpcode(PredOpcode); + + if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) { + // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't + // deactivate any lanes OTHER_INST might set. + uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode); + uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode); + + // Must be an all active predicate of matching element size. + if ((PredElementSize != MaskElementSize) || + (Mask->getOperand(1).getImm() != 31)) + return false; + + // Fallthough to simply remove the PTEST. + } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) { + // For PTEST(PG, PG), PTEST is redundant when PG is the result of an + // instruction that sets the flags as PTEST would. + + // Fallthough to simply remove the PTEST. + } else if (PredIsPTestLike) { + // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both + // instructions use the same predicate. + auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); + if (Mask != PTestLikeMask) + return false; + + // Fallthough to simply remove the PTEST. + } else { + switch (Pred->getOpcode()) { + case AArch64::BRKB_PPzP: + case AArch64::BRKPB_PPzPP: { + // Op 0 is chain, 1 is the mask, 2 the previous predicate to + // propagate, 3 the new predicate. + + // Check to see if our mask is the same as the brkpb's. If + // not the resulting flag bits may be different and we + // can't remove the ptest. + auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); + if (Mask != PredMask) + return false; + + // Switch to the new opcode + NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP + : AArch64::BRKPBS_PPzPP; + OpChanged = true; + break; + } + case AArch64::BRKN_PPzP: { + auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); + if (Mask != PredMask) + return false; + + NewOp = AArch64::BRKNS_PPzP; + OpChanged = true; + break; + } + default: + // Bail out if we don't recognize the input + return false; + } + } + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + + // If the predicate is in a different block (possibly because its been + // hoisted out), then assume the flags are set in between statements. + if (Pred->getParent() != PTest->getParent()) + return false; + + // If another instruction between the propagation and test sets the + // flags, don't remove the ptest. + MachineBasicBlock::iterator I = Pred, E = PTest; + ++I; // Skip past the predicate op itself. + for (; I != E; ++I) { + const MachineInstr &Inst = *I; + + // TODO: If the ptest flags are unused, we could still remove it. 
+ if (Inst.modifiesRegister(AArch64::NZCV, TRI)) + return false; + } + + // If we pass all the checks, it's safe to remove the PTEST and use the flags + // as they are prior to PTEST. Sometimes this requires the tested PTEST + // operand to be replaced with an equivalent instruction that also sets the + // flags. + Pred->setDesc(get(NewOp)); + PTest->eraseFromParent(); + if (OpChanged) { + bool succeeded = UpdateOperandRegClass(*Pred); + (void)succeeded; + assert(succeeded && "Operands have incompatible register classes!"); + Pred->addRegisterDefined(AArch64::NZCV, TRI); + } + + // Ensure that the flags def is live. + if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) { + unsigned i = 0, e = Pred->getNumOperands(); + for (; i != e; ++i) { + MachineOperand &MO = Pred->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) { + MO.setIsDead(false); + break; + } + } + } + return true; +} + /// Try to optimize a compare instruction. A compare instruction is an /// instruction which produces AArch64::NZCV. It can be truly compare /// instruction @@ -1455,9 +1455,9 @@ bool AArch64InstrInfo::optimizeCompareInstr( return true; } - if (CmpInstr.getOpcode() == AArch64::PTEST_PP) - return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI); - + if (CmpInstr.getOpcode() == AArch64::PTEST_PP) + return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI); + // Continue only if we have a "ri" where immediate is zero. // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare // function. @@ -2274,24 +2274,24 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth( return true; } -Optional<ExtAddrMode> -AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI, - const TargetRegisterInfo *TRI) const { - const MachineOperand *Base; // Filled with the base operand of MI. - int64_t Offset; // Filled with the offset of MI. - bool OffsetIsScalable; - if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI)) - return None; - - if (!Base->isReg()) - return None; - ExtAddrMode AM; - AM.BaseReg = Base->getReg(); - AM.Displacement = Offset; - AM.ScaledReg = 0; - return AM; -} - +Optional<ExtAddrMode> +AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI, + const TargetRegisterInfo *TRI) const { + const MachineOperand *Base; // Filled with the base operand of MI. + int64_t Offset; // Filled with the offset of MI. 
+ bool OffsetIsScalable; + if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI)) + return None; + + if (!Base->isReg()) + return None; + ExtAddrMode AM; + AM.BaseReg = Base->getReg(); + AM.Displacement = Offset; + AM.ScaledReg = 0; + return AM; +} + bool AArch64InstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, @@ -3290,7 +3290,7 @@ void AArch64InstrInfo::storeRegToStackSlot( else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); Opc = AArch64::STR_PXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 4: @@ -3334,7 +3334,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); Opc = AArch64::STR_ZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 24: @@ -3356,7 +3356,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); Opc = AArch64::STR_ZZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 48: @@ -3367,7 +3367,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); Opc = AArch64::STR_ZZZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 64: @@ -3378,7 +3378,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); Opc = AArch64::STR_ZZZZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; } @@ -3444,7 +3444,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); Opc = AArch64::LDR_PXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 4: @@ -3488,7 +3488,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); Opc = AArch64::LDR_ZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 24: @@ -3510,7 +3510,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); Opc = AArch64::LDR_ZZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 48: @@ -3521,7 +3521,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); Opc = AArch64::LDR_ZZZXI; - StackID = TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; case 64: @@ -3532,7 +3532,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); Opc = AArch64::LDR_ZZZZXI; - StackID = 
TargetStackID::ScalableVector; + StackID = TargetStackID::ScalableVector; } break; } @@ -3559,47 +3559,47 @@ bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, }); } -void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( - const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) { - // The smallest scalable element supported by scaled SVE addressing - // modes are predicates, which are 2 scalable bytes in size. So the scalable - // byte offset must always be a multiple of 2. - assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset"); - - // VGSized offsets are divided by '2', because the VG register is the - // the number of 64bit granules as opposed to 128bit vector chunks, - // which is how the 'n' in e.g. MVT::nxv1i8 is modelled. - // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes. - // VG = n * 2 and the dwarf offset must be VG * 8 bytes. - ByteSized = Offset.getFixed(); - VGSized = Offset.getScalable() / 2; -} - -/// Returns the offset in parts to which this frame offset can be -/// decomposed for the purpose of describing a frame offset. -/// For non-scalable offsets this is simply its byte size. -void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( - const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, - int64_t &NumDataVectors) { - // The smallest scalable element supported by scaled SVE addressing - // modes are predicates, which are 2 scalable bytes in size. So the scalable - // byte offset must always be a multiple of 2. - assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset"); - - NumBytes = Offset.getFixed(); - NumDataVectors = 0; - NumPredicateVectors = Offset.getScalable() / 2; - // This method is used to get the offsets to adjust the frame offset. - // If the function requires ADDPL to be used and needs more than two ADDPL - // instructions, part of the offset is folded into NumDataVectors so that it - // uses ADDVL for part of it, reducing the number of ADDPL instructions. - if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 || - NumPredicateVectors > 62) { - NumDataVectors = NumPredicateVectors / 8; - NumPredicateVectors -= NumDataVectors * 8; - } -} - +void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( + const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) { + // The smallest scalable element supported by scaled SVE addressing + // modes are predicates, which are 2 scalable bytes in size. So the scalable + // byte offset must always be a multiple of 2. + assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset"); + + // VGSized offsets are divided by '2', because the VG register is the + // the number of 64bit granules as opposed to 128bit vector chunks, + // which is how the 'n' in e.g. MVT::nxv1i8 is modelled. + // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes. + // VG = n * 2 and the dwarf offset must be VG * 8 bytes. + ByteSized = Offset.getFixed(); + VGSized = Offset.getScalable() / 2; +} + +/// Returns the offset in parts to which this frame offset can be +/// decomposed for the purpose of describing a frame offset. +/// For non-scalable offsets this is simply its byte size. +void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( + const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, + int64_t &NumDataVectors) { + // The smallest scalable element supported by scaled SVE addressing + // modes are predicates, which are 2 scalable bytes in size. 
So the scalable + // byte offset must always be a multiple of 2. + assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset"); + + NumBytes = Offset.getFixed(); + NumDataVectors = 0; + NumPredicateVectors = Offset.getScalable() / 2; + // This method is used to get the offsets to adjust the frame offset. + // If the function requires ADDPL to be used and needs more than two ADDPL + // instructions, part of the offset is folded into NumDataVectors so that it + // uses ADDVL for part of it, reducing the number of ADDPL instructions. + if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 || + NumPredicateVectors > 62) { + NumDataVectors = NumPredicateVectors / 8; + NumPredicateVectors -= NumDataVectors * 8; + } +} + // Helper function to emit a frame offset adjustment from a given // pointer (SrcReg), stored into DestReg. This function is explicit // in that it requires the opcode. @@ -3709,13 +3709,13 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, MachineInstr::MIFlag Flag, bool SetNZCV, bool NeedsWinCFI, bool *HasWinCFI) { int64_t Bytes, NumPredicateVectors, NumDataVectors; - AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( - Offset, Bytes, NumPredicateVectors, NumDataVectors); + AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( + Offset, Bytes, NumPredicateVectors, NumDataVectors); // First emit non-scalable frame offsets, or a simple 'mov'. if (Bytes || (!Offset && SrcReg != DestReg)) { - assert((DestReg != AArch64::SP || Bytes % 8 == 0) && - "SP increment/decrement not 8-byte aligned"); + assert((DestReg != AArch64::SP || Bytes % 8 == 0) && + "SP increment/decrement not 8-byte aligned"); unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri; if (Bytes < 0) { Bytes = -Bytes; @@ -3970,7 +3970,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, // Construct the complete offset. bool IsMulVL = ScaleValue.isScalable(); unsigned Scale = ScaleValue.getKnownMinSize(); - int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed(); + int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed(); const MachineOperand &ImmOpnd = MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode())); @@ -4012,9 +4012,9 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, *OutUnscaledOp = *UnscaledOp; if (IsMulVL) - SOffset = StackOffset::get(SOffset.getFixed(), Offset); + SOffset = StackOffset::get(SOffset.getFixed(), Offset); else - SOffset = StackOffset::get(Offset, SOffset.getScalable()); + SOffset = StackOffset::get(Offset, SOffset.getScalable()); return AArch64FrameOffsetCanUpdate | (SOffset ? 0 : AArch64FrameOffsetIsLegal); } @@ -4026,7 +4026,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned ImmIdx = FrameRegIdx + 1; if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { - Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm()); + Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm()); emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), MI.getOperand(0).getReg(), FrameReg, Offset, TII, MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); @@ -4131,7 +4131,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) { return false; } -// FP Opcodes that can be combined with a FMUL. +// FP Opcodes that can be combined with a FMUL. 
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { switch (Inst.getOpcode()) { default: @@ -4153,12 +4153,12 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { case AArch64::FSUBv2f64: case AArch64::FSUBv4f32: TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; - // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by - // the target options or if FADD/FSUB has the contract fast-math flag. - return Options.UnsafeFPMath || - Options.AllowFPOpFusion == FPOpFusion::Fast || - Inst.getFlag(MachineInstr::FmContract); - return true; + // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by + // the target options or if FADD/FSUB has the contract fast-math flag. + return Options.UnsafeFPMath || + Options.AllowFPOpFusion == FPOpFusion::Fast || + Inst.getFlag(MachineInstr::FmContract); + return true; } return false; } @@ -4638,8 +4638,8 @@ bool AArch64InstrInfo::isThroughputPattern( /// pattern evaluator stops checking as soon as it finds a faster sequence. bool AArch64InstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, - bool DoRegPressureReduce) const { + MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, + bool DoRegPressureReduce) const { // Integer patterns if (getMaddPatterns(Root, Patterns)) return true; @@ -4647,8 +4647,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns( if (getFMAPatterns(Root, Patterns)) return true; - return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, - DoRegPressureReduce); + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, + DoRegPressureReduce); } enum class FMAInstKind { Default, Indexed, Accumulator }; @@ -4871,7 +4871,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - MachineInstr *MUL = nullptr; + MachineInstr *MUL = nullptr; const TargetRegisterClass *RC; unsigned Opc; switch (Pattern) { @@ -5692,9 +5692,9 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } } // end switch (Pattern) // Record MUL and ADD/SUB for deletion - // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and - // CodeGen/AArch64/urem-seteq-nonzero.ll. - // assert(MUL && "MUL was never set"); + // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and + // CodeGen/AArch64/urem-seteq-nonzero.ll. 
+ // assert(MUL && "MUL was never set"); DelInstrs.push_back(MUL); DelInstrs.push_back(&Root); } @@ -6034,20 +6034,20 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const { static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b) { - const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>(); - const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>(); + const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>(); + const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>(); - return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) && - MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true); + return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) && + MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true); } static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b) { - const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>(); - const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>(); + const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>(); + const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>(); - return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey(); + return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey(); } static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, @@ -6104,9 +6104,9 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // necessary. However, at this point we don't know if the outlined function // will have a RET instruction so we assume the worst. const TargetRegisterInfo &TRI = getRegisterInfo(); - if (FirstCand.getMF() - ->getInfo<AArch64FunctionInfo>() - ->shouldSignReturnAddress(true)) { + if (FirstCand.getMF() + ->getInfo<AArch64FunctionInfo>() + ->shouldSignReturnAddress(true)) { // One PAC and one AUT instructions NumBytesToCreateFrame += 8; @@ -6163,7 +6163,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( return false; }; // Remove candidates with illegal stack modifying instructions - llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification); + llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification); // If the sequence doesn't have enough candidates left, then we're done. if (RepeatedSequenceLocs.size() < 2) @@ -6206,7 +6206,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // Erase every candidate that violates the restrictions above. (It could be // true that we have viable candidates, so it's not worth bailing out in // the case that, say, 1 out of 20 candidates violate the restructions.) - llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall); + llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall); // If the sequence doesn't have enough candidates left, then we're done. 
if (RepeatedSequenceLocs.size() < 2) @@ -6229,7 +6229,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( NumBytesToCreateFrame += 4; bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) { - return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement(); + return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement(); }); // We check to see if CFI Instructions are present, and if they are @@ -6398,60 +6398,60 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( FrameID = MachineOutlinerNoLRSave; } else { SetCandidateCallInfo(MachineOutlinerDefault, 12); - - // Bugzilla ID: 46767 - // TODO: Check if fixing up the stack more than once is safe so we can - // outline these. - // - // An outline resulting in a caller that requires stack fixups at the - // callsite to a callee that also requires stack fixups can happen when - // there are no available registers at the candidate callsite for a - // candidate that itself also has calls. - // - // In other words if function_containing_sequence in the following pseudo - // assembly requires that we save LR at the point of the call, but there - // are no available registers: in this case we save using SP and as a - // result the SP offsets requires stack fixups by multiples of 16. - // - // function_containing_sequence: - // ... - // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N - // call OUTLINED_FUNCTION_N - // restore LR from SP - // ... - // - // OUTLINED_FUNCTION_N: - // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N - // ... - // bl foo - // restore LR from SP - // ret - // - // Because the code to handle more than one stack fixup does not - // currently have the proper checks for legality, these cases will assert - // in the AArch64 MachineOutliner. This is because the code to do this - // needs more hardening, testing, better checks that generated code is - // legal, etc and because it is only verified to handle a single pass of - // stack fixup. - // - // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch - // these cases until they are known to be handled. Bugzilla 46767 is - // referenced in comments at the assert site. - // - // To avoid asserting (or generating non-legal code on noassert builds) - // we remove all candidates which would need more than one stack fixup by - // pruning the cases where the candidate has calls while also having no - // available LR and having no available general purpose registers to copy - // LR to (ie one extra stack save/restore). - // - if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { - erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) { - return (std::any_of( - C.front(), std::next(C.back()), - [](const MachineInstr &MI) { return MI.isCall(); })) && - (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C)); - }); - } + + // Bugzilla ID: 46767 + // TODO: Check if fixing up the stack more than once is safe so we can + // outline these. + // + // An outline resulting in a caller that requires stack fixups at the + // callsite to a callee that also requires stack fixups can happen when + // there are no available registers at the candidate callsite for a + // candidate that itself also has calls. 
+ // + // In other words if function_containing_sequence in the following pseudo + // assembly requires that we save LR at the point of the call, but there + // are no available registers: in this case we save using SP and as a + // result the SP offsets requires stack fixups by multiples of 16. + // + // function_containing_sequence: + // ... + // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N + // call OUTLINED_FUNCTION_N + // restore LR from SP + // ... + // + // OUTLINED_FUNCTION_N: + // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N + // ... + // bl foo + // restore LR from SP + // ret + // + // Because the code to handle more than one stack fixup does not + // currently have the proper checks for legality, these cases will assert + // in the AArch64 MachineOutliner. This is because the code to do this + // needs more hardening, testing, better checks that generated code is + // legal, etc and because it is only verified to handle a single pass of + // stack fixup. + // + // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch + // these cases until they are known to be handled. Bugzilla 46767 is + // referenced in comments at the assert site. + // + // To avoid asserting (or generating non-legal code on noassert builds) + // we remove all candidates which would need more than one stack fixup by + // pruning the cases where the candidate has calls while also having no + // available LR and having no available general purpose registers to copy + // LR to (ie one extra stack save/restore). + // + if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { + erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) { + return (std::any_of( + C.front(), std::next(C.back()), + [](const MachineInstr &MI) { return MI.isCall(); })) && + (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C)); + }); + } } // If we dropped all of the candidates, bail out here. @@ -6820,7 +6820,7 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, // If v8.3a features are available we can replace a RET instruction by // RETAA or RETAB and omit the AUT instructions - if (Subtarget.hasPAuth() && MBBAUT != MBB.end() && + if (Subtarget.hasPAuth() && MBBAUT != MBB.end() && MBBAUT->getOpcode() == AArch64::RET) { BuildMI(MBB, MBBAUT, DL, TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA @@ -6872,12 +6872,12 @@ void AArch64InstrInfo::buildOutlinedFrame( return MI.isCall() && !MI.isReturn(); }; - if (llvm::any_of(MBB.instrs(), IsNonTailCall)) { + if (llvm::any_of(MBB.instrs(), IsNonTailCall)) { // Fix up the instructions in the range, since we're going to modify the // stack. - - // Bugzilla ID: 46767 - // TODO: Check if fixing up twice is safe so we can outline these. + + // Bugzilla ID: 46767 + // TODO: Check if fixing up twice is safe so we can outline these. assert(OF.FrameConstructionID != MachineOutlinerDefault && "Can only fix up stack references once"); fixupPostOutline(MBB); @@ -6934,11 +6934,11 @@ void AArch64InstrInfo::buildOutlinedFrame( // If a bunch of candidates reach this point they must agree on their return // address signing. 
It is therefore enough to just consider the signing // behaviour of one of them - const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>(); - bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction); + const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>(); + bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction); // a_key is the default - bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey(); + bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey(); // If this is a tail call outlined function, then there's already a return. if (OF.FrameConstructionID == MachineOutlinerTailCall || @@ -7099,7 +7099,7 @@ Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, return None; int Shift = MI.getOperand(3).getImm(); assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12"); - Offset = Sign * (MI.getOperand(2).getImm() << Shift); + Offset = Sign * (MI.getOperand(2).getImm() << Shift); } } return RegImmPair{MI.getOperand(1).getReg(), Offset}; @@ -7175,14 +7175,14 @@ uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const { return get(Opc).TSFlags & AArch64::ElementSizeMask; } -bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const { - return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike; -} - -bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const { - return get(Opc).TSFlags & AArch64::InstrFlagIsWhile; -} - +bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const { + return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike; +} + +bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const { + return get(Opc).TSFlags & AArch64::InstrFlagIsWhile; +} + unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) { if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr()) return AArch64::BLRNoIP; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.h index 7434987e06..9b924a8440 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.h @@ -112,10 +112,10 @@ public: /// Hint that pairing the given load or store is unprofitable. static void suppressLdStPair(MachineInstr &MI); - Optional<ExtAddrMode> - getAddrModeFromMemoryOp(const MachineInstr &MemI, - const TargetRegisterInfo *TRI) const override; - + Optional<ExtAddrMode> + getAddrModeFromMemoryOp(const MachineInstr &MemI, + const TargetRegisterInfo *TRI) const override; + bool getMemOperandsWithOffsetWidth( const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, @@ -191,9 +191,9 @@ public: MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify = false) const override; - bool analyzeBranchPredicate(MachineBasicBlock &MBB, - MachineBranchPredicate &MBP, - bool AllowModify) const override; + bool analyzeBranchPredicate(MachineBasicBlock &MBB, + MachineBranchPredicate &MBP, + bool AllowModify) const override; unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, @@ -235,10 +235,10 @@ public: /// Return true when there is potentially a faster code sequence /// for an instruction chain ending in ``Root``. All potential patterns are /// listed in the ``Patterns`` array. 
- bool - getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns, - bool DoRegPressureReduce) const override; + bool + getMachineCombinerPatterns(MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns, + bool DoRegPressureReduce) const override; /// Return true when Inst is associative and commutative so that it can be /// reassociated. bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; @@ -280,12 +280,12 @@ public: bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override; /// Returns the vector element size (B, H, S or D) of an SVE opcode. uint64_t getElementSizeForOpcode(unsigned Opc) const; - /// Returns true if the opcode is for an SVE instruction that sets the - /// condition codes as if it's results had been fed to a PTEST instruction - /// along with the same general predicate. - bool isPTestLikeOpcode(unsigned Opc) const; - /// Returns true if the opcode is for an SVE WHILE## instruction. - bool isWhileOpcode(unsigned Opc) const; + /// Returns true if the opcode is for an SVE instruction that sets the + /// condition codes as if it's results had been fed to a PTEST instruction + /// along with the same general predicate. + bool isPTestLikeOpcode(unsigned Opc) const; + /// Returns true if the opcode is for an SVE WHILE## instruction. + bool isWhileOpcode(unsigned Opc) const; /// Returns true if the instruction has a shift by immediate that can be /// executed in one cycle less. static bool isFalkorShiftExtFast(const MachineInstr &MI); @@ -299,13 +299,13 @@ public: Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI, Register Reg) const override; - static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset, - int64_t &NumBytes, - int64_t &NumPredicateVectors, - int64_t &NumDataVectors); - static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, - int64_t &ByteSized, - int64_t &VGSized); + static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset, + int64_t &NumBytes, + int64_t &NumPredicateVectors, + int64_t &NumDataVectors); + static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, + int64_t &ByteSized, + int64_t &VGSized); #define GET_INSTRINFO_HELPER_DECLS #include "AArch64GenInstrInfo.inc" @@ -334,12 +334,12 @@ private: /// Returns an unused general-purpose register which can be used for /// constructing an outlined call if one exists. Returns 0 otherwise. 
unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; - - /// Remove a ptest of a predicate-generating operation that already sets, or - /// can be made to set, the condition codes in an identical manner - bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg, - unsigned PredReg, - const MachineRegisterInfo *MRI) const; + + /// Remove a ptest of a predicate-generating operation that already sets, or + /// can be made to set, the condition codes in an identical manner + bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg, + unsigned PredReg, + const MachineRegisterInfo *MRI) const; }; /// Return true if there is an instruction /after/ \p DefMI and before \p UseMI @@ -423,18 +423,18 @@ static inline bool isIndirectBranchOpcode(int Opc) { return false; } -static inline bool isPTrueOpcode(unsigned Opc) { - switch (Opc) { - case AArch64::PTRUE_B: - case AArch64::PTRUE_H: - case AArch64::PTRUE_S: - case AArch64::PTRUE_D: - return true; - default: - return false; - } -} - +static inline bool isPTrueOpcode(unsigned Opc) { + switch (Opc) { + case AArch64::PTRUE_B: + case AArch64::PTRUE_H: + case AArch64::PTRUE_S: + case AArch64::PTRUE_D: + return true; + default: + return false; + } +} + /// Return opcode to be used for indirect calls. unsigned getBLRCallOpcode(const MachineFunction &MF); @@ -442,7 +442,7 @@ unsigned getBLRCallOpcode(const MachineFunction &MF); #define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits #define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bit #define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits -#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits +#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits // } namespace AArch64 { @@ -475,14 +475,14 @@ enum FalseLaneType { FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2), }; -// NOTE: This is a bit field. -static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1); -static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2); - +// NOTE: This is a bit field. 
+static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1); +static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2); + #undef TSFLAG_ELEMENT_SIZE_TYPE #undef TSFLAG_DESTRUCTIVE_INST_TYPE #undef TSFLAG_FALSE_LANE_TYPE -#undef TSFLAG_INSTR_FLAGS +#undef TSFLAG_INSTR_FLAGS int getSVEPseudoMap(uint16_t Opcode); int getSVERevInstr(uint16_t Opcode); diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.td index 171d3dbaa8..8051a6a937 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.td @@ -25,16 +25,16 @@ def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">, AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">; def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">, AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">; -def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">, - AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">; +def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">, + AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">; def HasVH : Predicate<"Subtarget->hasVH()">, AssemblerPredicate<(all_of FeatureVH), "vh">; def HasLOR : Predicate<"Subtarget->hasLOR()">, AssemblerPredicate<(all_of FeatureLOR), "lor">; -def HasPAuth : Predicate<"Subtarget->hasPAuth()">, - AssemblerPredicate<(all_of FeaturePAuth), "pauth">; +def HasPAuth : Predicate<"Subtarget->hasPAuth()">, + AssemblerPredicate<(all_of FeaturePAuth), "pauth">; def HasJS : Predicate<"Subtarget->hasJS()">, AssemblerPredicate<(all_of FeatureJS), "jsconv">; @@ -69,8 +69,8 @@ def HasPMU : Predicate<"Subtarget->hasPMU()">, def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">, AssemblerPredicate<(all_of FeatureTLB_RMI), "tlb-rmi">; -def HasFlagM : Predicate<"Subtarget->hasFlagM()">, - AssemblerPredicate<(all_of FeatureFlagM), "flagm">; +def HasFlagM : Predicate<"Subtarget->hasFlagM()">, + AssemblerPredicate<(all_of FeatureFlagM), "flagm">; def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">, AssemblerPredicate<(all_of FeatureRCPC_IMMO), "rcpc-immo">; @@ -151,16 +151,16 @@ def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">, AssemblerPredicate<(all_of FeatureMatMulFP32), "f32mm">; def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">, AssemblerPredicate<(all_of FeatureMatMulFP64), "f64mm">; -def HasXS : Predicate<"Subtarget->hasXS()">, - AssemblerPredicate<(all_of FeatureXS), "xs">; -def HasWFxT : Predicate<"Subtarget->hasWFxT()">, - AssemblerPredicate<(all_of FeatureWFxT), "wfxt">; -def HasLS64 : Predicate<"Subtarget->hasLS64()">, - AssemblerPredicate<(all_of FeatureLS64), "ls64">; -def HasBRBE : Predicate<"Subtarget->hasBRBE()">, - AssemblerPredicate<(all_of FeatureBRBE), "brbe">; -def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">, - AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">; +def HasXS : Predicate<"Subtarget->hasXS()">, + AssemblerPredicate<(all_of FeatureXS), "xs">; +def HasWFxT : Predicate<"Subtarget->hasWFxT()">, + AssemblerPredicate<(all_of FeatureWFxT), "wfxt">; +def HasLS64 : Predicate<"Subtarget->hasLS64()">, + AssemblerPredicate<(all_of FeatureLS64), "ls64">; +def HasBRBE : Predicate<"Subtarget->hasBRBE()">, + AssemblerPredicate<(all_of FeatureBRBE), "brbe">; +def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">, + AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : 
Predicate<"Subtarget->isTargetWindows()">; @@ -411,12 +411,12 @@ def AArch64call : SDNode<"AArch64ISD::CALL", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; - -def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER", - SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; - + +def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER", + SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, [SDNPHasChain]>; def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, @@ -518,7 +518,7 @@ def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>; def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>; def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>; def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS), - (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>; + (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>; def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>; def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>; @@ -570,19 +570,19 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; def AArch64srhadd : SDNode<"AArch64ISD::SRHADD", SDT_AArch64binvec>; def AArch64urhadd : SDNode<"AArch64ISD::URHADD", SDT_AArch64binvec>; -def AArch64shadd : SDNode<"AArch64ISD::SHADD", SDT_AArch64binvec>; -def AArch64uhadd : SDNode<"AArch64ISD::UHADD", SDT_AArch64binvec>; - -def AArch64uabd_n : SDNode<"AArch64ISD::UABD", SDT_AArch64binvec>; -def AArch64sabd_n : SDNode<"AArch64ISD::SABD", SDT_AArch64binvec>; - -def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs), - [(AArch64uabd_n node:$lhs, node:$rhs), - (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>; -def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs), - [(AArch64sabd_n node:$lhs, node:$rhs), - (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>; - +def AArch64shadd : SDNode<"AArch64ISD::SHADD", SDT_AArch64binvec>; +def AArch64uhadd : SDNode<"AArch64ISD::UHADD", SDT_AArch64binvec>; + +def AArch64uabd_n : SDNode<"AArch64ISD::UABD", SDT_AArch64binvec>; +def AArch64sabd_n : SDNode<"AArch64ISD::SABD", SDT_AArch64binvec>; + +def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs), + [(AArch64uabd_n node:$lhs, node:$rhs), + (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>; +def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs), + [(AArch64sabd_n node:$lhs, node:$rhs), + (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>; + def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; @@ -617,8 +617,8 @@ let RecomputePerFunction = 1 in { // Avoid generating STRQro if it is slow, unless we're optimizing for code size. 
def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">; - def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>; - def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>; + def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>; + def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>; def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>; def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>; @@ -716,8 +716,8 @@ def : Pat<(AArch64LOADgot tconstpool:$addr), // 32-bit jump table destination is actually only 2 instructions since we can // use the table itself as a PC-relative base. But optimization occurs after // branch relaxation so be pessimistic. -let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch", - isNotDuplicable = 1 in { +let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch", + isNotDuplicable = 1 in { def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, Sched<[]>; @@ -801,34 +801,34 @@ def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> { let Inst{12} = 0; let Predicates = [HasTRACEV8_4]; } - -def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> { - let CRm{1-0} = 0b11; - let Inst{9-8} = 0b10; - let Predicates = [HasXS]; -} - -let Predicates = [HasWFxT] in { -def WFET : RegInputSystemI<0b0000, 0b000, "wfet">; -def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">; -} - -// Branch Record Buffer two-word mnemonic instructions -class BRBEI<bits<3> op2, string keyword> - : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> { - let Inst{31-8} = 0b110101010000100101110010; - let Inst{7-5} = op2; - let Predicates = [HasBRBE]; -} -def BRB_IALL: BRBEI<0b100, "\tiall">; -def BRB_INJ: BRBEI<0b101, "\tinj">; - -} - -// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ -def : TokenAlias<"INJ", "inj">; -def : TokenAlias<"IALL", "iall">; - + +def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> { + let CRm{1-0} = 0b11; + let Inst{9-8} = 0b10; + let Predicates = [HasXS]; +} + +let Predicates = [HasWFxT] in { +def WFET : RegInputSystemI<0b0000, 0b000, "wfet">; +def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">; +} + +// Branch Record Buffer two-word mnemonic instructions +class BRBEI<bits<3> op2, string keyword> + : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> { + let Inst{31-8} = 0b110101010000100101110010; + let Inst{7-5} = op2; + let Predicates = [HasBRBE]; +} +def BRB_IALL: BRBEI<0b100, "\tiall">; +def BRB_INJ: BRBEI<0b101, "\tinj">; + +} + +// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ +def : TokenAlias<"INJ", "inj">; +def : TokenAlias<"IALL", "iall">; + // ARMv8.2-A Dot Product let Predicates = [HasDotProd] in { defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", int_aarch64_neon_sdot>; @@ -849,23 +849,23 @@ def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>; def BFCVTN : SIMD_BFCVTN; def BFCVTN2 : SIMD_BFCVTN2; def BFCVT : BF16ToSinglePrecision<"bfcvt">; - -// Vector-scalar BFDOT: -// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit -// register (the instruction uses a single 32-bit lane from it), so the pattern -// is a bit tricky. 
-def : Pat<(v2f32 (int_aarch64_neon_bfdot - (v2f32 V64:$Rd), (v4bf16 V64:$Rn), - (v4bf16 (bitconvert - (v2i32 (AArch64duplane32 - (v4i32 (bitconvert - (v8bf16 (insert_subvector undef, - (v4bf16 V64:$Rm), - (i64 0))))), - VectorIndexS:$idx)))))), - (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn), - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), - VectorIndexS:$idx)>; + +// Vector-scalar BFDOT: +// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit +// register (the instruction uses a single 32-bit lane from it), so the pattern +// is a bit tricky. +def : Pat<(v2f32 (int_aarch64_neon_bfdot + (v2f32 V64:$Rd), (v4bf16 V64:$Rn), + (v4bf16 (bitconvert + (v2i32 (AArch64duplane32 + (v4i32 (bitconvert + (v8bf16 (insert_subvector undef, + (v4bf16 V64:$Rm), + (i64 0))))), + VectorIndexS:$idx)))))), + (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn), + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), + VectorIndexS:$idx)>; } // ARMv8.6A AArch64 matrix multiplication @@ -965,7 +965,7 @@ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>; } - + let Predicates = [HasComplxNum, HasNEON] in { def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>; @@ -979,47 +979,47 @@ let Predicates = [HasComplxNum, HasNEON] in { } } -multiclass FCMLA_PATS<ValueType ty, RegisterClass Reg> { - def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), - (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>; - def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), - (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>; - def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), - (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>; - def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), - (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>; -} - -multiclass FCMLA_LANE_PATS<ValueType ty, RegisterClass Reg, dag RHSDup> { - def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), - (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>; - def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), - (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>; - def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), - (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>; - def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), - (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>; -} - - -let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { - defm : FCMLA_PATS<v4f16, V64>; - defm : FCMLA_PATS<v8f16, V128>; - - defm : FCMLA_LANE_PATS<v4f16, V64, - (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>; - defm : FCMLA_LANE_PATS<v8f16, V128, - (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>; -} -let Predicates = [HasComplxNum, HasNEON] in { - defm : FCMLA_PATS<v2f32, V64>; - defm : FCMLA_PATS<v4f32, V128>; - defm : FCMLA_PATS<v2f64, V128>; - - defm : FCMLA_LANE_PATS<v4f32, V128, - (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>; 
-} - +multiclass FCMLA_PATS<ValueType ty, RegisterClass Reg> { + def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), + (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>; + def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), + (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>; + def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), + (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>; + def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), + (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>; +} + +multiclass FCMLA_LANE_PATS<ValueType ty, RegisterClass Reg, dag RHSDup> { + def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), + (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>; + def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), + (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>; + def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), + (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>; + def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), + (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>; +} + + +let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { + defm : FCMLA_PATS<v4f16, V64>; + defm : FCMLA_PATS<v8f16, V128>; + + defm : FCMLA_LANE_PATS<v4f16, V64, + (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>; + defm : FCMLA_LANE_PATS<v8f16, V128, + (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>; +} +let Predicates = [HasComplxNum, HasNEON] in { + defm : FCMLA_PATS<v2f32, V64>; + defm : FCMLA_PATS<v4f32, V128>; + defm : FCMLA_PATS<v2f64, V128>; + + defm : FCMLA_LANE_PATS<v4f32, V128, + (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>; +} + // v8.3a Pointer Authentication // These instructions inhabit part of the hint space and so can be used for // armv8 targets. Keeping the old HINT mnemonic when compiling without PA is @@ -1073,7 +1073,7 @@ def : InstAlias<"autib1716", (AUTIB1716), 0>; def : InstAlias<"xpaclri", (XPACLRI), 0>; // These pointer authentication instructions require armv8.3a -let Predicates = [HasPAuth] in { +let Predicates = [HasPAuth] in { // When PA is enabled, a better mnemonic should be emitted. 
def : InstAlias<"paciaz", (PACIAZ), 1>; @@ -1104,8 +1104,8 @@ let Predicates = [HasPAuth] in { defm PAC : SignAuth<0b000, 0b010, "pac">; defm AUT : SignAuth<0b001, 0b011, "aut">; - def XPACI : ClearAuth<0, "xpaci">; - def XPACD : ClearAuth<1, "xpacd">; + def XPACI : ClearAuth<0, "xpaci">; + def XPACD : ClearAuth<1, "xpacd">; def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>; // Combined Instructions @@ -1140,7 +1140,7 @@ let Predicates = [HasPAuth] in { } // v8.3a floating point conversion for javascript -let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in +let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, "fjcvtzs", [(set GPR32:$Rd, @@ -1149,7 +1149,7 @@ def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, } // HasJS, HasFPARMv8 // v8.4 Flag manipulation instructions -let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in { +let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in { def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> { let Inst{20-5} = 0b0000001000000000; } @@ -1157,7 +1157,7 @@ def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">; def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", "{\t$Rn, $imm, $mask}">; -} // HasFlagM +} // HasFlagM // v8.5 flag manipulation instructions let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in { @@ -1206,12 +1206,12 @@ def HWASAN_CHECK_MEMACCESS : Pseudo< (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, Sched<[]>; -} - -let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in { +} + +let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in { def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo< (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), - [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, + [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, Sched<[]>; } @@ -1558,16 +1558,16 @@ def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; -def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))), - (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; -def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))), - (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; +def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))), + (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; +def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))), + (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; -def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))), - (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; -def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))), - (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; +def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))), + (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; +def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))), + 
(UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; @@ -2154,8 +2154,8 @@ let isCall = 1, Defs = [LR], Uses = [SP] in { def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>, Sched<[WriteBrReg]>, PseudoInstExpansion<(BLR GPR64:$Rn)>; - def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>, - Sched<[WriteBrReg]>; + def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>, + Sched<[WriteBrReg]>; } // isCall def : Pat<(AArch64call GPR64:$Rn), @@ -2165,10 +2165,10 @@ def : Pat<(AArch64call GPR64noip:$Rn), (BLRNoIP GPR64noip:$Rn)>, Requires<[SLSBLRMitigation]>; -def : Pat<(AArch64call_rvmarker GPR64:$Rn), - (BLR_RVMARKER GPR64:$Rn)>, - Requires<[NoSLSBLRMitigation]>; - +def : Pat<(AArch64call_rvmarker GPR64:$Rn), + (BLR_RVMARKER GPR64:$Rn)>, + Requires<[NoSLSBLRMitigation]>; + let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; } // isBranch, isTerminator, isBarrier, isIndirectBranch @@ -3900,7 +3900,7 @@ let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, // Floating point immediate move. //===----------------------------------------------------------------------===// -let isReMaterializable = 1, isAsCheapAsAMove = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { defm FMOV : FPMoveImmediate<"fmov">; } @@ -3909,7 +3909,7 @@ defm FMOV : FPMoveImmediate<"fmov">; //===----------------------------------------------------------------------===// defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", - AArch64uabd>; + AArch64uabd>; // Match UABDL in log2-shuffle patterns. def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), (zext (v8i8 V64:$opB))))), @@ -4041,7 +4041,7 @@ def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; -def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; +def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>; @@ -4160,9 +4160,9 @@ defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>; defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", - TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >; -defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>; -defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", AArch64shadd>; + TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >; +defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>; +defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", AArch64shadd>; defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>; @@ -4179,9 +4179,9 @@ defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", - TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >; -defm 
UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>; -defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", AArch64uhadd>; + TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >; +defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>; +defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", AArch64uhadd>; defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>; @@ -4579,10 +4579,10 @@ def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), (FCVTPSv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), (FCVTPUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))), - (FCVTZSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))), - (FCVTZUv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))), + (FCVTZSv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))), + (FCVTZUv1i64 FPR64:$Rn)>; def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))), (FRECPEv1f16 FPR16:$Rn)>; @@ -4754,9 +4754,9 @@ defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>; defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", - AArch64sabd>; + AArch64sabd>; defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", - AArch64sabd>; + AArch64sabd>; defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", @@ -4777,59 +4777,59 @@ defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", - AArch64uabd>; + AArch64uabd>; defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", - BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>; + BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>; defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", - BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>; + BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>; defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>; defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", - BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>; + BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>; defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", - BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>; - -// Additional patterns for [SU]ML[AS]L -multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode, - Instruction INST8B, Instruction INST4H, Instruction INST2S> { - def : Pat<(v4i16 (opnode - V64:$Ra, - (v4i16 (extract_subvector - (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)), - (i64 0))))), - (EXTRACT_SUBREG (v8i16 (INST8B - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub), - V64:$Rn, V64:$Rm)), dsub)>; - def : 
Pat<(v2i32 (opnode - V64:$Ra, - (v2i32 (extract_subvector - (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)), - (i64 0))))), - (EXTRACT_SUBREG (v4i32 (INST4H - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub), - V64:$Rn, V64:$Rm)), dsub)>; - def : Pat<(v1i64 (opnode - V64:$Ra, - (v1i64 (extract_subvector - (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)), - (i64 0))))), - (EXTRACT_SUBREG (v2i64 (INST2S - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub), - V64:$Rn, V64:$Rm)), dsub)>; -} - -defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_umull, - UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>; -defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_smull, - SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>; -defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_umull, - UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>; -defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_smull, - SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>; - + BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>; + +// Additional patterns for [SU]ML[AS]L +multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode, + Instruction INST8B, Instruction INST4H, Instruction INST2S> { + def : Pat<(v4i16 (opnode + V64:$Ra, + (v4i16 (extract_subvector + (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)), + (i64 0))))), + (EXTRACT_SUBREG (v8i16 (INST8B + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub), + V64:$Rn, V64:$Rm)), dsub)>; + def : Pat<(v2i32 (opnode + V64:$Ra, + (v2i32 (extract_subvector + (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)), + (i64 0))))), + (EXTRACT_SUBREG (v4i32 (INST4H + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub), + V64:$Rn, V64:$Rm)), dsub)>; + def : Pat<(v1i64 (opnode + V64:$Ra, + (v1i64 (extract_subvector + (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)), + (i64 0))))), + (EXTRACT_SUBREG (v2i64 (INST2S + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub), + V64:$Rn, V64:$Rm)), dsub)>; +} + +defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_umull, + UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>; +defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_smull, + SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>; +defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_umull, + UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>; +defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_smull, + SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>; + // Additional patterns for SMULL and UMULL multiclass Neon_mul_widen_patterns<SDPatternOperator opnode, Instruction INST8B, Instruction INST4H, Instruction INST2S> { @@ -5041,26 +5041,26 @@ defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; - -let Predicates = [HasFullFP16] in { -def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))), - (FADDPv2i16p - (EXTRACT_SUBREG - (FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))), - dsub))>; -def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))), - (FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>; -} -def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))), - (FADDPv2i32p - (EXTRACT_SUBREG - (FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))), - dsub))>; -def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))), - (FADDPv2i32p V64:$Rn)>; -def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))), - (FADDPv2i64p V128:$Rn)>; - + +let Predicates = 
[HasFullFP16] in { +def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))), + (FADDPv2i16p + (EXTRACT_SUBREG + (FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))), + dsub))>; +def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))), + (FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>; +} +def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))), + (FADDPv2i32p + (EXTRACT_SUBREG + (FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))), + dsub))>; +def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))), + (FADDPv2i32p V64:$Rn)>; +def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))), + (FADDPv2i64p V128:$Rn)>; + def : Pat<(v2i64 (AArch64saddv V128:$Rn)), (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), @@ -5312,16 +5312,16 @@ def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn), (i64 0)), dsub)>; -def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0), - (i64 VectorIndexH:$imm)), - (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>; -def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0), - (i64 VectorIndexS:$imm)), - (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>; -def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), - (i64 VectorIndexD:$imm)), - (INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>; - +def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0), + (i64 VectorIndexH:$imm)), + (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>; +def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0), + (i64 VectorIndexS:$imm)), + (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>; +def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), + (i64 VectorIndexD:$imm)), + (INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>; + def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn), (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))), (INSvi16lane @@ -6833,18 +6833,18 @@ def : Pat<(i32 (trunc GPR64sp:$src)), // __builtin_trap() uses the BRK instruction on AArch64. def : Pat<(trap), (BRK 1)>; -def : Pat<(debugtrap), (BRK 0xF000)>; - -def ubsan_trap_xform : SDNodeXForm<timm, [{ - return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32); -}]>; - -def ubsan_trap_imm : TImmLeaf<i32, [{ - return isUInt<8>(Imm); -}], ubsan_trap_xform>; - -def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>; - +def : Pat<(debugtrap), (BRK 0xF000)>; + +def ubsan_trap_xform : SDNodeXForm<timm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32); +}]>; + +def ubsan_trap_imm : TImmLeaf<i32, [{ + return isUInt<8>(Imm); +}], ubsan_trap_xform>; + +def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>; + // Multiply high patterns which multiply the lower subvector using smull/umull // and the upper subvector with smull2/umull2. Then shuffle the high the high // part of both results together. @@ -7639,9 +7639,9 @@ def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), (vector_extract (v4f32 FPR128:$Rn), (i64 1))), (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; -def : Pat<(fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)), - (vector_extract (v8f16 FPR128:$Rn), (i64 1))), - (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; +def : Pat<(fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)), + (vector_extract (v8f16 FPR128:$Rn), (i64 1))), + (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; // Scalar 64-bit shifts in FPR64 registers. 
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), @@ -7844,23 +7844,23 @@ let AddedComplexity = 10 in { // FIXME: add SVE dot-product patterns. } -let Predicates = [HasLS64] in { - def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn), - (outs GPR64x8:$Rt)>; - def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn), - (outs)>; - def ST64BV: Store64BV<0b011, "st64bv">; - def ST64BV0: Store64BV<0b010, "st64bv0">; - - class ST64BPattern<Intrinsic intrinsic, Instruction instruction> - : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7), - (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>; - - def : ST64BPattern<int_aarch64_st64b, ST64B>; - def : ST64BPattern<int_aarch64_st64bv, ST64BV>; - def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>; -} - +let Predicates = [HasLS64] in { + def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn), + (outs GPR64x8:$Rt)>; + def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn), + (outs)>; + def ST64BV: Store64BV<0b011, "st64bv">; + def ST64BV0: Store64BV<0b010, "st64bv0">; + + class ST64BPattern<Intrinsic intrinsic, Instruction instruction> + : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7), + (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>; + + def : ST64BPattern<int_aarch64_st64b, ST64B>; + def : ST64BPattern<int_aarch64_st64bv, ST64BV>; + def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>; +} + include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index ad180cb293..f87385ccd4 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1186,10 +1186,10 @@ bool AArch64LoadStoreOpt::findMatchingStore( // store instruction writes and the stored value is not modified, we can // promote the load. Since we do not handle stores with pre-/post-index, // it's unnecessary to check if BaseReg is modified by the store itself. - // Also we can't handle stores without an immediate offset operand, - // while the operand might be the address for a global variable. + // Also we can't handle stores without an immediate offset operand, + // while the operand might be the address for a global variable. if (MI.mayStore() && isMatchingStore(LoadMI, MI) && - BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() && + BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() && isLdOffsetInRangeOfSt(LoadMI, MI, TII) && ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) { StoreI = MBBI; @@ -1552,27 +1552,27 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, continue; } } - // If the destination register of one load is the same register or a - // sub/super register of the other load, bail and keep looking. A - // load-pair instruction with both destination registers the same is - // UNPREDICTABLE and will result in an exception. 
- if (MayLoad && - TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) { + // If the destination register of one load is the same register or a + // sub/super register of the other load, bail and keep looking. A + // load-pair instruction with both destination registers the same is + // UNPREDICTABLE and will result in an exception. + if (MayLoad && + TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) { LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); continue; } - // If the BaseReg has been modified, then we cannot do the optimization. - // For example, in the following pattern - // ldr x1 [x2] - // ldr x2 [x3] - // ldr x4 [x2, #8], - // the first and third ldr cannot be converted to ldp x1, x4, [x2] - if (!ModifiedRegUnits.available(BaseReg)) - return E; - + // If the BaseReg has been modified, then we cannot do the optimization. + // For example, in the following pattern + // ldr x1 [x2] + // ldr x2 [x3] + // ldr x4 [x2, #8], + // the first and third ldr cannot be converted to ldp x1, x4, [x2] + if (!ModifiedRegUnits.available(BaseReg)) + return E; + // If the Rt of the second instruction was not modified or used between // the two instructions and none of the instructions between the second // and first alias with the second, we can combine the second into the @@ -1763,11 +1763,11 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, return false; } -static bool needsWinCFI(const MachineFunction *MF) { - return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() && - MF->getFunction().needsUnwindTableEntry(); -} - +static bool needsWinCFI(const MachineFunction *MF) { + return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF->getFunction().needsUnwindTableEntry(); +} + MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); @@ -1808,11 +1808,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( // the memory access (I) and the increment (MBBI) can access the memory // region defined by [SP, MBBI]. const bool BaseRegSP = BaseReg == AArch64::SP; - if (BaseRegSP && needsWinCFI(I->getMF())) { + if (BaseRegSP && needsWinCFI(I->getMF())) { // FIXME: For now, we always block the optimization over SP in windows // targets as it requires to adjust the unwind/debug info, messing up // the unwind info can actually cause a miscompile. - return E; + return E; } for (unsigned Count = 0; MBBI != E && Count < Limit; @@ -1868,14 +1868,14 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( } } - const bool BaseRegSP = BaseReg == AArch64::SP; - if (BaseRegSP && needsWinCFI(I->getMF())) { - // FIXME: For now, we always block the optimization over SP in windows - // targets as it requires to adjust the unwind/debug info, messing up - // the unwind info can actually cause a miscompile. - return E; - } - + const bool BaseRegSP = BaseReg == AArch64::SP; + if (BaseRegSP && needsWinCFI(I->getMF())) { + // FIXME: For now, we always block the optimization over SP in windows + // targets as it requires to adjust the unwind/debug info, messing up + // the unwind info can actually cause a miscompile. + return E; + } + // Track which register units have been modified and used between the first // insn (inclusive) and the second insn. 
ModifiedRegUnits.clear(); diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MCInstLower.cpp index 10e191ff44..c923f53281 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -203,12 +203,12 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO, RefFlags |= AArch64MCExpr::VK_SABS; } else { RefFlags |= AArch64MCExpr::VK_ABS; - - if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) - RefFlags |= AArch64MCExpr::VK_PAGE; - else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == - AArch64II::MO_PAGEOFF) - RefFlags |= AArch64MCExpr::VK_PAGEOFF | AArch64MCExpr::VK_NC; + + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) + RefFlags |= AArch64MCExpr::VK_PAGE; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) + RefFlags |= AArch64MCExpr::VK_PAGEOFF | AArch64MCExpr::VK_NC; } if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G3) diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp index 41343ba970..ebb501b779 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp @@ -14,9 +14,9 @@ //===----------------------------------------------------------------------===// #include "AArch64MachineFunctionInfo.h" -#include "AArch64InstrInfo.h" -#include <llvm/IR/Metadata.h> -#include <llvm/IR/Module.h> +#include "AArch64InstrInfo.h" +#include <llvm/IR/Metadata.h> +#include <llvm/IR/Module.h> using namespace llvm; @@ -33,82 +33,82 @@ void AArch64FunctionInfo::initializeBaseYamlFields( if (YamlMFI.HasRedZone.hasValue()) HasRedZone = YamlMFI.HasRedZone; } - -static std::pair<bool, bool> GetSignReturnAddress(const Function &F) { - // The function should be signed in the following situations: - // - sign-return-address=all - // - sign-return-address=non-leaf and the functions spills the LR - if (!F.hasFnAttribute("sign-return-address")) { - const Module &M = *F.getParent(); - if (const auto *Sign = mdconst::extract_or_null<ConstantInt>( - M.getModuleFlag("sign-return-address"))) { - if (Sign->getZExtValue()) { - if (const auto *All = mdconst::extract_or_null<ConstantInt>( - M.getModuleFlag("sign-return-address-all"))) - return {true, All->getZExtValue()}; - return {true, false}; - } - } - return {false, false}; - } - - StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString(); - if (Scope.equals("none")) - return {false, false}; - - if (Scope.equals("all")) - return {true, true}; - - assert(Scope.equals("non-leaf")); - return {true, false}; -} - -static bool ShouldSignWithBKey(const Function &F) { - if (!F.hasFnAttribute("sign-return-address-key")) { - if (const auto *BKey = mdconst::extract_or_null<ConstantInt>( - F.getParent()->getModuleFlag("sign-return-address-with-bkey"))) - return BKey->getZExtValue(); - return false; - } - - const StringRef Key = - F.getFnAttribute("sign-return-address-key").getValueAsString(); - assert(Key.equals_lower("a_key") || Key.equals_lower("b_key")); - return Key.equals_lower("b_key"); -} - -AArch64FunctionInfo::AArch64FunctionInfo(MachineFunction &MF) : MF(MF) { - // If we already know that the function doesn't have a redzone, set - // HasRedZone here. 
- if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone)) - HasRedZone = false; - - const Function &F = MF.getFunction(); - std::tie(SignReturnAddress, SignReturnAddressAll) = GetSignReturnAddress(F); - SignWithBKey = ShouldSignWithBKey(F); - - if (!F.hasFnAttribute("branch-target-enforcement")) { - if (const auto *BTE = mdconst::extract_or_null<ConstantInt>( - F.getParent()->getModuleFlag("branch-target-enforcement"))) - BranchTargetEnforcement = BTE->getZExtValue(); - return; - } - - const StringRef BTIEnable = F.getFnAttribute("branch-target-enforcement").getValueAsString(); - assert(BTIEnable.equals_lower("true") || BTIEnable.equals_lower("false")); - BranchTargetEnforcement = BTIEnable.equals_lower("true"); -} - -bool AArch64FunctionInfo::shouldSignReturnAddress(bool SpillsLR) const { - if (!SignReturnAddress) - return false; - if (SignReturnAddressAll) - return true; - return SpillsLR; -} - -bool AArch64FunctionInfo::shouldSignReturnAddress() const { - return shouldSignReturnAddress(llvm::any_of( - MF.getFrameInfo().getCalleeSavedInfo(), - [](const auto &Info) { return Info.getReg() == AArch64::LR; })); -} + +static std::pair<bool, bool> GetSignReturnAddress(const Function &F) { + // The function should be signed in the following situations: + // - sign-return-address=all + // - sign-return-address=non-leaf and the functions spills the LR + if (!F.hasFnAttribute("sign-return-address")) { + const Module &M = *F.getParent(); + if (const auto *Sign = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("sign-return-address"))) { + if (Sign->getZExtValue()) { + if (const auto *All = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("sign-return-address-all"))) + return {true, All->getZExtValue()}; + return {true, false}; + } + } + return {false, false}; + } + + StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString(); + if (Scope.equals("none")) + return {false, false}; + + if (Scope.equals("all")) + return {true, true}; + + assert(Scope.equals("non-leaf")); + return {true, false}; +} + +static bool ShouldSignWithBKey(const Function &F) { + if (!F.hasFnAttribute("sign-return-address-key")) { + if (const auto *BKey = mdconst::extract_or_null<ConstantInt>( + F.getParent()->getModuleFlag("sign-return-address-with-bkey"))) + return BKey->getZExtValue(); + return false; + } + + const StringRef Key = + F.getFnAttribute("sign-return-address-key").getValueAsString(); + assert(Key.equals_lower("a_key") || Key.equals_lower("b_key")); + return Key.equals_lower("b_key"); +} + +AArch64FunctionInfo::AArch64FunctionInfo(MachineFunction &MF) : MF(MF) { + // If we already know that the function doesn't have a redzone, set + // HasRedZone here. 
+ if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone)) + HasRedZone = false; + + const Function &F = MF.getFunction(); + std::tie(SignReturnAddress, SignReturnAddressAll) = GetSignReturnAddress(F); + SignWithBKey = ShouldSignWithBKey(F); + + if (!F.hasFnAttribute("branch-target-enforcement")) { + if (const auto *BTE = mdconst::extract_or_null<ConstantInt>( + F.getParent()->getModuleFlag("branch-target-enforcement"))) + BranchTargetEnforcement = BTE->getZExtValue(); + return; + } + + const StringRef BTIEnable = F.getFnAttribute("branch-target-enforcement").getValueAsString(); + assert(BTIEnable.equals_lower("true") || BTIEnable.equals_lower("false")); + BranchTargetEnforcement = BTIEnable.equals_lower("true"); +} + +bool AArch64FunctionInfo::shouldSignReturnAddress(bool SpillsLR) const { + if (!SignReturnAddress) + return false; + if (SignReturnAddressAll) + return true; + return SpillsLR; +} + +bool AArch64FunctionInfo::shouldSignReturnAddress() const { + return shouldSignReturnAddress(llvm::any_of( + MF.getFrameInfo().getCalleeSavedInfo(), + [](const auto &Info) { return Info.getReg() == AArch64::LR; })); +} diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.h index f60e2b6c31..b3f35a46c7 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -35,9 +35,9 @@ class MachineInstr; /// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and /// contains private AArch64-specific information for each MachineFunction. class AArch64FunctionInfo final : public MachineFunctionInfo { - /// Backreference to the machine function. - MachineFunction &MF; - + /// Backreference to the machine function. + MachineFunction &MF; + /// Number of bytes of arguments this function has on the stack. If the callee /// is expected to restore the argument stack this should be a multiple of 16, /// all usable during a tail call. @@ -128,39 +128,39 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// that must be forwarded to every musttail call. SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms; - /// FrameIndex for the tagged base pointer. - Optional<int> TaggedBasePointerIndex; - - /// Offset from SP-at-entry to the tagged base pointer. - /// Tagged base pointer is set up to point to the first (lowest address) - /// tagged stack slot. - unsigned TaggedBasePointerOffset; + /// FrameIndex for the tagged base pointer. + Optional<int> TaggedBasePointerIndex; + /// Offset from SP-at-entry to the tagged base pointer. + /// Tagged base pointer is set up to point to the first (lowest address) + /// tagged stack slot. + unsigned TaggedBasePointerOffset; + /// OutliningStyle denotes, if a function was outined, how it was outlined, /// e.g. Tail Call, Thunk, or Function if none apply. Optional<std::string> OutliningStyle; - // Offset from SP-after-callee-saved-spills (i.e. SP-at-entry minus - // CalleeSavedStackSize) to the address of the frame record. - int CalleeSaveBaseToFrameRecordOffset = 0; - - /// SignReturnAddress is true if PAC-RET is enabled for the function with - /// defaults being sign non-leaf functions only, with the B key. - bool SignReturnAddress = false; - - /// SignReturnAddressAll modifies the default PAC-RET mode to signing leaf - /// functions as well. 
- bool SignReturnAddressAll = false; - - /// SignWithBKey modifies the default PAC-RET mode to signing with the B key. - bool SignWithBKey = false; - - /// BranchTargetEnforcement enables placing BTI instructions at potential - /// indirect branch destinations. - bool BranchTargetEnforcement = false; - + // Offset from SP-after-callee-saved-spills (i.e. SP-at-entry minus + // CalleeSavedStackSize) to the address of the frame record. + int CalleeSaveBaseToFrameRecordOffset = 0; + + /// SignReturnAddress is true if PAC-RET is enabled for the function with + /// defaults being sign non-leaf functions only, with the B key. + bool SignReturnAddress = false; + + /// SignReturnAddressAll modifies the default PAC-RET mode to signing leaf + /// functions as well. + bool SignReturnAddressAll = false; + + /// SignWithBKey modifies the default PAC-RET mode to signing with the B key. + bool SignWithBKey = false; + + /// BranchTargetEnforcement enables placing BTI instructions at potential + /// indirect branch destinations. + bool BranchTargetEnforcement = false; + public: - explicit AArch64FunctionInfo(MachineFunction &MF); + explicit AArch64FunctionInfo(MachineFunction &MF); void initializeBaseYamlFields(const yaml::AArch64FunctionInfo &YamlMFI); @@ -297,14 +297,14 @@ public: void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } unsigned getJumpTableEntrySize(int Idx) const { - return JumpTableEntryInfo[Idx].first; + return JumpTableEntryInfo[Idx].first; } MCSymbol *getJumpTableEntryPCRelSymbol(int Idx) const { - return JumpTableEntryInfo[Idx].second; + return JumpTableEntryInfo[Idx].second; } void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym) { - if ((unsigned)Idx >= JumpTableEntryInfo.size()) - JumpTableEntryInfo.resize(Idx+1); + if ((unsigned)Idx >= JumpTableEntryInfo.size()) + JumpTableEntryInfo.resize(Idx+1); JumpTableEntryInfo[Idx] = std::make_pair(Size, PCRelSym); } @@ -346,11 +346,11 @@ public: return ForwardedMustTailRegParms; } - Optional<int> getTaggedBasePointerIndex() const { - return TaggedBasePointerIndex; - } - void setTaggedBasePointerIndex(int Index) { TaggedBasePointerIndex = Index; } - + Optional<int> getTaggedBasePointerIndex() const { + return TaggedBasePointerIndex; + } + void setTaggedBasePointerIndex(int Index) { TaggedBasePointerIndex = Index; } + unsigned getTaggedBasePointerOffset() const { return TaggedBasePointerOffset; } @@ -358,26 +358,26 @@ public: TaggedBasePointerOffset = Offset; } - int getCalleeSaveBaseToFrameRecordOffset() const { - return CalleeSaveBaseToFrameRecordOffset; - } - void setCalleeSaveBaseToFrameRecordOffset(int Offset) { - CalleeSaveBaseToFrameRecordOffset = Offset; - } - - bool shouldSignReturnAddress() const; - bool shouldSignReturnAddress(bool SpillsLR) const; - - bool shouldSignWithBKey() const { return SignWithBKey; } - - bool branchTargetEnforcement() const { return BranchTargetEnforcement; } - + int getCalleeSaveBaseToFrameRecordOffset() const { + return CalleeSaveBaseToFrameRecordOffset; + } + void setCalleeSaveBaseToFrameRecordOffset(int Offset) { + CalleeSaveBaseToFrameRecordOffset = Offset; + } + + bool shouldSignReturnAddress() const; + bool shouldSignReturnAddress(bool SpillsLR) const; + + bool shouldSignWithBKey() const { return SignWithBKey; } + + bool branchTargetEnforcement() const { return BranchTargetEnforcement; } + private: // Hold the lists of LOHs. 
MILOHContainer LOHContainerSet; SetOfInstructions LOHRelated; - SmallVector<std::pair<unsigned, MCSymbol *>, 2> JumpTableEntryInfo; + SmallVector<std::pair<unsigned, MCSymbol *>, 2> JumpTableEntryInfo; }; namespace yaml { diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MacroFusion.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MacroFusion.cpp index f3b8ef16d6..0e9cb143f2 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -21,7 +21,7 @@ namespace { /// CMN, CMP, TST followed by Bcc static bool isArithmeticBccPair(const MachineInstr *FirstMI, - const MachineInstr &SecondMI, bool CmpOnly) { + const MachineInstr &SecondMI, bool CmpOnly) { if (SecondMI.getOpcode() != AArch64::Bcc) return false; @@ -29,13 +29,13 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI, if (FirstMI == nullptr) return true; - // If we're in CmpOnly mode, we only fuse arithmetic instructions that - // discard their result. - if (CmpOnly && !(FirstMI->getOperand(0).getReg() == AArch64::XZR || - FirstMI->getOperand(0).getReg() == AArch64::WZR)) { - return false; - } - + // If we're in CmpOnly mode, we only fuse arithmetic instructions that + // discard their result. + if (CmpOnly && !(FirstMI->getOperand(0).getReg() == AArch64::XZR || + FirstMI->getOperand(0).getReg() == AArch64::WZR)) { + return false; + } + switch (FirstMI->getOpcode()) { case AArch64::ADDSWri: case AArch64::ADDSWrr: @@ -387,11 +387,11 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, // All checking functions assume that the 1st instr is a wildcard if it is // unspecified. - if (ST.hasCmpBccFusion() || ST.hasArithmeticBccFusion()) { - bool CmpOnly = !ST.hasArithmeticBccFusion(); - if (isArithmeticBccPair(FirstMI, SecondMI, CmpOnly)) - return true; - } + if (ST.hasCmpBccFusion() || ST.hasArithmeticBccFusion()) { + bool CmpOnly = !ST.hasArithmeticBccFusion(); + if (isArithmeticBccPair(FirstMI, SecondMI, CmpOnly)) + return true; + } if (ST.hasArithmeticCbzFusion() && isArithmeticCbzPair(FirstMI, SecondMI)) return true; if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI)) diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp index 019220e3a5..82b610f995 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp @@ -408,11 +408,11 @@ bool AArch64RedundantCopyElimination::optimizeBlock(MachineBasicBlock *MBB) { O.getReg() != CmpReg; })) continue; - - // Don't remove a move immediate that implicitly defines the upper - // bits as different. - if (TRI->isSuperRegister(DefReg, KnownReg.Reg) && KnownReg.Imm < 0) - continue; + + // Don't remove a move immediate that implicitly defines the upper + // bits as different. 
+ if (TRI->isSuperRegister(DefReg, KnownReg.Reg) && KnownReg.Imm < 0) + continue; } if (IsCopy) diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.cpp index f90856d14b..2aeea84ae2 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -24,7 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" -#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/Support/raw_ostream.h" @@ -240,14 +240,14 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask; } -const uint32_t *AArch64RegisterInfo::getCustomEHPadPreservedMask( - const MachineFunction &MF) const { - if (MF.getSubtarget<AArch64Subtarget>().isTargetLinux()) - return CSR_AArch64_AAPCS_RegMask; - - return nullptr; -} - +const uint32_t *AArch64RegisterInfo::getCustomEHPadPreservedMask( + const MachineFunction &MF) const { + if (MF.getSubtarget<AArch64Subtarget>().isTargetLinux()) + return CSR_AArch64_AAPCS_RegMask; + + return nullptr; +} + const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { if (TT.isOSDarwin()) return CSR_Darwin_AArch64_TLS_RegMask; @@ -334,16 +334,16 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF, } bool AArch64RegisterInfo::isAnyArgRegReserved(const MachineFunction &MF) const { - return llvm::any_of(*AArch64::GPR64argRegClass.MC, [this, &MF](MCPhysReg r) { - return isReservedReg(MF, r); - }); + return llvm::any_of(*AArch64::GPR64argRegClass.MC, [this, &MF](MCPhysReg r) { + return isReservedReg(MF, r); + }); } void AArch64RegisterInfo::emitReservedArgRegCallError( const MachineFunction &MF) const { const Function &F = MF.getFunction(); - F.getContext().diagnose(DiagnosticInfoUnsupported{F, ("AArch64 doesn't support" - " function calls if any of the argument registers is reserved.")}); + F.getContext().diagnose(DiagnosticInfoUnsupported{F, ("AArch64 doesn't support" + " function calls if any of the argument registers is reserved.")}); } bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF, @@ -525,16 +525,16 @@ bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const { assert(MI && "Unable to get the legal offset for nil instruction."); - StackOffset SaveOffset = StackOffset::getFixed(Offset); + StackOffset SaveOffset = StackOffset::getFixed(Offset); return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal; } /// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx /// at the beginning of the basic block. 
-Register -AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, - int FrameIdx, - int64_t Offset) const { +Register +AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, + int FrameIdx, + int64_t Offset) const { MachineBasicBlock::iterator Ins = MBB->begin(); DebugLoc DL; // Defaults to "unknown" if (Ins != MBB->end()) @@ -544,7 +544,7 @@ AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); const MCInstrDesc &MCID = TII->get(AArch64::ADDXri); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); + Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF)); unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); @@ -552,21 +552,21 @@ AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, .addFrameIndex(FrameIdx) .addImm(Offset) .addImm(Shifter); - - return BaseReg; + + return BaseReg; } void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const { // ARM doesn't need the general 64-bit offsets - StackOffset Off = StackOffset::getFixed(Offset); + StackOffset Off = StackOffset::getFixed(Offset); unsigned i = 0; while (!MI.getOperand(i).isFI()) { ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } - + const MachineFunction *MF = MI.getParent()->getParent(); const AArch64InstrInfo *TII = MF->getSubtarget<AArch64Subtarget>().getInstrInfo(); @@ -596,33 +596,33 @@ createScratchRegisterForInstruction(MachineInstr &MI, } } -void AArch64RegisterInfo::getOffsetOpcodes( - const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const { - // The smallest scalable element supported by scaled SVE addressing - // modes are predicates, which are 2 scalable bytes in size. So the scalable - // byte offset must always be a multiple of 2. - assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset"); - - // Add fixed-sized offset using existing DIExpression interface. - DIExpression::appendOffset(Ops, Offset.getFixed()); - - unsigned VG = getDwarfRegNum(AArch64::VG, true); - int64_t VGSized = Offset.getScalable() / 2; - if (VGSized > 0) { - Ops.push_back(dwarf::DW_OP_constu); - Ops.push_back(VGSized); - Ops.append({dwarf::DW_OP_bregx, VG, 0ULL}); - Ops.push_back(dwarf::DW_OP_mul); - Ops.push_back(dwarf::DW_OP_plus); - } else if (VGSized < 0) { - Ops.push_back(dwarf::DW_OP_constu); - Ops.push_back(-VGSized); - Ops.append({dwarf::DW_OP_bregx, VG, 0ULL}); - Ops.push_back(dwarf::DW_OP_mul); - Ops.push_back(dwarf::DW_OP_minus); - } -} - +void AArch64RegisterInfo::getOffsetOpcodes( + const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const { + // The smallest scalable element supported by scaled SVE addressing + // modes are predicates, which are 2 scalable bytes in size. So the scalable + // byte offset must always be a multiple of 2. + assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset"); + + // Add fixed-sized offset using existing DIExpression interface. 
+ DIExpression::appendOffset(Ops, Offset.getFixed()); + + unsigned VG = getDwarfRegNum(AArch64::VG, true); + int64_t VGSized = Offset.getScalable() / 2; + if (VGSized > 0) { + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(VGSized); + Ops.append({dwarf::DW_OP_bregx, VG, 0ULL}); + Ops.push_back(dwarf::DW_OP_mul); + Ops.push_back(dwarf::DW_OP_plus); + } else if (VGSized < 0) { + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(-VGSized); + Ops.append({dwarf::DW_OP_bregx, VG, 0ULL}); + Ops.push_back(dwarf::DW_OP_mul); + Ops.push_back(dwarf::DW_OP_minus); + } +} + void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { @@ -640,26 +640,26 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED; Register FrameReg; - // Special handling of dbg_value, stackmap patchpoint statepoint instructions. - if (MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT || - MI.getOpcode() == TargetOpcode::STATEPOINT) { + // Special handling of dbg_value, stackmap patchpoint statepoint instructions. + if (MI.getOpcode() == TargetOpcode::STACKMAP || + MI.getOpcode() == TargetOpcode::PATCHPOINT || + MI.getOpcode() == TargetOpcode::STATEPOINT) { StackOffset Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, /*PreferFP=*/true, /*ForSimm=*/false); - Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); + Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); return; } if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); - StackOffset Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex); - assert(!Offset.getScalable() && - "Frame offsets with a scalable component are not supported"); - FI.ChangeToImmediate(Offset.getFixed()); + StackOffset Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex); + assert(!Offset.getScalable() && + "Frame offsets with a scalable component are not supported"); + FI.ChangeToImmediate(Offset.getFixed()); return; } @@ -668,11 +668,11 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // TAGPstack must use the virtual frame register in its 3rd operand. 
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); FrameReg = MI.getOperand(3).getReg(); - Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) + - AFI->getTaggedBasePointerOffset()); + Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) + + AFI->getTaggedBasePointerOffset()); } else if (Tagged) { - StackOffset SPOffset = StackOffset::getFixed( - MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize()); + StackOffset SPOffset = StackOffset::getFixed( + MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize()); if (MFI.hasVarSizedObjects() || isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) != (AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) { @@ -693,8 +693,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; } FrameReg = AArch64::SP; - Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) + - (int64_t)MFI.getStackSize()); + Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) + + (int64_t)MFI.getStackSize()); } else { Offset = TFI->resolveFrameIndexReference( MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true); @@ -765,19 +765,19 @@ unsigned AArch64RegisterInfo::getLocalAddressRegister( return getBaseRegister(); return getFrameRegister(MF); } - -/// SrcRC and DstRC will be morphed into NewRC if this returns true -bool AArch64RegisterInfo::shouldCoalesce( - MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, - const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC, LiveIntervals &LIS) const { - if (MI->isCopy() && - ((DstRC->getID() == AArch64::GPR64RegClassID) || - (DstRC->getID() == AArch64::GPR64commonRegClassID)) && - MI->getOperand(0).getSubReg() && MI->getOperand(1).getSubReg()) - // Do not coalesce in the case of a 32-bit subregister copy - // which implements a 32 to 64 bit zero extension - // which relies on the upper 32 bits being zeroed. - return false; - return true; -} + +/// SrcRC and DstRC will be morphed into NewRC if this returns true +bool AArch64RegisterInfo::shouldCoalesce( + MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, + const TargetRegisterClass *DstRC, unsigned DstSubReg, + const TargetRegisterClass *NewRC, LiveIntervals &LIS) const { + if (MI->isCopy() && + ((DstRC->getID() == AArch64::GPR64RegClassID) || + (DstRC->getID() == AArch64::GPR64commonRegClassID)) && + MI->getOperand(0).getSubReg() && MI->getOperand(1).getSubReg()) + // Do not coalesce in the case of a 32-bit subregister copy + // which implements a 32 to 64 bit zero extension + // which relies on the upper 32 bits being zeroed. + return false; + return true; +} diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.h index 0c871ac089..b9a4e6ac16 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.h @@ -72,10 +72,10 @@ public: // Funclets on ARM64 Windows don't preserve any registers. const uint32_t *getNoPreservedMask() const override; - // Unwinders may not preserve all Neon and SVE registers. - const uint32_t * - getCustomEHPadPreservedMask(const MachineFunction &MF) const override; - + // Unwinders may not preserve all Neon and SVE registers. 
+ const uint32_t * + getCustomEHPadPreservedMask(const MachineFunction &MF) const override; + /// getThisReturnPreservedMask - Returns a call preserved mask specific to the /// case that 'returned' is on an i64 first argument if the calling convention /// is one that can (partially) model this attribute with a preserved mask @@ -107,8 +107,8 @@ public: bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override; - Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, - int64_t Offset) const override; + Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, + int64_t Offset) const override; void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, @@ -128,15 +128,15 @@ public: unsigned getLocalAddressRegister(const MachineFunction &MF) const; bool regNeedsCFI(unsigned Reg, unsigned &RegToUseForCFI) const; - - /// SrcRC and DstRC will be morphed into NewRC if this returns true - bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, - unsigned SubReg, const TargetRegisterClass *DstRC, - unsigned DstSubReg, const TargetRegisterClass *NewRC, - LiveIntervals &LIS) const override; - - void getOffsetOpcodes(const StackOffset &Offset, - SmallVectorImpl<uint64_t> &Ops) const override; + + /// SrcRC and DstRC will be morphed into NewRC if this returns true + bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, + unsigned SubReg, const TargetRegisterClass *DstRC, + unsigned DstSubReg, const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; + + void getOffsetOpcodes(const StackOffset &Offset, + SmallVectorImpl<uint64_t> &Ops) const override; }; } // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.td index 28d1988b8a..17ad5b997c 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.td @@ -711,32 +711,32 @@ def XSeqPairClassOperand : //===----- END: v8.1a atomic CASP register operands -----------------------===// -//===----------------------------------------------------------------------===// -// Armv8.7a accelerator extension register operands: 8 consecutive GPRs -// starting with an even one - -let Namespace = "AArch64" in { - foreach i = 0-7 in - def "x8sub_"#i : SubRegIndex<64, !mul(64, i)>; -} - -def Tuples8X : RegisterTuples< - !foreach(i, [0,1,2,3,4,5,6,7], !cast<SubRegIndex>("x8sub_"#i)), - !foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>; - -def GPR64x8Class : RegisterClass<"AArch64", [i64], 64, (trunc Tuples8X, 12)>; -def GPR64x8AsmOp : AsmOperandClass { - let Name = "GPR64x8"; - let ParserMethod = "tryParseGPR64x8"; - let RenderMethod = "addRegOperands"; -} -def GPR64x8 : RegisterOperand<GPR64x8Class, "printGPR64x8"> { - let ParserMatchClass = GPR64x8AsmOp; - let PrintMethod = "printGPR64x8"; -} - -//===----- END: v8.7a accelerator extension register operands -------------===// - +//===----------------------------------------------------------------------===// +// Armv8.7a accelerator extension register operands: 8 consecutive GPRs +// starting with an even one + +let Namespace = "AArch64" in { + foreach i = 0-7 in + def "x8sub_"#i : SubRegIndex<64, !mul(64, i)>; +} + +def 
Tuples8X : RegisterTuples< + !foreach(i, [0,1,2,3,4,5,6,7], !cast<SubRegIndex>("x8sub_"#i)), + !foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>; + +def GPR64x8Class : RegisterClass<"AArch64", [i64], 64, (trunc Tuples8X, 12)>; +def GPR64x8AsmOp : AsmOperandClass { + let Name = "GPR64x8"; + let ParserMethod = "tryParseGPR64x8"; + let RenderMethod = "addRegOperands"; +} +def GPR64x8 : RegisterOperand<GPR64x8Class, "printGPR64x8"> { + let ParserMatchClass = GPR64x8AsmOp; + let PrintMethod = "printGPR64x8"; +} + +//===----- END: v8.7a accelerator extension register operands -------------===// + // SVE predicate registers def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>; def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp index 03b32967a2..84e6327550 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp @@ -221,9 +221,9 @@ shouldReplaceInst(MachineFunction *MF, const MCInstrDesc *InstDesc, // if so, return it. std::string Subtarget = std::string(SchedModel.getSubtargetInfo()->getCPU()); auto InstID = std::make_pair(InstDesc->getOpcode(), Subtarget); - auto It = SIMDInstrTable.find(InstID); - if (It != SIMDInstrTable.end()) - return It->second; + auto It = SIMDInstrTable.find(InstID); + if (It != SIMDInstrTable.end()) + return It->second; unsigned SCIdx = InstDesc->getSchedClass(); const MCSchedClassDesc *SCDesc = @@ -291,9 +291,9 @@ bool AArch64SIMDInstrOpt::shouldExitEarly(MachineFunction *MF, Subpass SP) { case Interleave: std::string Subtarget = std::string(SchedModel.getSubtargetInfo()->getCPU()); - auto It = InterlEarlyExit.find(Subtarget); - if (It != InterlEarlyExit.end()) - return It->second; + auto It = InterlEarlyExit.find(Subtarget); + if (It != InterlEarlyExit.end()) + return It->second; for (auto &I : IRT) { OriginalMCID = &TII->get(I.OrigOpc); diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SVEInstrInfo.td index e09b8401c0..19a71f606b 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -152,8 +152,8 @@ def AArch64fmaxv_p : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>; def AArch64fmaxnmv_p : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>; def AArch64fminv_p : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>; def AArch64fminnmv_p : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>; -def AArch64saddv_p : SDNode<"AArch64ISD::SADDV_PRED", SDT_AArch64Reduce>; -def AArch64uaddv_p : SDNode<"AArch64ISD::UADDV_PRED", SDT_AArch64Reduce>; +def AArch64saddv_p : SDNode<"AArch64ISD::SADDV_PRED", SDT_AArch64Reduce>; +def AArch64uaddv_p : SDNode<"AArch64ISD::UADDV_PRED", SDT_AArch64Reduce>; def AArch64smaxv_p : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>; def AArch64umaxv_p : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>; def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>; @@ -166,84 +166,84 @@ def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>; def SDT_AArch64Arith : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, - SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3> + SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3> ]>; def SDT_AArch64FMA : SDTypeProfile<1, 4, [ 
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>, - SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4> + SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4> ]>; // Predicated operations with the result of inactive lanes being unspecified. def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>; -def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>; +def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>; def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>; -def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>; +def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>; def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>; -def AArch64fmaxnm_p : SDNode<"AArch64ISD::FMAXNM_PRED", SDT_AArch64Arith>; -def AArch64fminnm_p : SDNode<"AArch64ISD::FMINNM_PRED", SDT_AArch64Arith>; -def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>; -def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>; -def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>; -def AArch64lsr_p : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>; -def AArch64mul_p : SDNode<"AArch64ISD::MUL_PRED", SDT_AArch64Arith>; +def AArch64fmaxnm_p : SDNode<"AArch64ISD::FMAXNM_PRED", SDT_AArch64Arith>; +def AArch64fminnm_p : SDNode<"AArch64ISD::FMINNM_PRED", SDT_AArch64Arith>; +def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>; +def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>; +def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>; +def AArch64lsr_p : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>; +def AArch64mul_p : SDNode<"AArch64ISD::MUL_PRED", SDT_AArch64Arith>; def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>; -def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>; -def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>; -def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>; +def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>; +def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>; +def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>; def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>; -def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; -def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; - -def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>, - SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4> -]>; - -// Predicated operations with the result of inactive lanes provided by the last operand. 
-def AArch64clz_mt : SDNode<"AArch64ISD::CTLZ_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64cnt_mt : SDNode<"AArch64ISD::CTPOP_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64fabs_mt : SDNode<"AArch64ISD::FABS_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64abs_mt : SDNode<"AArch64ISD::ABS_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64neg_mt : SDNode<"AArch64ISD::NEG_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; -def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; -def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frintm_mt : SDNode<"AArch64ISD::FFLOOR_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frinti_mt : SDNode<"AArch64ISD::FNEARBYINT_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frintx_mt : SDNode<"AArch64ISD::FRINT_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frinta_mt : SDNode<"AArch64ISD::FROUND_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64frecpx_mt : SDNode<"AArch64ISD::FRECPX_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64rbit_mt : SDNode<"AArch64ISD::BITREVERSE_MERGE_PASSTHRU", SDT_AArch64Arith>; -def AArch64revb_mt : SDNode<"AArch64ISD::BSWAP_MERGE_PASSTHRU", SDT_AArch64Arith>; - -// These are like the above but we don't yet have need for ISD nodes. They allow -// a single pattern to match intrinsic and ISD operand layouts. -def AArch64cls_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cls node:$pt, node:$pg, node:$op)]>; -def AArch64cnot_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cnot node:$pt, node:$pg, node:$op)]>; -def AArch64not_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_not node:$pt, node:$pg, node:$op)]>; - -def SDT_AArch64FCVT : SDTypeProfile<1, 3, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, - SDTCVecEltisVT<1,i1> -]>; - -def SDT_AArch64FCVTR : SDTypeProfile<1, 4, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVec<4>, - SDTCVecEltisVT<1,i1> -]>; - -def AArch64fcvtr_mt : SDNode<"AArch64ISD::FP_ROUND_MERGE_PASSTHRU", SDT_AArch64FCVTR>; -def AArch64fcvte_mt : SDNode<"AArch64ISD::FP_EXTEND_MERGE_PASSTHRU", SDT_AArch64FCVT>; -def AArch64ucvtf_mt : SDNode<"AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; -def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; -def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>; -def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>; - +def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; +def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; + +def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>, + SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4> +]>; + +// Predicated operations with the result of inactive lanes provided by the last operand. 
+def AArch64clz_mt : SDNode<"AArch64ISD::CTLZ_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64cnt_mt : SDNode<"AArch64ISD::CTPOP_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64fabs_mt : SDNode<"AArch64ISD::FABS_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64abs_mt : SDNode<"AArch64ISD::ABS_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64neg_mt : SDNode<"AArch64ISD::NEG_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; +def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>; +def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frintm_mt : SDNode<"AArch64ISD::FFLOOR_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frinti_mt : SDNode<"AArch64ISD::FNEARBYINT_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frintx_mt : SDNode<"AArch64ISD::FRINT_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frinta_mt : SDNode<"AArch64ISD::FROUND_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64frecpx_mt : SDNode<"AArch64ISD::FRECPX_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64rbit_mt : SDNode<"AArch64ISD::BITREVERSE_MERGE_PASSTHRU", SDT_AArch64Arith>; +def AArch64revb_mt : SDNode<"AArch64ISD::BSWAP_MERGE_PASSTHRU", SDT_AArch64Arith>; + +// These are like the above but we don't yet have need for ISD nodes. They allow +// a single pattern to match intrinsic and ISD operand layouts. 
+def AArch64cls_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cls node:$pt, node:$pg, node:$op)]>; +def AArch64cnot_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cnot node:$pt, node:$pg, node:$op)]>; +def AArch64not_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_not node:$pt, node:$pg, node:$op)]>; + +def SDT_AArch64FCVT : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, + SDTCVecEltisVT<1,i1> +]>; + +def SDT_AArch64FCVTR : SDTypeProfile<1, 4, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVec<4>, + SDTCVecEltisVT<1,i1> +]>; + +def AArch64fcvtr_mt : SDNode<"AArch64ISD::FP_ROUND_MERGE_PASSTHRU", SDT_AArch64FCVTR>; +def AArch64fcvte_mt : SDNode<"AArch64ISD::FP_EXTEND_MERGE_PASSTHRU", SDT_AArch64FCVT>; +def AArch64ucvtf_mt : SDNode<"AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; +def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; +def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>; +def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>; + def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; @@ -263,24 +263,24 @@ def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>; def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>; -def setoge_or_setge : PatFrags<(ops node:$lhs, node:$rhs), - [(setoge node:$lhs, node:$rhs), - (setge node:$lhs, node:$rhs)]>; -def setogt_or_setgt : PatFrags<(ops node:$lhs, node:$rhs), - [(setogt node:$lhs, node:$rhs), - (setgt node:$lhs, node:$rhs)]>; -def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs), - [(setoeq node:$lhs, node:$rhs), - (seteq node:$lhs, node:$rhs)]>; -def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs), - [(setone node:$lhs, node:$rhs), - (setne node:$lhs, node:$rhs)]>; -def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2), - (AArch64mul_p node:$pred, node:$src1, node:$src2), [{ - return N->hasOneUse(); -}]>; - - +def setoge_or_setge : PatFrags<(ops node:$lhs, node:$rhs), + [(setoge node:$lhs, node:$rhs), + (setge node:$lhs, node:$rhs)]>; +def setogt_or_setgt : PatFrags<(ops node:$lhs, node:$rhs), + [(setogt node:$lhs, node:$rhs), + (setgt node:$lhs, node:$rhs)]>; +def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs), + [(setoeq node:$lhs, node:$rhs), + (seteq node:$lhs, node:$rhs)]>; +def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs), + [(setone node:$lhs, node:$rhs), + (setne node:$lhs, node:$rhs)]>; +def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2), + (AArch64mul_p node:$pred, node:$src1, node:$src2), [{ + return N->hasOneUse(); +}]>; + + let Predicates = [HasSVE] in { defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>; def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">; @@ -305,7 +305,7 @@ let Predicates = [HasSVE] in { defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>; defm ADD_ZPZZ : sve_int_bin_pred_bhsd<AArch64add_p>; - defm SUB_ZPZZ : sve_int_bin_pred_bhsd<AArch64sub_p>; + defm SUB_ZPZZ : sve_int_bin_pred_bhsd<AArch64sub_p>; let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in { defm 
ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>; @@ -328,12 +328,12 @@ let Predicates = [HasSVE] in { defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", int_aarch64_sve_mad>; defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", int_aarch64_sve_msb>; - defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla, add, AArch64mul_p_oneuse>; - defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls, sub, AArch64mul_p_oneuse>; + defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla, add, AArch64mul_p_oneuse>; + defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls, sub, AArch64mul_p_oneuse>; // SVE predicated integer reductions. - defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", AArch64saddv_p>; - defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", AArch64uaddv_p>; + defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", AArch64saddv_p>; + defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", AArch64uaddv_p>; defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_p>; defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_p>; defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_p>; @@ -346,17 +346,17 @@ let Predicates = [HasSVE] in { defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>; defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>; - defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>; - defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>; - defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_p>; - defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>; + defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>; + defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>; + defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_p>; + defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>; - defm MUL_ZI : sve_int_arith_imm2<"mul", AArch64mul_p>; - defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", int_aarch64_sve_mul, DestructiveBinaryComm>; - defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", "SMULH_ZPZZ", int_aarch64_sve_smulh, DestructiveBinaryComm>; - defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>; + defm MUL_ZI : sve_int_arith_imm2<"mul", AArch64mul_p>; + defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", int_aarch64_sve_mul, DestructiveBinaryComm>; + defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", "SMULH_ZPZZ", int_aarch64_sve_smulh, DestructiveBinaryComm>; + defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>; - defm MUL_ZPZZ : sve_int_bin_pred_bhsd<AArch64mul_p>; + defm MUL_ZPZZ : sve_int_bin_pred_bhsd<AArch64mul_p>; defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ">; defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ">; @@ -372,34 +372,34 @@ let Predicates = [HasSVE] in { defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>; defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>; - defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", AArch64sxt_mt>; - defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb", AArch64uxt_mt>; - defm SXTH_ZPmZ : 
sve_int_un_pred_arit_0_w<0b010, "sxth", AArch64sxt_mt>; - defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", AArch64uxt_mt>; - defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", AArch64sxt_mt>; - defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", AArch64uxt_mt>; - defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", AArch64abs_mt>; - defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", AArch64neg_mt>; - - defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", AArch64cls_mt>; - defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", AArch64clz_mt>; - defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", AArch64cnt_mt>; - defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", AArch64cnot_mt>; - defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", AArch64not_mt>; - defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>; - defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>; - - defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>; - defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>; - defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>; - defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", int_aarch64_sve_umin, DestructiveBinaryComm>; - defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", int_aarch64_sve_sabd, DestructiveBinaryComm>; - defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", int_aarch64_sve_uabd, DestructiveBinaryComm>; - - defm SMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64smax_p>; - defm UMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64umax_p>; - defm SMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64smin_p>; - defm UMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64umin_p>; + defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", AArch64sxt_mt>; + defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb", AArch64uxt_mt>; + defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth", AArch64sxt_mt>; + defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", AArch64uxt_mt>; + defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", AArch64sxt_mt>; + defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", AArch64uxt_mt>; + defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", AArch64abs_mt>; + defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", AArch64neg_mt>; + + defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", AArch64cls_mt>; + defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", AArch64clz_mt>; + defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", AArch64cnt_mt>; + defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", AArch64cnot_mt>; + defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", AArch64not_mt>; + defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>; + defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>; + + defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>; + defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>; + defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>; + defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", int_aarch64_sve_umin, DestructiveBinaryComm>; + defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", int_aarch64_sve_sabd, 
DestructiveBinaryComm>; + defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", int_aarch64_sve_uabd, DestructiveBinaryComm>; + + defm SMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64smax_p>; + defm UMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64umax_p>; + defm SMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64smin_p>; + defm UMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64umin_p>; defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", int_aarch64_sve_frecpe_x>; defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>; @@ -428,11 +428,11 @@ let Predicates = [HasSVE] in { defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ">; defm FADD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fadd_p>; - defm FSUB_ZPZZ : sve_fp_bin_pred_hfd<AArch64fsub_p>; - defm FMUL_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmul_p>; - defm FMAXNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmaxnm_p>; - defm FMINNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fminnm_p>; - defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>; + defm FSUB_ZPZZ : sve_fp_bin_pred_hfd<AArch64fsub_p>; + defm FMUL_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmul_p>; + defm FMAXNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmaxnm_p>; + defm FMINNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fminnm_p>; + defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>; let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in { defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>; @@ -449,10 +449,10 @@ let Predicates = [HasSVE] in { defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>; } - defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>; - defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>; - defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>; - defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>; + defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>; + defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>; + defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>; + defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>; defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>; defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>; @@ -476,14 +476,14 @@ let Predicates = [HasSVE] in { // regalloc. 
def : Pat<(nxv8f16 (AArch64fma_p nxv8i1:$P, nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3)), (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>; - def : Pat<(nxv4f16 (AArch64fma_p nxv4i1:$P, nxv4f16:$Op1, nxv4f16:$Op2, nxv4f16:$Op3)), - (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>; - def : Pat<(nxv2f16 (AArch64fma_p nxv2i1:$P, nxv2f16:$Op1, nxv2f16:$Op2, nxv2f16:$Op3)), - (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>; + def : Pat<(nxv4f16 (AArch64fma_p nxv4i1:$P, nxv4f16:$Op1, nxv4f16:$Op2, nxv4f16:$Op3)), + (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>; + def : Pat<(nxv2f16 (AArch64fma_p nxv2i1:$P, nxv2f16:$Op1, nxv2f16:$Op2, nxv2f16:$Op3)), + (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>; def : Pat<(nxv4f32 (AArch64fma_p nxv4i1:$P, nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3)), (FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>; - def : Pat<(nxv2f32 (AArch64fma_p nxv2i1:$P, nxv2f32:$Op1, nxv2f32:$Op2, nxv2f32:$Op3)), - (FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>; + def : Pat<(nxv2f32 (AArch64fma_p nxv2i1:$P, nxv2f32:$Op1, nxv2f32:$Op2, nxv2f32:$Op3)), + (FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>; def : Pat<(nxv2f64 (AArch64fma_p nxv2i1:$P, nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3)), (FMLA_ZPmZZ_D $P, $Op3, $Op1, $Op2)>; @@ -534,8 +534,8 @@ let Predicates = [HasSVE] in { (DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>; def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))), (DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>; - def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))), - (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; + def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))), + (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; // Duplicate +0.0 into all vector elements def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>; @@ -544,7 +544,7 @@ let Predicates = [HasSVE] in { def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>; def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>; def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>; - def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; // Duplicate Int immediate into all vector elements def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))), @@ -573,7 +573,7 @@ let Predicates = [HasSVE] in { } // Select elements from either vector (predicated) - defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>; + defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>; defm SPLICE_ZPZ : sve_int_perm_splice<"splice", int_aarch64_sve_splice>; @@ -582,8 +582,8 @@ let Predicates = [HasSVE] in { defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>; defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>; - defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", AArch64rbit_mt>; - defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", AArch64revb_mt>; + defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", AArch64rbit_mt>; + defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", AArch64revb_mt>; defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>; defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>; @@ -1035,7 +1035,7 @@ let Predicates = [HasSVE] in { def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>; def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>; - multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, int scale, ComplexPattern AddrCP> { + multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction 
RegRegInst, int scale, ComplexPattern AddrCP> { // reg + imm let AddedComplexity = 2 in { def _reg_imm : Pat<(prefetch (PredTy PPR_3b:$gp), (am_sve_indexed_s6 GPR64sp:$base, simm6s1:$offset), (i32 sve_prfop:$prfop)), @@ -1145,29 +1145,29 @@ let Predicates = [HasSVE] in { def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))), (ZIP2_PPP_B PPR:$Ps, (PFALSE))>; - // Extract subvectors from FP SVE vectors - def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_D ZPR:$Zs)>; - def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 2))), - (UUNPKHI_ZZ_D ZPR:$Zs)>; - def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_S ZPR:$Zs)>; - def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))), - (UUNPKHI_ZZ_S ZPR:$Zs)>; - def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_D ZPR:$Zs)>; - def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 2))), - (UUNPKHI_ZZ_D ZPR:$Zs)>; - - def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_D ZPR:$Zs)>; - def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 2))), - (UUNPKHI_ZZ_D ZPR:$Zs)>; - def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))), - (UUNPKLO_ZZ_S ZPR:$Zs)>; - def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))), - (UUNPKHI_ZZ_S ZPR:$Zs)>; - + // Extract subvectors from FP SVE vectors + def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))), + (UUNPKLO_ZZ_D ZPR:$Zs)>; + def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 2))), + (UUNPKHI_ZZ_D ZPR:$Zs)>; + def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))), + (UUNPKLO_ZZ_S ZPR:$Zs)>; + def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))), + (UUNPKHI_ZZ_S ZPR:$Zs)>; + def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 0))), + (UUNPKLO_ZZ_D ZPR:$Zs)>; + def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 2))), + (UUNPKHI_ZZ_D ZPR:$Zs)>; + + def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 0))), + (UUNPKLO_ZZ_D ZPR:$Zs)>; + def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 2))), + (UUNPKHI_ZZ_D ZPR:$Zs)>; + def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))), + (UUNPKLO_ZZ_S ZPR:$Zs)>; + def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))), + (UUNPKHI_ZZ_S ZPR:$Zs)>; + // Concatenate two predicates. def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)), (UZP1_PPP_S $p1, $p2)>; @@ -1176,18 +1176,18 @@ let Predicates = [HasSVE] in { def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)), (UZP1_PPP_B $p1, $p2)>; - // Concatenate two floating point vectors. - def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)), - (UZP1_ZZZ_S $v1, $v2)>; - def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)), - (UZP1_ZZZ_H $v1, $v2)>; - def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)), - (UZP1_ZZZ_S $v1, $v2)>; - def : Pat<(nxv4bf16 (concat_vectors nxv2bf16:$v1, nxv2bf16:$v2)), - (UZP1_ZZZ_S $v1, $v2)>; - def : Pat<(nxv8bf16 (concat_vectors nxv4bf16:$v1, nxv4bf16:$v2)), - (UZP1_ZZZ_H $v1, $v2)>; - + // Concatenate two floating point vectors. 
+ def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)), + (UZP1_ZZZ_S $v1, $v2)>; + def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)), + (UZP1_ZZZ_H $v1, $v2)>; + def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)), + (UZP1_ZZZ_S $v1, $v2)>; + def : Pat<(nxv4bf16 (concat_vectors nxv2bf16:$v1, nxv2bf16:$v2)), + (UZP1_ZZZ_S $v1, $v2)>; + def : Pat<(nxv8bf16 (concat_vectors nxv4bf16:$v1, nxv4bf16:$v2)), + (UZP1_ZZZ_H $v1, $v2)>; + defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>; defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>; defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>; @@ -1217,10 +1217,10 @@ let Predicates = [HasSVE] in { defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>; defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>; - defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge_or_setge>; - defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt_or_setgt>; - defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq_or_seteq>; - defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone_or_setne>; + defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge_or_setge>; + defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt_or_setgt>; + defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq_or_seteq>; + defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone_or_setne>; defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>; defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>; defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>; @@ -1345,146 +1345,146 @@ let Predicates = [HasSVE] in { defm INDEX_II : sve_int_index_ii<"index", index_vector>; // Unpredicated shifts - defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>; - defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_p>; - defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_p>; + defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>; + defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_p>; + defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_p>; defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">; defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">; defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">; // Predicated shifts - defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>; - defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>; - defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>; - defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>; - - defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>; - defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>; - defm LSL_ZPZI : sve_int_shift_pred_bhsd<AArch64lsl_p, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>; - + defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", 
int_aarch64_sve_asr>; + defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>; + defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>; + defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>; + + defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>; + defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>; + defm LSL_ZPZI : sve_int_shift_pred_bhsd<AArch64lsl_p, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>; + let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in { - defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>; - defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>; - defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>; + defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>; + defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>; + defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>; defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>; } - defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">; - defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">; - defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">; + defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">; + defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">; + defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">; defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", /*isReverseInstr*/ 1>; defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", /*isReverseInstr*/ 1>; defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", /*isReverseInstr*/ 1>; - defm ASR_ZPZZ : sve_int_bin_pred_bhsd<AArch64asr_p>; - defm LSR_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsr_p>; - defm LSL_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsl_p>; - + defm ASR_ZPZZ : sve_int_bin_pred_bhsd<AArch64asr_p>; + defm LSR_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsr_p>; + defm LSL_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsl_p>; + defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>; defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>; defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>; - defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zdr<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, AArch64fcvtr_mt, nxv4f16, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, AArch64fcvte_mt, nxv4f32, nxv4i1, nxv4f16, ElementSizeS>; - defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; - defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; - defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, 
nxv4i1, nxv4i32, ElementSizeS>; - defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; - defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; - defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; - defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zdr<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, AArch64fcvtr_mt, nxv2f16, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>; - defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>; - defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; - defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; - defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; - defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; - defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; - defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; - defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 
0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; - defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; - defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; - defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; - defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; - defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; - - def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))), - (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - // FP_ROUND has an additional 'precise' flag which indicates the type of rounding. - // This is ignored by the pattern below where it is matched by (i64 timm0_1) - def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))), - (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - // Floating-point -> signed integer - def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), - (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))), - (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg), - (sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))), - (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), - (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))), - (SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), - (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))), - (SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), - (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))), - (SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - // Floating-point -> unsigned integer - def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), - (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))), - (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), - (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))), - (UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg), - (and (nxv4i32 ZPR:$Zs), - (nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))), - (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), - (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))), - (UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), - (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))), - (UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - - defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>; - defm 
FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>; - defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", AArch64frintm_mt>; - defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", AArch64frintz_mt>; - defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", AArch64frinta_mt>; - defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", AArch64frintx_mt>; - defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>; - defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", AArch64frecpx_mt>; - defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>; - + defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zdr<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, AArch64fcvtr_mt, nxv4f16, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, AArch64fcvte_mt, nxv4f32, nxv4i1, nxv4f16, ElementSizeS>; + defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zdr<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, AArch64fcvtr_mt, nxv2f16, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>; + defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>; + defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, 
nxv4i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; + defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; + defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; + defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; + defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; + defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + + def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))), + (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + // FP_ROUND has an additional 'precise' flag which indicates the type of rounding. 
+ // This is ignored by the pattern below where it is matched by (i64 timm0_1) + def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))), + (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + // Floating-point -> signed integer + def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), + (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))), + (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg), + (sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))), + (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), + (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))), + (SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), + (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))), + (SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), + (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))), + (SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + // Floating-point -> unsigned integer + def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), + (and (nxv2i64 ZPR:$Zs), + (nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))), + (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), + (and (nxv2i64 ZPR:$Zs), + (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))), + (UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg), + (and (nxv4i32 ZPR:$Zs), + (nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))), + (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), + (and (nxv2i64 ZPR:$Zs), + (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))), + (UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), + (and (nxv2i64 ZPR:$Zs), + (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))), + (UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>; + defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>; + defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", AArch64frintm_mt>; + defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", AArch64frintz_mt>; + defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", AArch64frinta_mt>; + defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", AArch64frintx_mt>; + defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>; + defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", AArch64frecpx_mt>; + defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>; + let Predicates = [HasBF16, HasSVE] in { defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>; defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>; @@ -1648,9 +1648,9 @@ let Predicates = [HasSVE] in { def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>; } - def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))), - (ADDVL_XXI GPR64:$op, $imm)>; - + def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))), + (ADDVL_XXI GPR64:$op, $imm)>; + // FIXME: BigEndian requires an additional REV instruction to satisfy the // constraint that none of the bits change when stored to memory as one // type, and and reloaded as 
another type. @@ -1721,7 +1721,7 @@ let Predicates = [HasSVE] in { def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>; } - // These allow casting from/to unpacked predicate types. + // These allow casting from/to unpacked predicate types. def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; @@ -1736,18 +1736,18 @@ let Predicates = [HasSVE] in { def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; - // These allow casting from/to unpacked floating-point types. - def : Pat<(nxv2f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv8f16 (reinterpret_cast (nxv2f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv4f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv8f16 (reinterpret_cast (nxv4f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv2f32 (reinterpret_cast (nxv4f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv4f32 (reinterpret_cast (nxv2f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv2bf16 (reinterpret_cast (nxv8bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv8bf16 (reinterpret_cast (nxv2bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv4bf16 (reinterpret_cast (nxv8bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - def : Pat<(nxv8bf16 (reinterpret_cast (nxv4bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; - + // These allow casting from/to unpacked floating-point types. 
+ def : Pat<(nxv2f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv8f16 (reinterpret_cast (nxv2f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv4f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv8f16 (reinterpret_cast (nxv4f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv2f32 (reinterpret_cast (nxv4f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv4f32 (reinterpret_cast (nxv2f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv2bf16 (reinterpret_cast (nxv8bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv8bf16 (reinterpret_cast (nxv2bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv4bf16 (reinterpret_cast (nxv8bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv8bf16 (reinterpret_cast (nxv4bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)), (AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>; def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)), @@ -1800,7 +1800,7 @@ let Predicates = [HasSVE] in { defm : pred_load<nxv8i16, nxv8i1, asext_masked_load_i8, LD1SB_H, LD1SB_H_IMM, am_sve_regreg_lsl0>; defm : pred_load<nxv8i16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>; defm : pred_load<nxv8f16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>; - defm : pred_load<nxv8bf16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>; + defm : pred_load<nxv8bf16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>; // 16-element contiguous loads defm : pred_load<nxv16i8, nxv16i1, nonext_masked_load, LD1B, LD1B_IMM, am_sve_regreg_lsl0>; @@ -1838,10 +1838,10 @@ let Predicates = [HasSVE] in { defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>; // 8-element contiguous stores - defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H, ST1B_H_IMM, am_sve_regreg_lsl0>; - defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>; - defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>; - defm : pred_store<nxv8bf16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>; + defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H, ST1B_H_IMM, am_sve_regreg_lsl0>; + defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>; + defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>; + defm : pred_store<nxv8bf16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>; // 16-element contiguous stores defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B, ST1B_IMM, am_sve_regreg_lsl0>; @@ -2003,7 +2003,7 @@ let Predicates = [HasSVE] in { defm : ld1<LD1SB_H, LD1SB_H_IMM, nxv8i16, AArch64ld1s_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>; defm : ld1<LD1H, LD1H_IMM, nxv8i16, AArch64ld1_z, nxv8i1, nxv8i16, am_sve_regreg_lsl1>; defm : ld1<LD1H, LD1H_IMM, nxv8f16, AArch64ld1_z, nxv8i1, nxv8f16, am_sve_regreg_lsl1>; - defm : ld1<LD1H, LD1H_IMM, nxv8bf16, AArch64ld1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>; + defm : ld1<LD1H, LD1H_IMM, nxv8bf16, AArch64ld1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>; // 16-element contiguous loads defm : ld1<LD1B, LD1B_IMM, nxv16i8, AArch64ld1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>; @@ -2043,7 +2043,7 @@ let Predicates = [HasSVE] in { defm : 
ldnf1<LDNF1SB_H_IMM, nxv8i16, AArch64ldnf1s_z, nxv8i1, nxv8i8>; defm : ldnf1<LDNF1H_IMM, nxv8i16, AArch64ldnf1_z, nxv8i1, nxv8i16>; defm : ldnf1<LDNF1H_IMM, nxv8f16, AArch64ldnf1_z, nxv8i1, nxv8f16>; - defm : ldnf1<LDNF1H_IMM, nxv8bf16, AArch64ldnf1_z, nxv8i1, nxv8bf16>; + defm : ldnf1<LDNF1H_IMM, nxv8bf16, AArch64ldnf1_z, nxv8i1, nxv8bf16>; // 16-element contiguous non-faulting loads defm : ldnf1<LDNF1B_IMM, nxv16i8, AArch64ldnf1_z, nxv16i1, nxv16i8>; @@ -2084,7 +2084,7 @@ let Predicates = [HasSVE] in { defm : ldff1<LDFF1SB_H, nxv8i16, AArch64ldff1s_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>; defm : ldff1<LDFF1H, nxv8i16, AArch64ldff1_z, nxv8i1, nxv8i16, am_sve_regreg_lsl1>; defm : ldff1<LDFF1H, nxv8f16, AArch64ldff1_z, nxv8i1, nxv8f16, am_sve_regreg_lsl1>; - defm : ldff1<LDFF1H, nxv8bf16, AArch64ldff1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>; + defm : ldff1<LDFF1H, nxv8bf16, AArch64ldff1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>; // 16-element contiguous first faulting loads defm : ldff1<LDFF1B, nxv16i8, AArch64ldff1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>; @@ -2135,19 +2135,19 @@ let Predicates = [HasSVE] in { def : Pat<(nxv2i64 (vector_insert (nxv2i64 (undef)), (i64 FPR64:$src), 0)), (INSERT_SUBREG (nxv2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - def : Pat<(nxv8f16 (vector_insert (nxv8f16 (undef)), (f16 FPR16:$src), 0)), - (INSERT_SUBREG (nxv8f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; - def : Pat<(nxv4f16 (vector_insert (nxv4f16 (undef)), (f16 FPR16:$src), 0)), - (INSERT_SUBREG (nxv4f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; - def : Pat<(nxv2f16 (vector_insert (nxv2f16 (undef)), (f16 FPR16:$src), 0)), - (INSERT_SUBREG (nxv2f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; - def : Pat<(nxv4f32 (vector_insert (nxv4f32 (undef)), (f32 FPR32:$src), 0)), - (INSERT_SUBREG (nxv4f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>; - def : Pat<(nxv2f32 (vector_insert (nxv2f32 (undef)), (f32 FPR32:$src), 0)), - (INSERT_SUBREG (nxv2f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>; - def : Pat<(nxv2f64 (vector_insert (nxv2f64 (undef)), (f64 FPR64:$src), 0)), - (INSERT_SUBREG (nxv2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - + def : Pat<(nxv8f16 (vector_insert (nxv8f16 (undef)), (f16 FPR16:$src), 0)), + (INSERT_SUBREG (nxv8f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; + def : Pat<(nxv4f16 (vector_insert (nxv4f16 (undef)), (f16 FPR16:$src), 0)), + (INSERT_SUBREG (nxv4f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; + def : Pat<(nxv2f16 (vector_insert (nxv2f16 (undef)), (f16 FPR16:$src), 0)), + (INSERT_SUBREG (nxv2f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>; + def : Pat<(nxv4f32 (vector_insert (nxv4f32 (undef)), (f32 FPR32:$src), 0)), + (INSERT_SUBREG (nxv4f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>; + def : Pat<(nxv2f32 (vector_insert (nxv2f32 (undef)), (f32 FPR32:$src), 0)), + (INSERT_SUBREG (nxv2f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>; + def : Pat<(nxv2f64 (vector_insert (nxv2f64 (undef)), (f64 FPR64:$src), 0)), + (INSERT_SUBREG (nxv2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + // Insert scalar into vector[0] def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), (i32 GPR32:$src), 0)), (CPY_ZPmR_B ZPR:$vec, (PTRUE_B 1), GPR32:$src)>; @@ -2211,28 +2211,28 @@ let Predicates = [HasSVE] in { (DUP_ZR_D $index)), $src)>; - // Extract element from vector with scalar index - def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)), - (LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)), - (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(i32 (vector_extract 
(nxv4i32 ZPR:$vec), GPR64:$index)), - (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)), - (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_S (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; - def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)), - (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; - + // Extract element from vector with scalar index + def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)), + (LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)), + (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)), + (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)), + (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)), + (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), GPR64:$index)), + (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), GPR64:$index)), + (LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)), + (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), GPR64:$index)), + (LASTB_VPZ_S (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; + def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)), + (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>; + // Extract element from vector with immediate index def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)), (EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_b:$index), ssub)>; @@ -2244,54 +2244,54 @@ let Predicates = [HasSVE] in { (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>; def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)), (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>; - def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)), - (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>; - def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)), - (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>; - def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)), - (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>; + def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)), + (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>; + def : Pat<(f16 (vector_extract 
(nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)), + (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>; + def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)), + (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>; def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)), (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>; - def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)), - (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), ssub)>; + def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)), + (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), ssub)>; def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)), (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>; - // Extract element from vector with immediate index that's within the bottom 128-bits. - let AddedComplexity = 1 in { - def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), - (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>; - def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)), - (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>; - def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)), - (i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>; - def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)), - (i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>; - } - - // Extract first element from vector. - let AddedComplexity = 2 in { - def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)), - (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; - def : Pat<(vector_extract (nxv8i16 ZPR:$Zs), (i64 0)), - (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; - def : Pat<(vector_extract (nxv4i32 ZPR:$Zs), (i64 0)), - (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; - def : Pat<(vector_extract (nxv2i64 ZPR:$Zs), (i64 0)), - (i64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>; - def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)), - (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>; - def : Pat<(vector_extract (nxv4f16 ZPR:$Zs), (i64 0)), - (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>; - def : Pat<(vector_extract (nxv2f16 ZPR:$Zs), (i64 0)), - (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>; - def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)), - (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; - def : Pat<(vector_extract (nxv2f32 ZPR:$Zs), (i64 0)), - (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; - def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)), - (f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>; - } + // Extract element from vector with immediate index that's within the bottom 128-bits. + let AddedComplexity = 1 in { + def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), + (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>; + def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)), + (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>; + def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)), + (i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>; + def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)), + (i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>; + } + + // Extract first element from vector. 
+ let AddedComplexity = 2 in { + def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)), + (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; + def : Pat<(vector_extract (nxv8i16 ZPR:$Zs), (i64 0)), + (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; + def : Pat<(vector_extract (nxv4i32 ZPR:$Zs), (i64 0)), + (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; + def : Pat<(vector_extract (nxv2i64 ZPR:$Zs), (i64 0)), + (i64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>; + def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)), + (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>; + def : Pat<(vector_extract (nxv4f16 ZPR:$Zs), (i64 0)), + (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>; + def : Pat<(vector_extract (nxv2f16 ZPR:$Zs), (i64 0)), + (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>; + def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)), + (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; + def : Pat<(vector_extract (nxv2f32 ZPR:$Zs), (i64 0)), + (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>; + def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)), + (f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>; + } } let Predicates = [HasSVE, HasMatMulInt8] in { @@ -2350,10 +2350,10 @@ let Predicates = [HasSVE2] in { defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh", int_aarch64_sve_sqrdmulh>; // SVE2 integer multiply vectors (unpredicated) - defm MUL_ZZZ : sve2_int_mul<0b000, "mul", null_frag, AArch64mul_p>; + defm MUL_ZZZ : sve2_int_mul<0b000, "mul", null_frag, AArch64mul_p>; defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag>; defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag>; - defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>; + defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>; // Add patterns for unpredicated version of smulh and umulh. def : Pat<(nxv16i8 (int_aarch64_sve_smulh (nxv16i1 (AArch64ptrue 31)), nxv16i8:$Op1, nxv16i8:$Op2)), @@ -2372,7 +2372,7 @@ let Predicates = [HasSVE2] in { (UMULH_ZZZ_S $Op1, $Op2)>; def : Pat<(nxv2i64 (int_aarch64_sve_umulh (nxv2i1 (AArch64ptrue 31)), nxv2i64:$Op1, nxv2i64:$Op2)), (UMULH_ZZZ_D $Op1, $Op2)>; - + // SVE2 complex integer dot product (indexed) defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot", int_aarch64_sve_cdot_lane>; @@ -2494,11 +2494,11 @@ let Predicates = [HasSVE2] in { } // SVE2 predicated shifts - defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">; - defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">; - defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>; - defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>; - defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>; + defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">; + defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">; + defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>; + defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>; + defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>; // SVE2 integer add/subtract long defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA55.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA55.td index 50911fd22b..0b45a3ba09 100644 --- 
a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA55.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA55.td @@ -1,339 +1,339 @@ -//==- AArch64SchedCortexA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for the ARM Cortex-A55 processors. -// -//===----------------------------------------------------------------------===// - -// ===---------------------------------------------------------------------===// -// The following definitions describe the per-operand machine model. -// This works with MachineScheduler. See MCSchedModel.h for details. - -// Cortex-A55 machine model for scheduling and other instruction cost heuristics. -def CortexA55Model : SchedMachineModel { - let MicroOpBufferSize = 0; // The Cortex-A55 is an in-order processor - let IssueWidth = 2; // It dual-issues under most circumstances - let LoadLatency = 4; // Cycles for loads to access the cache. The - // optimisation guide shows that most loads have - // a latency of 3, but some have a latency of 4 - // or 5. Setting it 4 looked to be good trade-off. - let MispredictPenalty = 8; // A branch direction mispredict. - let PostRAScheduler = 1; // Enable PostRA scheduler pass. - let CompleteModel = 0; // Covers instructions applicable to Cortex-A55. - - list<Predicate> UnsupportedFeatures = [HasSVE]; - - // FIXME: Remove when all errors have been fixed. - let FullInstRWOverlapCheck = 0; -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available. - -// Modeling each pipeline as a ProcResource using the BufferSize = 0 since the -// Cortex-A55 is in-order. - -def CortexA55UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU -def CortexA55UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bi wide -def CortexA55UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division, not pipelined -def CortexA55UnitLd : ProcResource<1> { let BufferSize = 0; } // Load pipe -def CortexA55UnitSt : ProcResource<1> { let BufferSize = 0; } // Store pipe -def CortexA55UnitB : ProcResource<1> { let BufferSize = 0; } // Branch - -// The FP DIV/SQRT instructions execute totally differently from the FP ALU -// instructions, which can mostly be dual-issued; that's why for now we model -// them with 2 resources. -def CortexA55UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU -def CortexA55UnitFPMAC : ProcResource<2> { let BufferSize = 0; } // FP MAC -def CortexA55UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP Div/SQRT, 64/128 - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedWrite types - -let SchedModel = CortexA55Model in { - -// These latencies are modeled without taking into account forwarding paths -// (the software optimisation guide lists latencies taking into account -// typical forwarding paths). 
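A minimal sketch of the convention these WriteRes entries follow (the ExampleUnit and ExampleWrite names are invented for illustration and would have to sit inside the model's let SchedModel block): Latency is the cycle count from issue until the result is available, while ResourceCycles is how long the unit stays occupied, which is what limits throughput on an in-order core whose resources have BufferSize = 0.

// Hypothetical sketch only; ExampleUnit and ExampleWrite are not Cortex-A55 definitions.
def ExampleUnit  : ProcResource<1> { let BufferSize = 0; }   // a single in-order pipe
def ExampleWrite : SchedWriteRes<[ExampleUnit]> {
  let Latency = 6;             // result is ready 6 cycles after issue
  let ResourceCycles = [3];    // pipe stays busy 3 cycles, so at most one such op per 3 cycles
}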
-def : WriteRes<WriteImm, [CortexA55UnitALU]> { let Latency = 3; } // MOVN, MOVZ -def : WriteRes<WriteI, [CortexA55UnitALU]> { let Latency = 3; } // ALU -def : WriteRes<WriteISReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Shifted-Reg -def : WriteRes<WriteIEReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Extended-Reg -def : WriteRes<WriteExtr, [CortexA55UnitALU]> { let Latency = 3; } // EXTR from a reg pair -def : WriteRes<WriteIS, [CortexA55UnitALU]> { let Latency = 3; } // Shift/Scale - -// MAC -def : WriteRes<WriteIM32, [CortexA55UnitMAC]> { let Latency = 4; } // 32-bit Multiply -def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; } // 64-bit Multiply - -// Div -def : WriteRes<WriteID32, [CortexA55UnitDiv]> { - let Latency = 8; let ResourceCycles = [8]; -} -def : WriteRes<WriteID64, [CortexA55UnitDiv]> { - let Latency = 8; let ResourceCycles = [8]; -} - -// Load -def : WriteRes<WriteLD, [CortexA55UnitLd]> { let Latency = 3; } -def : WriteRes<WriteLDIdx, [CortexA55UnitLd]> { let Latency = 4; } -def : WriteRes<WriteLDHi, [CortexA55UnitLd]> { let Latency = 5; } - -// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVecLd -// below, choosing the median of 3 which makes the latency 6. -// An extra cycle is needed to get the swizzling right. -def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6; - let ResourceCycles = [3]; } -def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; } -def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5; - let ResourceCycles = [2]; } -def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6; - let ResourceCycles = [3]; } -def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7; - let ResourceCycles = [4]; } -def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8; - let ResourceCycles = [5]; } -def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9; - let ResourceCycles = [6]; } -def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10; - let ResourceCycles = [7]; } -def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11; - let ResourceCycles = [8]; } - -// Pre/Post Indexing - Performed as part of address generation -def : WriteRes<WriteAdr, []> { let Latency = 0; } - -// Store -def : WriteRes<WriteST, [CortexA55UnitSt]> { let Latency = 4; } -def : WriteRes<WriteSTP, [CortexA55UnitSt]> { let Latency = 4; } -def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 4; } -def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; } - -// Vector Store - Similar to vector loads, can take 1-3 cycles to issue. 
-def : WriteRes<WriteVST, [CortexA55UnitSt]> { let Latency = 5; - let ResourceCycles = [2];} -def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; } -def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5; - let ResourceCycles = [2]; } -def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6; - let ResourceCycles = [3]; } -def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5; - let ResourceCycles = [4]; } - -def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } - -// Branch -def : WriteRes<WriteBr, [CortexA55UnitB]>; -def : WriteRes<WriteBrReg, [CortexA55UnitB]>; -def : WriteRes<WriteSys, [CortexA55UnitB]>; -def : WriteRes<WriteBarrier, [CortexA55UnitB]>; -def : WriteRes<WriteHint, [CortexA55UnitB]>; - -// FP ALU -// As WriteF result is produced in F5 and it can be mostly forwarded -// to consumer at F1, the effectively latency is set as 4. -def : WriteRes<WriteF, [CortexA55UnitFPALU]> { let Latency = 4; } -def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; } -def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; } -def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; } -def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; } -def : WriteRes<WriteV, [CortexA55UnitFPALU]> { let Latency = 4; } - -// FP ALU specific new schedwrite definitions -def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;} -def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;} -def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;} - -// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined -def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; } -def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22; - let ResourceCycles = [29]; } -def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; } -def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8; - let ResourceCycles = [5]; } -def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13; - let ResourceCycles = [10]; } -def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22; - let ResourceCycles = [19]; } -def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8; - let ResourceCycles = [5]; } -def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12; - let ResourceCycles = [9]; } -def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22; - let ResourceCycles = [19]; } - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedRead types. - -def : ReadAdvance<ReadVLD, 0>; -def : ReadAdvance<ReadExtrHi, 1>; -def : ReadAdvance<ReadAdrBase, 1>; - -// ALU - ALU input operands are generally needed in EX1. An operand produced in -// in say EX2 can be forwarded for consumption to ALU in EX1, thereby -// allowing back-to-back ALU operations such as add. If an operand requires -// a shift, it will, however, be required in ISS stage. 
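To put numbers on the forwarding comment above (a hypothetical sketch; ExWriteALU and ExReadALU are invented names, while CortexA55UnitALU is the unit defined earlier in this model): a SchedReadAdvance of N cycles is subtracted from the producer's latency as seen by that operand, so a 3-cycle ALU result read through an advance of 2 behaves like a 1-cycle result, which is what allows back-to-back dependent ALU operations, whereas a shifted operand with an advance of 1 still waits one extra cycle.

// Hypothetical sketch only, assuming the enclosing let SchedModel = CortexA55Model block.
def ExWriteALU : SchedWriteRes<[CortexA55UnitALU]> { let Latency = 3; }
def ExReadALU  : SchedReadAdvance<2, [ExWriteALU]>;
// Effective latency seen through ExReadALU: 3 - 2 = 1 cycle.
// With an advance of 1 (shifted operand) it would be 3 - 1 = 2 cycles.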
-def : ReadAdvance<ReadI, 2, [WriteImm,WriteI, - WriteISReg, WriteIEReg,WriteIS, - WriteID32,WriteID64, - WriteIM32,WriteIM64]>; -// Shifted operand -def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI, - WriteISReg, WriteIEReg,WriteIS, - WriteID32,WriteID64, - WriteIM32,WriteIM64]>; -def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI, - WriteISReg, WriteIEReg,WriteIS, - WriteID32,WriteID64, - WriteIM32,WriteIM64]>; -def CortexA55ReadISReg : SchedReadVariant<[ - SchedVar<RegShiftedPred, [CortexA55ReadShifted]>, - SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>; -def : SchedAlias<ReadISReg, CortexA55ReadISReg>; - -def CortexA55ReadIEReg : SchedReadVariant<[ - SchedVar<RegExtendedPred, [CortexA55ReadShifted]>, - SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>; -def : SchedAlias<ReadIEReg, CortexA55ReadIEReg>; - -// MUL -def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI, - WriteISReg, WriteIEReg,WriteIS, - WriteID32,WriteID64, - WriteIM32,WriteIM64]>; -def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI, - WriteISReg, WriteIEReg,WriteIS, - WriteID32,WriteID64, - WriteIM32,WriteIM64]>; - -// Div -def : ReadAdvance<ReadID, 1, [WriteImm,WriteI, - WriteISReg, WriteIEReg,WriteIS, - WriteID32,WriteID64, - WriteIM32,WriteIM64]>; - -//===----------------------------------------------------------------------===// -// Subtarget-specific InstRWs. - -//--- -// Miscellaneous -//--- -def : InstRW<[CortexA55WriteVLD2,CortexA55WriteVLD1], (instregex "LDP.*")>; -def : InstRW<[WriteI], (instrs COPY)>; -//--- -// Vector Loads - 64-bit per cycle -//--- -// 1-element structures -def : InstRW<[CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; // single element -def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate -def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>; -def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)$")>; // multiple structures -def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; - -def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; - -// 2-element structures -def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>; -def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVLD2], (instregex 
"LD2Twov(8b|4h|2s)$")>; -def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; - -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>; -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; -def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; - -// 3-element structures -def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>; -def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>; -def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>; - -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; - -// 4-element structures -def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs. -def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs. -def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs. -def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; - -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; - -//--- -// Vector Stores -//--- -def : InstRW<[CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>; -def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>; -def : 
InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>; - -def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>; -def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; -def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -//--- -// Floating Point Conversions, MAC, DIV, SQRT -//--- -def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>; -def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>; - -def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>; -def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>; -def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTFv")>; - -def : InstRW<[CortexA55WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; -def : InstRW<[CortexA55WriteFMAC], (instregex "^FML(A|S).*")>; -def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>; -def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>; -def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>; -def : InstRW<[CortexA55WriteFDivHP], (instregex "^FDIVv.*16$")>; -def : InstRW<[CortexA55WriteFDivSP], (instregex "^FDIVv.*32$")>; -def : InstRW<[CortexA55WriteFDivDP], (instregex "^FDIVv.*64$")>; -def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>; -def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; -def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; -} +//==- AArch64SchedCortexA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the ARM Cortex-A55 processors. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the per-operand machine model. +// This works with MachineScheduler. See MCSchedModel.h for details. + +// Cortex-A55 machine model for scheduling and other instruction cost heuristics. +def CortexA55Model : SchedMachineModel { + let MicroOpBufferSize = 0; // The Cortex-A55 is an in-order processor + let IssueWidth = 2; // It dual-issues under most circumstances + let LoadLatency = 4; // Cycles for loads to access the cache. The + // optimisation guide shows that most loads have + // a latency of 3, but some have a latency of 4 + // or 5. Setting it 4 looked to be good trade-off. + let MispredictPenalty = 8; // A branch direction mispredict. + let PostRAScheduler = 1; // Enable PostRA scheduler pass. + let CompleteModel = 0; // Covers instructions applicable to Cortex-A55. + + list<Predicate> UnsupportedFeatures = [HasSVE]; + + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. 
+ +// Modeling each pipeline as a ProcResource using the BufferSize = 0 since the +// Cortex-A55 is in-order. + +def CortexA55UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU +def CortexA55UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bi wide +def CortexA55UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division, not pipelined +def CortexA55UnitLd : ProcResource<1> { let BufferSize = 0; } // Load pipe +def CortexA55UnitSt : ProcResource<1> { let BufferSize = 0; } // Store pipe +def CortexA55UnitB : ProcResource<1> { let BufferSize = 0; } // Branch + +// The FP DIV/SQRT instructions execute totally differently from the FP ALU +// instructions, which can mostly be dual-issued; that's why for now we model +// them with 2 resources. +def CortexA55UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU +def CortexA55UnitFPMAC : ProcResource<2> { let BufferSize = 0; } // FP MAC +def CortexA55UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP Div/SQRT, 64/128 + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types + +let SchedModel = CortexA55Model in { + +// These latencies are modeled without taking into account forwarding paths +// (the software optimisation guide lists latencies taking into account +// typical forwarding paths). +def : WriteRes<WriteImm, [CortexA55UnitALU]> { let Latency = 3; } // MOVN, MOVZ +def : WriteRes<WriteI, [CortexA55UnitALU]> { let Latency = 3; } // ALU +def : WriteRes<WriteISReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Shifted-Reg +def : WriteRes<WriteIEReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Extended-Reg +def : WriteRes<WriteExtr, [CortexA55UnitALU]> { let Latency = 3; } // EXTR from a reg pair +def : WriteRes<WriteIS, [CortexA55UnitALU]> { let Latency = 3; } // Shift/Scale + +// MAC +def : WriteRes<WriteIM32, [CortexA55UnitMAC]> { let Latency = 4; } // 32-bit Multiply +def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; } // 64-bit Multiply + +// Div +def : WriteRes<WriteID32, [CortexA55UnitDiv]> { + let Latency = 8; let ResourceCycles = [8]; +} +def : WriteRes<WriteID64, [CortexA55UnitDiv]> { + let Latency = 8; let ResourceCycles = [8]; +} + +// Load +def : WriteRes<WriteLD, [CortexA55UnitLd]> { let Latency = 3; } +def : WriteRes<WriteLDIdx, [CortexA55UnitLd]> { let Latency = 4; } +def : WriteRes<WriteLDHi, [CortexA55UnitLd]> { let Latency = 5; } + +// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVecLd +// below, choosing the median of 3 which makes the latency 6. +// An extra cycle is needed to get the swizzling right. 
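One way to read the CortexA55WriteVLD1..VLD8 classes defined just below (an interpretation of the numbers in this file, not additional definitions): each extra 64-bit register transferred adds one cycle of latency and one busy cycle on the single load pipe, matching the "64-bit per cycle" note in the vector-load InstRW section further down.

// Reading of existing definitions; nothing new is declared here.
// CortexA55WriteVLD2: Latency = 5,  ResourceCycles = [2]  -> two 64-bit transfers
// CortexA55WriteVLD8: Latency = 11, ResourceCycles = [8]  -> eight 64-bit transfers,
//                     e.g. a four-register 128-bit structure load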
+def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6; + let ResourceCycles = [3]; } +def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; } +def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5; + let ResourceCycles = [2]; } +def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6; + let ResourceCycles = [3]; } +def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7; + let ResourceCycles = [4]; } +def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8; + let ResourceCycles = [5]; } +def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9; + let ResourceCycles = [6]; } +def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10; + let ResourceCycles = [7]; } +def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11; + let ResourceCycles = [8]; } + +// Pre/Post Indexing - Performed as part of address generation +def : WriteRes<WriteAdr, []> { let Latency = 0; } + +// Store +def : WriteRes<WriteST, [CortexA55UnitSt]> { let Latency = 4; } +def : WriteRes<WriteSTP, [CortexA55UnitSt]> { let Latency = 4; } +def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 4; } +def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; } + +// Vector Store - Similar to vector loads, can take 1-3 cycles to issue. +def : WriteRes<WriteVST, [CortexA55UnitSt]> { let Latency = 5; + let ResourceCycles = [2];} +def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; } +def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5; + let ResourceCycles = [2]; } +def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6; + let ResourceCycles = [3]; } +def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5; + let ResourceCycles = [4]; } + +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + +// Branch +def : WriteRes<WriteBr, [CortexA55UnitB]>; +def : WriteRes<WriteBrReg, [CortexA55UnitB]>; +def : WriteRes<WriteSys, [CortexA55UnitB]>; +def : WriteRes<WriteBarrier, [CortexA55UnitB]>; +def : WriteRes<WriteHint, [CortexA55UnitB]>; + +// FP ALU +// As WriteF result is produced in F5 and it can be mostly forwarded +// to consumer at F1, the effectively latency is set as 4. +def : WriteRes<WriteF, [CortexA55UnitFPALU]> { let Latency = 4; } +def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; } +def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; } +def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; } +def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; } +def : WriteRes<WriteV, [CortexA55UnitFPALU]> { let Latency = 4; } + +// FP ALU specific new schedwrite definitions +def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;} +def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;} +def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;} + +// FP Mul, Div, Sqrt. 
Div/Sqrt are not pipelined +def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; } +def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22; + let ResourceCycles = [29]; } +def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; } +def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8; + let ResourceCycles = [5]; } +def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13; + let ResourceCycles = [10]; } +def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22; + let ResourceCycles = [19]; } +def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8; + let ResourceCycles = [5]; } +def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12; + let ResourceCycles = [9]; } +def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22; + let ResourceCycles = [19]; } + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedRead types. + +def : ReadAdvance<ReadVLD, 0>; +def : ReadAdvance<ReadExtrHi, 1>; +def : ReadAdvance<ReadAdrBase, 1>; + +// ALU - ALU input operands are generally needed in EX1. An operand produced in +// in say EX2 can be forwarded for consumption to ALU in EX1, thereby +// allowing back-to-back ALU operations such as add. If an operand requires +// a shift, it will, however, be required in ISS stage. +def : ReadAdvance<ReadI, 2, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; +// Shifted operand +def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; +def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; +def CortexA55ReadISReg : SchedReadVariant<[ + SchedVar<RegShiftedPred, [CortexA55ReadShifted]>, + SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>; +def : SchedAlias<ReadISReg, CortexA55ReadISReg>; + +def CortexA55ReadIEReg : SchedReadVariant<[ + SchedVar<RegExtendedPred, [CortexA55ReadShifted]>, + SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>; +def : SchedAlias<ReadIEReg, CortexA55ReadIEReg>; + +// MUL +def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; +def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; + +// Div +def : ReadAdvance<ReadID, 1, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; + +//===----------------------------------------------------------------------===// +// Subtarget-specific InstRWs. 
+ +//--- +// Miscellaneous +//--- +def : InstRW<[CortexA55WriteVLD2,CortexA55WriteVLD1], (instregex "LDP.*")>; +def : InstRW<[WriteI], (instrs COPY)>; +//--- +// Vector Loads - 64-bit per cycle +//--- +// 1-element structures +def : InstRW<[CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; // single element +def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate +def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)$")>; // multiple structures +def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; + +def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; + +// 2-element structures +def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>; +def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; + +def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>; +def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; +def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; +def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; + +// 3-element structures +def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>; +def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>; +def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>; + +def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; + +// 4-element structures +def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs. 
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs. +def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs. +def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; + +def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; + +//--- +// Vector Stores +//--- +def : InstRW<[CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>; +def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>; +def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>; + +def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; +def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +//--- +// Floating Point Conversions, MAC, DIV, SQRT +//--- +def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>; +def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>; + +def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>; +def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>; +def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTFv")>; + +def : InstRW<[CortexA55WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; +def : InstRW<[CortexA55WriteFMAC], (instregex "^FML(A|S).*")>; +def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>; +def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>; 
+def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[CortexA55WriteFDivHP], (instregex "^FDIVv.*16$")>; +def : InstRW<[CortexA55WriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[CortexA55WriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>; +def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; +} diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57.td index aa5bec8088..0ee50541c0 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57.td @@ -93,7 +93,7 @@ def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>; def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>; def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>; def : SchedAlias<WriteFImm, A57Write_3cyc_1V>; -def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;} +def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;} def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>; def : SchedAlias<WriteV, A57Write_3cyc_1V>; def : SchedAlias<WriteVLD, A57Write_5cyc_1L>; @@ -350,16 +350,16 @@ def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$") // D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64 // Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64 -// Cortex A57 Software Optimization Guide Sec 3.14 -// Advance for absolute diff accum, pairwise add and accumulate, shift accumulate -def A57ReadIVA3 : SchedReadAdvance<3, [A57Write_4cyc_1X_NonMul_Forward, A57Write_5cyc_2X_NonMul_Forward]>; - +// Cortex A57 Software Optimization Guide Sec 3.14 +// Advance for absolute diff accum, pairwise add and accumulate, shift accumulate +def A57ReadIVA3 : SchedReadAdvance<3, [A57Write_4cyc_1X_NonMul_Forward, A57Write_5cyc_2X_NonMul_Forward]>; + // ASIMD absolute diff accum, D-form -def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; +def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; // ASIMD absolute diff accum, Q-form -def : InstRW<[A57Write_5cyc_2X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; +def : InstRW<[A57Write_5cyc_2X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; // ASIMD absolute diff accum long -def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABAL")>; +def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABAL")>; // ASIMD arith, reduce, 4H/4S def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; @@ -376,41 +376,41 @@ def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")> def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>; // ASIMD multiply, D-form -// MUL -def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^MUL(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>; -// PMUL, SQDMULH, SQRDMULH -def : InstRW<[A57Write_5cyc_1W], (instregex "^(PMUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>; - +// MUL +def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^MUL(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>; +// PMUL, SQDMULH, SQRDMULH +def : InstRW<[A57Write_5cyc_1W], (instregex "^(PMUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>; + // ASIMD multiply, Q-form -// MUL -def : 
InstRW<[A57Write_6cyc_2W_Mul_Forward], (instregex "^MUL(v16i8|v8i16|v4i32)(_indexed)?$")>; -// PMUL, SQDMULH, SQRDMULH -def : InstRW<[A57Write_6cyc_2W], (instregex "^(PMUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; - -// Cortex A57 Software Optimization Guide Sec 3.14 -def A57ReadIVMA4 : SchedReadAdvance<4 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>; -def A57ReadIVMA3 : SchedReadAdvance<3 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>; - +// MUL +def : InstRW<[A57Write_6cyc_2W_Mul_Forward], (instregex "^MUL(v16i8|v8i16|v4i32)(_indexed)?$")>; +// PMUL, SQDMULH, SQRDMULH +def : InstRW<[A57Write_6cyc_2W], (instregex "^(PMUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; + +// Cortex A57 Software Optimization Guide Sec 3.14 +def A57ReadIVMA4 : SchedReadAdvance<4 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>; +def A57ReadIVMA3 : SchedReadAdvance<3 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>; + // ASIMD multiply accumulate, D-form -def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; +def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; // ASIMD multiply accumulate, Q-form -def : InstRW<[A57Write_6cyc_2W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; +def : InstRW<[A57Write_6cyc_2W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; // ASIMD multiply accumulate long // ASIMD multiply accumulate saturating long -def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^(S|U)ML[AS]L")>; -def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA3], (instregex "^SQDML[AS]L")>; +def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^(S|U)ML[AS]L")>; +def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA3], (instregex "^SQDML[AS]L")>; // ASIMD multiply long -def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^(S|U)MULL")>; -def : InstRW<[A57Write_5cyc_1W], (instregex "^SQDMULL")>; +def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^(S|U)MULL")>; +def : InstRW<[A57Write_5cyc_1W], (instregex "^SQDMULL")>; def : InstRW<[A57Write_5cyc_1W], (instregex "^PMULL(v8i8|v16i8)")>; def : InstRW<[A57Write_3cyc_1W], (instregex "^PMULL(v1i64|v2i64)")>; // ASIMD pairwise add and accumulate // ASIMD shift accumulate -def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ADALP")>; -def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>; +def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ADALP")>; +def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>; // ASIMD shift by immed, complex def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?(Q|R){1,2}SHR")>; @@ -487,22 +487,22 @@ def : InstRW<[A57Write_9cyc_3V], (instregex "^(FMAX|FMIN)(NM)?P(v4f32|v2f64|v2i6 def : InstRW<[A57Write_10cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv")>; // ASIMD FP multiply, D-form, FZ -def : InstRW<[A57Write_5cyc_1V_FP_Forward], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; +def : InstRW<[A57Write_5cyc_1V_FP_Forward], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; // ASIMD FP multiply, Q-form, FZ -def : InstRW<[A57Write_5cyc_2V_FP_Forward], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; +def : InstRW<[A57Write_5cyc_2V_FP_Forward], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; // ASIMD FP multiply 
accumulate, D-form, FZ // ASIMD FP multiply accumulate, Q-form, FZ def A57WriteFPVMAD : SchedWriteRes<[A57UnitV]> { let Latency = 9; } def A57WriteFPVMAQ : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 10; } - -// Cortex A57 Software Optimization Guide Sec 3.15 -// Advances from FP mul and mul-accum to mul-accum -def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>; -def A57ReadFPVMA6 : SchedReadAdvance<6, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>; - + +// Cortex A57 Software Optimization Guide Sec 3.15 +// Advances from FP mul and mul-accum to mul-accum +def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>; +def A57ReadFPVMA6 : SchedReadAdvance<6, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>; + def : InstRW<[A57WriteFPVMAD, A57ReadFPVMA5], (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; -def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA6], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; +def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA6], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; // ASIMD FP round, D-form def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT[AIMNPXZ](v2f32)")>; @@ -565,9 +565,9 @@ def : InstRW<[A57Write_6cyc_3V], (instregex "^(UZP|ZIP)(1|2)(v16i8|v8i16|v4i32|v def : InstRW<[A57Write_5cyc_1V], (instregex "^F(ADD|SUB)[DS]rr")>; -// Cortex A57 Software Optimization Guide Sec 3.10 +// Cortex A57 Software Optimization Guide Sec 3.10 def A57WriteFPMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; } -def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA, WriteFMul]>; +def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA, WriteFMul]>; def A57ReadFPM : SchedReadAdvance<0>; def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57WriteRes.td index a4c090d439..2ec3233887 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57WriteRes.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57WriteRes.td @@ -13,11 +13,11 @@ // Prefix: A57Write // Latency: #cyc // MicroOp Count/Types: #(B|I|M|L|S|X|W|V) -// Postfix (optional): (XYZ)_Forward -// -// The postfix is added to differentiate SchedWriteRes that are used in -// subsequent SchedReadAdvances. +// Postfix (optional): (XYZ)_Forward // +// The postfix is added to differentiate SchedWriteRes that are used in +// subsequent SchedReadAdvances. +// // e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are // 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes. 
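Following the naming scheme spelled out in that header comment, a record such as A57Write_6cyc_1I_6S_4V would be declared roughly as below. This is a hedged sketch of what the convention implies, not a line taken from the patch, so the exact upstream definition may differ in formatting:

def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI,
                                            A57UnitS, A57UnitS, A57UnitS,
                                            A57UnitS, A57UnitS, A57UnitS,
                                            A57UnitV, A57UnitV, A57UnitV,
                                            A57UnitV]> {
  let Latency     = 6;   // total latency of 6 cycles
  let NumMicroOps = 11;  // 1 I + 6 S + 4 V micro-ops
}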
// @@ -29,9 +29,9 @@ def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; } def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; } def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; } -def A57Write_5cyc_1V_FP_Forward : SchedWriteRes<[A57UnitV]> { let Latency = 5; } +def A57Write_5cyc_1V_FP_Forward : SchedWriteRes<[A57UnitV]> { let Latency = 5; } def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; } -def A57Write_5cyc_1W_Mul_Forward : SchedWriteRes<[A57UnitW]> { let Latency = 5; } +def A57Write_5cyc_1W_Mul_Forward : SchedWriteRes<[A57UnitW]> { let Latency = 5; } def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; } def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17; let ResourceCycles = [17]; } @@ -51,7 +51,7 @@ def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; } def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; } def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; } def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; } -def A57Write_4cyc_1X_NonMul_Forward : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57Write_4cyc_1X_NonMul_Forward : SchedWriteRes<[A57UnitX]> { let Latency = 4; } def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; } def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; } def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; } @@ -100,10 +100,10 @@ def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { let Latency = 6; let NumMicroOps = 2; } -def A57Write_6cyc_2W_Mul_Forward : SchedWriteRes<[A57UnitW, A57UnitW]> { - let Latency = 6; - let NumMicroOps = 2; -} +def A57Write_6cyc_2W_Mul_Forward : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 6; + let NumMicroOps = 2; +} def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI, A57UnitL]> { let Latency = 5; @@ -113,18 +113,18 @@ def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 5; let NumMicroOps = 2; } -def A57Write_5cyc_2V_FP_Forward : SchedWriteRes<[A57UnitV, A57UnitV]> { - let Latency = 5; - let NumMicroOps = 2; -} +def A57Write_5cyc_2V_FP_Forward : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 5; + let NumMicroOps = 2; +} def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { let Latency = 5; let NumMicroOps = 2; } -def A57Write_5cyc_2X_NonMul_Forward : SchedWriteRes<[A57UnitX, A57UnitX]> { - let Latency = 5; - let NumMicroOps = 2; -} +def A57Write_5cyc_2X_NonMul_Forward : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 5; + let NumMicroOps = 2; +} def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL, A57UnitV]> { let Latency = 10; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA64FX.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA64FX.td index b6741d418e..3c5a8d033d 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA64FX.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA64FX.td @@ -1,3890 +1,3890 @@ -//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the scheduling model for the Fujitsu A64FX processors. 
-// -//===----------------------------------------------------------------------===// - -def A64FXModel : SchedMachineModel { - let IssueWidth = 6; // 6 micro-ops dispatched at a time. - let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer. - let LoadLatency = 5; // Optimistic load latency. - let MispredictPenalty = 12; // Extra cycles for mispredicted branch. - // Determined via a mix of micro-arch details and experimentation. - let LoopMicroOpBufferSize = 128; - let PostRAScheduler = 1; // Using PostRA sched. - let CompleteModel = 1; - - list<Predicate> UnsupportedFeatures = - [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth]; - - let FullInstRWOverlapCheck = 0; -} - -let SchedModel = A64FXModel in { - -// Define the issue ports. - -// A64FXIP* - -// Port 0 -def A64FXIPFLA : ProcResource<1>; - -// Port 1 -def A64FXIPPR : ProcResource<1>; - -// Port 2 -def A64FXIPEXA : ProcResource<1>; - -// Port 3 -def A64FXIPFLB : ProcResource<1>; - -// Port 4 -def A64FXIPEXB : ProcResource<1>; - -// Port 5 -def A64FXIPEAGA : ProcResource<1>; - -// Port 6 -def A64FXIPEAGB : ProcResource<1>; - -// Port 7 -def A64FXIPBR : ProcResource<1>; - -// Define groups for the functional units on each issue port. Each group -// created will be used by a WriteRes later on. - -def A64FXGI7 : ProcResGroup<[A64FXIPBR]>; - -def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>; - -def A64FXGI1 : ProcResGroup<[A64FXIPPR]>; - -def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>; - -def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>; - -def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>; - -def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>; - -def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>; - -def A64FXGI03 : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>; - -def A64FXGI01 : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>; - -def A64FXGI02 : ProcResGroup<[A64FXIPFLA, A64FXIPEXA]>; - -def A64FXGI12 : ProcResGroup<[A64FXIPEXA, A64FXIPPR]>; - -def A64FXGI15 : ProcResGroup<[A64FXIPEAGA, A64FXIPPR]>; - -def A64FXGI05 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA]>; - -def A64FXGI24 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>; - -def A64FXGI124 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPPR]>; - -def A64FXGI056 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>; - -def A64FXGI0256 : ProcResGroup<[A64FXIPFLA, A64FXIPEXA, A64FXIPEAGA, A64FXIPEAGB]>; - -def A64FXGI56 : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>; - -def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB]>; - -def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB, - A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB, A64FXIPBR]> { - let BufferSize = 60; -} - -def A64FXWrite_6Cyc : SchedWriteRes<[]> { - let Latency = 6; -} - -def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> { - let Latency = 1; -} - -def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 2; -} - -def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 4; -} - -def A64FXWrite_5Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 5; -} - -def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 6; -} - -def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 8; -} - -def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 9; -} - -def A64FXWrite_13Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 13; -} - -def A64FXWrite_37Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 37; -} - -def A64FXWrite_98Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 98; -} - -def A64FXWrite_134Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let 
Latency = 134; -} - -def A64FXWrite_154Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { - let Latency = 154; -} - -def A64FXWrite_4Cyc_GI01 : SchedWriteRes<[A64FXGI01]> { - let Latency = 4; -} - -def A64FXWrite_6Cyc_GI01 : SchedWriteRes<[A64FXGI01]> { - let Latency = 6; -} - -def A64FXWrite_8Cyc_GI01 : SchedWriteRes<[A64FXGI01]> { - let Latency = 8; -} - -def A64FXWrite_12Cyc_GI01 : SchedWriteRes<[A64FXGI01]> { - let Latency = 12; -} - -def A64FXWrite_10Cyc_GI02 : SchedWriteRes<[A64FXGI02]> { - let Latency = 10; -} - -def A64FXWrite_17Cyc_GI02 : SchedWriteRes<[A64FXGI02]> { - let Latency = 17; -} - -def A64FXWrite_21Cyc_GI02 : SchedWriteRes<[A64FXGI02]> { - let Latency = 21; -} - -def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> { - let Latency = 3; -} - -def A64FXWrite_6Cyc_NGI1 : SchedWriteRes<[A64FXGI1]> { - let Latency = 3; - let NumMicroOps = 2; -} - -def A64FXWrite_4Cyc_GI12 : SchedWriteRes<[A64FXGI12]> { - let Latency = 4; -} - -def A64FXWrite_3Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { - let Latency = 3; -} - -def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { - let Latency = 5; -} - -def A64FXWrite_6Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { - let Latency = 6; -} - -def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { - let Latency = 4; -} - -def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { - let Latency = 6; -} - -def A64FXWrite_6Cyc_GI15 : SchedWriteRes<[A64FXGI15]> { - let Latency = 6; -} - -def A64FXWrite_3Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 3; -} - -def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 4; -} - -def A64FXWrite_6Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 6; -} - -def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 8; -} - -def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 9; -} - -def A64FXWrite_10Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; -} - -def A64FXWrite_12Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 12; -} - -def A64FXWrite_14Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; -} - -def A64FXWrite_15Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 15; -} - -def A64FXWrite_15Cyc_NGI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 15; - let NumMicroOps = 2; -} - -def A64FXWrite_18Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 18; -} - -def A64FXWrite_45Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 45; -} - -def A64FXWrite_60Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 60; -} - -def A64FXWrite_75Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { - let Latency = 75; -} - -def A64FXWrite_6Cyc_GI05 : SchedWriteRes<[A64FXGI05]> { - let Latency = 6; -} - -def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { - let Latency = 10; -} - -def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { - let Latency = 12; -} - -def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { - let Latency = 20; -} - -def A64FXWrite_5Cyc_GI5 : SchedWriteRes<[A64FXGI5]> { - let Latency = 5; -} - -def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> { - let Latency = 11; -} - -def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> { - let Latency = 5; -} - -def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { - let Latency = 1; -} - -def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { - let Latency = 2; -} - -def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> { - let Latency = 4; - let NumMicroOps = 4; -} - -def A64FXWrite_6Cyc_GI124: SchedWriteRes<[A64FXGI124]> { - let Latency = 6; -} - -def A64FXWrite_8Cyc_GI124 : 
SchedWriteRes<[A64FXGI124]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def A64FXWrite_6Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_1Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { - let Latency = 1; -} - -def A64FXWrite_5Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { - let Latency = 5; -} - -def A64FXWrite_8Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { - let Latency = 8; -} - -def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { - let Latency = 11; -} - -def A64FXWrite_44Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { - let Latency = 44; -} - -def A64FXWrite_10Cyc_GI056 : SchedWriteRes<[A64FXGI056]> { - let Latency = 10; -} - -def A64FXWrite_15Cyc_GI056 : SchedWriteRes<[A64FXGI056]> { - let Latency = 15; -} - -def A64FXWrite_19Cyc_GI056 : SchedWriteRes<[A64FXGI056]> { - let Latency = 19; -} - -def A64FXWrite_25Cyc_GI056 : SchedWriteRes<[A64FXGI056]> { - let Latency = 25; -} - -def A64FXWrite_14Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> { - let Latency = 14; -} - -def A64FXWrite_19Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> { - let Latency = 19; -} - -def A64FXWrite_29Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> { - let Latency = 29; -} - -def A64FXWrite_LDNP: SchedWriteRes<[A64FXGI56]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def A64FXWrite_LDP01: SchedWriteRes<[A64FXGI2456]> { - let Latency = 5; - let NumMicroOps = 3; -} - -def A64FXWrite_LDR01: SchedWriteRes<[A64FXGI2456]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def A64FXWrite_LD102: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def A64FXWrite_LD103: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 2; - -} - -def A64FXWrite_LD104: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 3; -} - -def A64FXWrite_LD105: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 3; -} - -def A64FXWrite_LD106: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 4; -} - -def A64FXWrite_LD107: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 4; -} - -def A64FXWrite_LD108: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def A64FXWrite_LD109: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 2; -} - -def A64FXWrite_LD110: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 3; -} - -def A64FXWrite_LD111: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 3; -} - -def A64FXWrite_LD112: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 4; -} - -def A64FXWrite_LD113: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 4; -} - -def A64FXWrite_LD114: SchedWriteRes<[A64FXGI56]> { - let Latency = 8; - let NumMicroOps = 5; -} - -def A64FXWrite_LD115: SchedWriteRes<[A64FXGI56]> { - let Latency = 11; - let NumMicroOps = 5; -} - -def A64FXWrite_LD1I0: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def A64FXWrite_LD1I1: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 3; -} - -def A64FXWrite_LD2I0: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 4; -} - -def A64FXWrite_LD2I1: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 5; -} - -def A64FXWrite_LD3I0: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 6; -} - -def A64FXWrite_LD3I1: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 7; -} - -def A64FXWrite_LD4I0: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 
8; -} - -def A64FXWrite_LD4I1: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; - let NumMicroOps = 9; -} - -def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> { - let Latency = 1; -} - -def A64FXWrite_FMOV_GV : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; -} - -def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; -} - -def A64FXWrite_FMOV_VG : SchedWriteRes<[A64FXGI03]> { - let Latency = 25; -} - -def A64FXWrite_ADDLV : SchedWriteRes<[A64FXGI03]> { - let Latency = 12; -} - -def A64FXWrite_MULLE : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; -} - -def A64FXWrite_MULLV : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; -} - -def A64FXWrite_MADDL : SchedWriteRes<[A64FXGI03]> { - let Latency = 6; -} - -def A64FXWrite_ABA : SchedWriteRes<[A64FXGI03]> { - let Latency = 8; -} - -def A64FXWrite_ABAL : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; -} - -def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> { - let Latency = 12; - let NumMicroOps = 6; -} - -def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; - let NumMicroOps = 6; -} - -def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> { - let Latency = 9; -} - -def A64FXWrite_PMUL : SchedWriteRes<[A64FXGI03]> { - let Latency = 8; -} - - -def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> { - let Latency = 8; - let NumMicroOps = 3; -} - -def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> { - let Latency = 8; - let NumMicroOps = 2; -} - -def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 3; -} - -def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 2; -} - - -def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 3; -} - -def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> { - let Latency = 15; - let NumMicroOps = 2; -} - -def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> { - let Latency = 15; - let NumMicroOps = 3; -} - -def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 3; -} - -def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 2; -} - -def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> { - let Latency = 15; - let NumMicroOps = 2; -} - -def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; - let NumMicroOps = 7; -} - -def A64FXWrite_FMAXVVS : SchedWriteRes<[A64FXGI03]> { - let Latency = 14; -} - -def A64FXWrite_BIF : SchedWriteRes<[A64FXGI03]> { - let Latency = 5; -} - -def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; -} - -def A64FXWrite_SHA00 : SchedWriteRes<[A64FXGI0]> { - let Latency = 9; -} - -def A64FXWrite_SHA01 : SchedWriteRes<[A64FXGI0]> { - let Latency = 12; -} - -def A64FXWrite_SMOV : SchedWriteRes<[A64FXGI03]> { - let Latency = 25; -} - -def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 3; -} - -def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 5; -} - -def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 7; -} - -def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> { - let Latency = 10; - let NumMicroOps = 9; -} - -def A64FXWrite_PREF0: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_PREF1: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_SWP: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_STUR: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def 
A64FXWrite_STNP: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_STP01: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_ST10: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_ST11: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_ST12: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_ST13: SchedWriteRes<[A64FXGI56]> { - let Latency = 0; -} - -def A64FXWrite_ST14: SchedWriteRes<[A64FXGI56]> { - let Latency = 1; -} - -def A64FXWrite_ST15: SchedWriteRes<[A64FXGI56]> { - let Latency = 1; -} - -def A64FXWrite_ST16: SchedWriteRes<[A64FXGI56]> { - let Latency = 1; -} - -def A64FXWrite_ST17: SchedWriteRes<[A64FXGI56]> { - let Latency = 1; -} - -def A64FXWrite_ST1W_6: SchedWriteRes<[A64FXGI056]> { - let Latency = 6; -} - -def A64FXWrite_ST2W_7: SchedWriteRes<[A64FXGI056]> { - let Latency = 7; -} - -def A64FXWrite_ST3W_8: SchedWriteRes<[A64FXGI056]> { - let Latency = 8; -} - -def A64FXWrite_ST4W_9: SchedWriteRes<[A64FXGI056]> { - let Latency = 9; -} - -def A64FXWrite_ST1W_15: SchedWriteRes<[A64FXGI056]> { - let Latency = 15; -} - -def A64FXWrite_ST1W_19: SchedWriteRes<[A64FXGI056]> { - let Latency = 19; -} - -def A64FXWrite_CAS: SchedWriteRes<[A64FXGI56]> { - let Latency = 7; -} - -// Define commonly used read types. - -// No forwarding is provided for these types. -def : ReadAdvance<ReadI, 0>; -def : ReadAdvance<ReadISReg, 0>; -def : ReadAdvance<ReadIEReg, 0>; -def : ReadAdvance<ReadIM, 0>; -def : ReadAdvance<ReadIMA, 0>; -def : ReadAdvance<ReadID, 0>; -def : ReadAdvance<ReadExtrHi, 0>; -def : ReadAdvance<ReadAdrBase, 0>; -def : ReadAdvance<ReadVLD, 0>; - -//===----------------------------------------------------------------------===// -// 3. Instruction Tables. 
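The instruction tables that follow bind concrete opcodes to the SchedWriteRes records defined earlier in the file, either by exact name (instrs) or by regular expression over opcode names (instregex). A minimal sketch of the pattern, reusing writes defined above; the regex and the particular opcode-to-write pairing are chosen for illustration and are not entries from the A64FX model:

// Illustrative only: regex-matched opcodes mapped to a write defined above.
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXAMPLE_")>;   // placeholder regex
// Illustrative only: a single opcode mapped by exact name.
def : InstRW<[A64FXWrite_4Cyc_GI3], (instrs FADDDrr)>;          // not the model's actual entry for FADDDrr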
- -//--- -// 3.1 Branch Instructions -//--- - -// Branch, immed -// Branch and link, immed -// Compare and branch -def : WriteRes<WriteBr, [A64FXGI7]> { - let Latency = 1; -} - -// Branch, register -// Branch and link, register != LR -// Branch and link, register = LR -def : WriteRes<WriteBrReg, [A64FXGI7]> { - let Latency = 1; -} - -def : WriteRes<WriteSys, []> { let Latency = 1; } -def : WriteRes<WriteBarrier, []> { let Latency = 1; } -def : WriteRes<WriteHint, []> { let Latency = 1; } - -def : WriteRes<WriteAtomic, []> { - let Latency = 4; -} - -//--- -// Branch -//--- -def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>; -def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>; -def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>; -def : InstRW<[A64FXWrite_1Cyc_GI7], - (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>; - -//--- -// 3.2 Arithmetic and Logical Instructions -// 3.3 Move and Shift Instructions -//--- - -// ALU, basic -// Conditional compare -// Conditional select -// Address generation -def : WriteRes<WriteI, [A64FXGI2456]> { - let Latency = 1; - let ResourceCycles = [1]; -} - -def : InstRW<[WriteI], - (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", - "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", - "ADC(W|X)r", - "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", - "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", - "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", - "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", - "SBCS(W|X)r", "CCMN(W|X)(i|r)", - "CCMP(W|X)(i|r)", "CSEL(W|X)r", - "CSINC(W|X)r", "CSINV(W|X)r", - "CSNEG(W|X)r")>; - -def : InstRW<[WriteI], (instrs COPY)>; - -// ALU, extend and/or shift -def : WriteRes<WriteISReg, [A64FXGI2456]> { - let Latency = 2; - let ResourceCycles = [1]; -} - -def : InstRW<[WriteISReg], - (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", - "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", - "ADC(W|X)r", - "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", - "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", - "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", - "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", - "SBCS(W|X)r", "CCMN(W|X)(i|r)", - "CCMP(W|X)(i|r)", "CSEL(W|X)r", - "CSINC(W|X)r", "CSINV(W|X)r", - "CSNEG(W|X)r")>; - -def : WriteRes<WriteIEReg, [A64FXGI2456]> { - let Latency = 1; - let ResourceCycles = [1]; -} - -def : InstRW<[WriteIEReg], - (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", - "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", - "ADC(W|X)r", - "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", - "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", - "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", - "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", - "SBCS(W|X)r", "CCMN(W|X)(i|r)", - "CCMP(W|X)(i|r)", "CSEL(W|X)r", - "CSINC(W|X)r", "CSINV(W|X)r", - "CSNEG(W|X)r")>; - -// Move immed -def : WriteRes<WriteImm, [A64FXGI2456]> { - let Latency = 1; - let ResourceCycles = [1]; -} - -def : InstRW<[A64FXWrite_1Cyc_GI2456], - (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; - -def : InstRW<[A64FXWrite_2Cyc_GI24], - (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>; - -// Variable shift -def : WriteRes<WriteIS, [A64FXGI2456]> { - let Latency = 1; - let ResourceCycles = [1]; -} - -//--- -// 3.4 Divide and Multiply Instructions -//--- - -// Divide, W-form -def : WriteRes<WriteID32, [A64FXGI4]> { - let Latency = 39; - let ResourceCycles = [39]; -} - -// Divide, X-form -def : WriteRes<WriteID64, [A64FXGI4]> { - let Latency = 23; - let ResourceCycles = [23]; -} - -// Multiply accumulate, W-form -def : WriteRes<WriteIM32, [A64FXGI2456]> { - let 
Latency = 5; - let ResourceCycles = [1]; -} - -// Multiply accumulate, X-form -def : WriteRes<WriteIM64, [A64FXGI2456]> { - let Latency = 5; - let ResourceCycles = [1]; -} - -def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>; -def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>; -def : InstRW<[A64FXWrite_MADDL], - (instregex "(S|U)(MADDL|MSUBL)rrr")>; - -def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>; -def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>; - -// Bitfield extract, two reg -def : WriteRes<WriteExtr, [A64FXGI2456]> { - let Latency = 1; - let ResourceCycles = [1]; -} - -// Multiply high -def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>; - -// Miscellaneous Data-Processing Instructions -// Bitfield extract -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>; - -// Bitifield move - basic -def : InstRW<[A64FXWrite_1Cyc_GI24], - (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>; - -// Bitfield move, insert -def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>; - -// Count leading -def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$", - "^CLZ(W|X)r$")>; - -// Reverse bits -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>; - -// Cryptography Extensions -def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]")>; -def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AESI?MC")>; -def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^PMULL")>; -def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>; -def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>; -def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]")>; -def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU0")>; -def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU1")>; -def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA256(H|H2)")>; - -// CRC Instructions -def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr, CRC32Hrr)>; -def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr)>; -def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr)>; - -def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32CBrr, CRC32CHrr)>; -def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32CWrr)>; -def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32CXrr)>; - -// Reverse bits/bytes -// NOTE: Handled by WriteI. - -//--- -// 3.6 Load Instructions -// 3.10 FP Load Instructions -//--- - -// Load register, literal -// Load register, unscaled immed -// Load register, immed unprivileged -// Load register, unsigned immed -def : WriteRes<WriteLD, [A64FXGI56]> { - let Latency = 4; - let ResourceCycles = [3]; -} - -// Load register, immed post-index -// NOTE: Handled by WriteLD, WriteI. -// Load register, immed pre-index -// NOTE: Handled by WriteLD, WriteAdr. -def : WriteRes<WriteAdr, [A64FXGI2456]> { - let Latency = 1; - let ResourceCycles = [1]; -} - -// Load pair, immed offset, normal -// Load pair, immed offset, signed words, base != SP -// Load pair, immed offset signed words, base = SP -// LDP only breaks into *one* LS micro-op. Thus -// the resources are handled by WriteLD. 
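Per the comment just above, a load-pair issues as a single load/store micro-op on A64FX, so only the first destination's write occupies the load/store resources; the second destination is covered by a write with an empty resource list that contributes latency only (the actual WriteLDHi definition follows in the diff). A hedged sketch of that pattern, reusing A64FXWrite_LDNP from earlier in this file:

// Illustrative only: latency-only write for the second destination of a pair.
def ExampleLDHi : SchedWriteRes<[]> { let Latency = 5; }
// The pair costs one L/S micro-op sequence (A64FXWrite_LDNP) plus the latency-only write.
def : InstRW<[A64FXWrite_LDNP, ExampleLDHi], (instrs LDPXi)>;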
-def : WriteRes<WriteLDHi, []> { - let Latency = 5; -} - -// Load register offset, basic -// Load register, register offset, scale by 4/8 -// Load register, register offset, scale by 2 -// Load register offset, extend -// Load register, register offset, extend, scale by 4/8 -// Load register, register offset, extend, scale by 2 -def A64FXWriteLDIdx : SchedWriteVariant<[ - SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>, - SchedVar<NoSchedPred, [A64FXWrite_1Cyc_GI56]>]>; -def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>; - -def A64FXReadAdrBase : SchedReadVariant<[ - SchedVar<ScaledIdxPred, [ReadDefault]>, - SchedVar<NoSchedPred, [ReadDefault]>]>; -def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>; - -// Load pair, immed pre-index, normal -// Load pair, immed pre-index, signed words -// Load pair, immed post-index, normal -// Load pair, immed post-index, signed words -// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. - -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPDi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPQi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPSi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPWi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPXi)>; - -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPDi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPQi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSWi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPWi)>; -def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPXi)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRBui)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRDui)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRHui)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRQui)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRSui)>; - -def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRDl)>; -def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRQl)>; -def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRWl)>; -def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRXl)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRBi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRHi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRWi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRXi)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBWi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBXi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHWi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHXi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSWi)>; - -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPDpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPQpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPSpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPWpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPWpre)>; - -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>; - -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpre)>; -def : InstRW<[A64FXWrite_LDR01, 
WriteAdr], (instrs LDRSBXpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpost)>; - -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpost)>; - -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpost)>; - -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpost)>; - -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPDpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPQpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPSpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPWpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPXpost)>; - -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>; - -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPDpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPQpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPSpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPWpre)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPXpre)>; - -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>; -def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>; - -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPDpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPQpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPSpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPWpost)>; -def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], - (instrs LDPXpost)>; - -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>; -def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, 
ReadAdrBase], (instrs LDRSroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroW)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRBroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRBroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRDroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRHroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRHHroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRQroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRSroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRSHWroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRSHXroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRWroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRXroW)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRBroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRDroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRHroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRHHroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRQroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRSroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRSHWroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRSHXroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRWroX)>; -def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], - (instrs LDRXroX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBBi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURDi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHHi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURQi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURXi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBWi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBXi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHWi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHXi)>; -def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSWi)>; - -//--- -// Prefetch -//--- -def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>; -def : InstRW<[A64FXWrite_PREF1], (instrs PRFUMi)>; -def : InstRW<[A64FXWrite_PREF1], (instrs PRFMui)>; -def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroW)>; -def : 
InstRW<[A64FXWrite_PREF1], (instrs PRFMroX)>; - -//-- -// 3.7 Store Instructions -// 3.11 FP Store Instructions -//-- - -// Store register, unscaled immed -// Store register, immed unprivileged -// Store register, unsigned immed -def : WriteRes<WriteST, [A64FXGI56]> { - let Latency = 1; -} - -// Store register, immed post-index -// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase - -// Store register, immed pre-index -// NOTE: Handled by WriteAdr, WriteST - -// Store register, register offset, basic -// Store register, register offset, scaled by 4/8 -// Store register, register offset, scaled by 2 -// Store register, register offset, extend -// Store register, register offset, extend, scale by 4/8 -// Store register, register offset, extend, scale by 1 -def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]> { - let Latency = 1; -} - -// Store pair, immed offset, W-form -// Store pair, immed offset, X-form -def : WriteRes<WriteSTP, [A64FXGI56]> { - let Latency = 1; -} - -// Store pair, immed post-index, W-form -// Store pair, immed post-index, X-form -// Store pair, immed pre-index, W-form -// Store pair, immed pre-index, X-form -// NOTE: Handled by WriteAdr, WriteSTP. - -def : InstRW<[A64FXWrite_STUR], (instrs STURBi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURBBi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURDi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURHi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURHHi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURQi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURSi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURWi)>; -def : InstRW<[A64FXWrite_STUR], (instrs STURXi)>; - -def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRBi)>; -def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRHi)>; -def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRWi)>; -def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRXi)>; - -def : InstRW<[A64FXWrite_STNP], (instrs STNPDi)>; -def : InstRW<[A64FXWrite_STNP], (instrs STNPQi)>; -def : InstRW<[A64FXWrite_STNP], (instrs STNPXi)>; -def : InstRW<[A64FXWrite_STNP], (instrs STNPWi)>; - -def : InstRW<[A64FXWrite_STNP], (instrs STPDi)>; -def : InstRW<[A64FXWrite_STNP], (instrs STPQi)>; -def : InstRW<[A64FXWrite_STNP], (instrs STPXi)>; -def : InstRW<[A64FXWrite_STNP], (instrs STPWi)>; - -def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>; -def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>; - -def : InstRW<[A64FXWrite_STP01], - (instrs STPDpre, STPDpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPDpre, STPDpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPDpre, STPDpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPDpre, STPDpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPQpre, STPQpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPQpre, STPQpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPQpre, STPQpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPQpre, STPQpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPSpre, STPSpost)>; 
-def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPSpre, STPSpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPSpre, STPSpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPSpre, STPSpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPWpre, STPWpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPWpre, STPWpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPWpre, STPWpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPWpre, STPWpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPXpre, STPXpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPXpre, STPXpost)>; -def : InstRW<[A64FXWrite_STP01], - (instrs STPXpre, STPXpost)>; -def : InstRW<[A64FXWrite_STP01, ReadAdrBase], - (instrs STPXpre, STPXpost)>; - -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRBpre, STRBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRBBpre, STRBBpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRDpre, STRDpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRHpre, STRHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRHHpre, STRHHpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRQpre, STRQpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRSpre, STRSpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRWpre, STRWpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRXpre, STRXpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - 
(instrs STRXpre, STRXpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01], - (instrs STRXpre, STRXpost)>; -def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], - (instrs STRXpre, STRXpost)>; - -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRBroW, STRBroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRBroW, STRBroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRBBroW, STRBBroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRBBroW, STRBBroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRDroW, STRDroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRDroW, STRDroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRHroW, STRHroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRHroW, STRHroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRHHroW, STRHHroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRHHroW, STRHHroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRQroW, STRQroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRQroW, STRQroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRSroW, STRSroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRSroW, STRSroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRWroW, STRWroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRWroW, STRWroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRXroW, STRXroX)>; -def : InstRW<[A64FXWrite_STUR, ReadAdrBase], - (instrs STRXroW, STRXroX)>; - -//--- -// 3.8 FP Data Processing Instructions -//--- - -// FP absolute value -// FP min/max -// FP negate -def : WriteRes<WriteF, [A64FXGI03]> { - let Latency = 4; - let ResourceCycles = [2]; -} - -// FP arithmetic - -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FADDDrr, FADDHrr)>; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FSUBDrr, FSUBHrr)>; - -// FP compare -def : WriteRes<WriteFCmp, [A64FXGI03]> { - let Latency = 4; - let ResourceCycles = [2]; -} - -// FP Div, Sqrt -def : WriteRes<WriteFDiv, [A64FXGI0]> { - let Latency = 43; -} - -def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]> { - let Latency = 38; -} - -def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]> { - let Latency = 29; -} - -def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]> { - let Latency = 43; -} - -def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]> { - let Latency = 29; -} - -def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]> { - let Latency = 43; -} - -// FP divide, S-form -// FP square root, S-form -def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>; -def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>; -def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>; -def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; -def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVSrr")>; -def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^FSQRTSr")>; - -// FP divide, D-form -// FP square root, D-form -def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>; -def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>; -def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>; -def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; -def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVDrr")>; -def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^FSQRTDr")>; - -// FP multiply -// FP multiply accumulate -def : WriteRes<WriteFMul, [A64FXGI03]> { - let Latency = 9; - let ResourceCycles = [2]; -} - -def A64FXXWriteFMul : SchedWriteRes<[A64FXGI03]> { - let Latency = 9; - let ResourceCycles = 
[2]; -} - -def A64FXXWriteFMulAcc : SchedWriteRes<[A64FXGI03]> { - let Latency = 9; - let ResourceCycles = [2]; -} - -def : InstRW<[A64FXXWriteFMul], (instregex "^FMUL", "^FNMUL")>; -def : InstRW<[A64FXXWriteFMulAcc], - (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>; - -// FP round to integral -def : InstRW<[A64FXWrite_9Cyc_GI03], - (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; - -// FP select -def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>; - -//--- -// 3.9 FP Miscellaneous Instructions -//--- - -// FP convert, from vec to vec reg -// FP convert, from gen to vec reg -// FP convert, from vec to gen reg -def : WriteRes<WriteFCvt, [A64FXGI03]> { - let Latency = 9; - let ResourceCycles = [2]; -} - -// FP move, immed -// FP move, register -def : WriteRes<WriteFImm, [A64FXGI0]> { - let Latency = 4; - let ResourceCycles = [2]; -} - -// FP transfer, from gen to vec reg -// FP transfer, from vec to gen reg -def : WriteRes<WriteFCopy, [A64FXGI0]> { - let Latency = 4; - let ResourceCycles = [2]; -} - -def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>; -def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>; - -//--- -// 3.12 ASIMD Integer Instructions -//--- - -// ASIMD absolute diff, D-form -// ASIMD absolute diff, Q-form -// ASIMD absolute diff accum, D-form -// ASIMD absolute diff accum, Q-form -// ASIMD absolute diff accum long -// ASIMD absolute diff long -// ASIMD arith, basic -// ASIMD arith, complex -// ASIMD compare -// ASIMD logical (AND, BIC, EOR) -// ASIMD max/min, basic -// ASIMD max/min, reduce, 4H/4S -// ASIMD max/min, reduce, 8B/8H -// ASIMD max/min, reduce, 16B -// ASIMD multiply, D-form -// ASIMD multiply, Q-form -// ASIMD multiply accumulate long -// ASIMD multiply accumulate saturating long -// ASIMD multiply long -// ASIMD pairwise add and accumulate -// ASIMD shift accumulate -// ASIMD shift by immed, basic -// ASIMD shift by immed and insert, basic, D-form -// ASIMD shift by immed and insert, basic, Q-form -// ASIMD shift by immed, complex -// ASIMD shift by register, basic, D-form -// ASIMD shift by register, basic, Q-form -// ASIMD shift by register, complex, D-form -// ASIMD shift by register, complex, Q-form -def : WriteRes<WriteV, [A64FXGI03]> { - let Latency = 4; - let ResourceCycles = [1]; -} - -// ASIMD arith, reduce, 4H/4S -// ASIMD arith, reduce, 8B/8H -// ASIMD arith, reduce, 16B - -// ASIMD logical (MVN (alias for NOT), ORN, ORR) -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; - -// ASIMD arith, reduce -def : InstRW<[A64FXWrite_ADDLV], - (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; - -// ASIMD polynomial (8x8) multiply long -def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>; -def : InstRW<[A64FXWrite_MULLV], - (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; -def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>; -def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>; - -// ASIMD absolute diff accum, D-form -def : InstRW<[A64FXWrite_ABA], - (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; -// ASIMD absolute diff accum, Q-form -def : InstRW<[A64FXWrite_ABA], - (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; -// ASIMD absolute diff accum long -def : InstRW<[A64FXWrite_ABAL], - (instregex "^[SU]ABAL")>; -// ASIMD arith, reduce, 4H/4S -def : InstRW<[A64FXWrite_ADDLV1], - (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; -// ASIMD arith, reduce, 8B -def : InstRW<[A64FXWrite_ADDLV1], - (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; -// ASIMD arith, 
reduce, 16B/16H -def : InstRW<[A64FXWrite_ADDLV1], - (instregex "^[SU]?ADDL?Vv16i8v$")>; -// ASIMD max/min, reduce, 4H/4S -def : InstRW<[A64FXWrite_MINMAXV], - (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; -// ASIMD max/min, reduce, 8B/8H -def : InstRW<[A64FXWrite_MINMAXV], - (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; -// ASIMD max/min, reduce, 16B/16H -def : InstRW<[A64FXWrite_MINMAXV], - (instregex "^[SU](MIN|MAX)Vv16i8v$")>; -// ASIMD multiply, D-form -def : InstRW<[A64FXWrite_PMUL], - (instregex "^(P?MUL|SQR?DMUL)" # - "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" # - "(_indexed)?$")>; - -// ASIMD multiply, Q-form -def : InstRW<[A64FXWrite_PMUL], - (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>; - -// ASIMD multiply, Q-form -def : InstRW<[A64FXWrite_SQRDMULH], - (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; - -// ASIMD multiply accumulate, D-form -def : InstRW<[A64FXWrite_9Cyc_GI03], - (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; -// ASIMD multiply accumulate, Q-form -def : InstRW<[A64FXWrite_9Cyc_GI03], - (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; -// ASIMD shift accumulate -def : InstRW<[A64FXWrite_SRSRAV], - (instregex "SRSRAv", "URSRAv")>; -def : InstRW<[A64FXWrite_SSRAV], - (instregex "SSRAv", "USRAv")>; - -// ASIMD shift by immed, basic -def : InstRW<[A64FXWrite_RSHRN], - (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>; -def : InstRW<[A64FXWrite_SHRN], - (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>; - -def : InstRW<[A64FXWrite_6Cyc_GI3], - (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>; - -// ASIMD shift by immed, complex -def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>; -def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>; -// ASIMD shift by register, basic, Q-form -def : InstRW<[A64FXWrite_6Cyc_GI3], - (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; -// ASIMD shift by register, complex, D-form -def : InstRW<[A64FXWrite_6Cyc_GI3], - (instregex "^[SU][QR]{1,2}SHL" # - "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; -// ASIMD shift by register, complex, Q-form -def : InstRW<[A64FXWrite_6Cyc_GI3], - (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; - -// ASIMD Arithmetic -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; -def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>; -def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD", - "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; -def : InstRW<[A64FXWrite_ADDP], - (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" # - "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; -def : InstRW<[A64FXWrite_4Cyc_GI0], - (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; -def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>; -def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>; -def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>; -def : InstRW<[A64FXWrite_MINMAXV], - (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>; -def : InstRW<[A64FXWrite_ABA], - (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex 
"^SUQADDv", "^USQADDv")>; -def : InstRW<[A64FXWrite_SHRN], - (instregex "^ADDHNv", "^SUBHNv")>; -def : InstRW<[A64FXWrite_RSHRN], - (instregex "^RADDHNv", "^RSUBHNv")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", - "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB", - "^URHADD", "^USQADD")>; - -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^CMEQv", "^CMGEv", "^CMGTv", - "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>; -def : InstRW<[A64FXWrite_MINMAXV], - (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>; -def : InstRW<[A64FXWrite_ADDP], - (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^SABDv", "^UABDv")>; -def : InstRW<[A64FXWrite_TBX1], - (instregex "^SABDLv", "^UABDLv")>; - -//--- -// 3.13 ASIMD Floating-point Instructions -//--- - -// ASIMD FP absolute value -def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>; - -// ASIMD FP arith, normal, D-form -// ASIMD FP arith, normal, Q-form -def : InstRW<[A64FXWrite_9Cyc_GI03], - (instregex "^FABDv", "^FADDv", "^FSUBv")>; - -// ASIMD FP arith, pairwise, D-form -// ASIMD FP arith, pairwise, Q-form -def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>; - -// ASIMD FP compare, D-form -// ASIMD FP compare, Q-form -def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCMEQv", "^FCMGEv", - "^FCMGTv", "^FCMLEv", - "^FCMLTv")>; -// ASIMD FP round, D-form -def : InstRW<[A64FXWrite_9Cyc_GI03], - (instregex "^FRINT[AIMNPXZ](v2f32)")>; -// ASIMD FP round, Q-form -def : InstRW<[A64FXWrite_9Cyc_GI03], - (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; - -// ASIMD FP convert, long -// ASIMD FP convert, narrow -// ASIMD FP convert, other, D-form -// ASIMD FP convert, other, Q-form - -// ASIMD FP convert, long and narrow -def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>; -// ASIMD FP convert, other, D-form -def : InstRW<[A64FXWrite_FCVTXNV], - (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; -// ASIMD FP convert, other, Q-form -def : InstRW<[A64FXWrite_FCVTXNV], - (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP divide, D-form, F32 -def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>; -def : InstRW<[A64FXXWriteFDivSP], (instregex "FDIVv2f32")>; - -// ASIMD FP divide, Q-form, F32 -def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>; -def : InstRW<[A64FXXWriteFDiv], (instregex "FDIVv4f32")>; - -// ASIMD FP divide, Q-form, F64 -def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>; -def : InstRW<[A64FXXWriteFDivDP], (instregex "FDIVv2f64")>; - -// ASIMD FP max/min, normal, D-form -// ASIMD FP max/min, normal, Q-form -def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMAXv", "^FMAXNMv", - "^FMINv", "^FMINNMv")>; - -// ASIMD FP max/min, pairwise, D-form -// ASIMD FP max/min, pairwise, Q-form -def : InstRW<[A64FXWrite_ADDP], (instregex "^FMAXPv", "^FMAXNMPv", - "^FMINPv", "^FMINNMPv")>; - -// ASIMD FP max/min, reduce -def : InstRW<[A64FXWrite_FMAXVVH], (instregex "^FMAXVv", "^FMAXNMVv", - "^FMINVv", "^FMINNMVv")>; - -// ASIMD FP multiply, D-form, FZ -// ASIMD FP multiply, D-form, no FZ -// ASIMD FP multiply, Q-form, FZ -// ASIMD FP multiply, Q-form, no FZ -def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>; -def : InstRW<[A64FXWrite_FMULXE], - (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; -def : InstRW<[A64FXWrite_FMULXE], - (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP 
multiply accumulate, Dform, FZ -// ASIMD FP multiply accumulate, Dform, no FZ -// ASIMD FP multiply accumulate, Qform, FZ -// ASIMD FP multiply accumulate, Qform, no FZ -def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>; -def : InstRW<[A64FXWrite_FMULXE], - (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; -def : InstRW<[A64FXWrite_FMULXE], - (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; - -// ASIMD FP negate -def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>; - -//-- -// 3.14 ASIMD Miscellaneous Instructions -//-- - -// ASIMD bit reverse -def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>; - -// ASIMD bitwise insert, D-form -// ASIMD bitwise insert, Q-form -def : InstRW<[A64FXWrite_BIF], - (instregex "^BIFv", "^BITv", "^BSLv")>; - -// ASIMD count, D-form -// ASIMD count, Q-form -def : InstRW<[A64FXWrite_4Cyc_GI0], - (instregex "^CLSv", "^CLZv", "^CNTv")>; - -// ASIMD duplicate, gen reg -// ASIMD duplicate, element -def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^CPY")>; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>; - -// ASIMD extract -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>; - -// ASIMD extract narrow -def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>; - -// ASIMD extract narrow, saturating -def : InstRW<[A64FXWrite_6Cyc_GI3], - (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>; - -// ASIMD insert, element to element -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>; - -// ASIMD transfer, element to gen reg -def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>; - -// ASIMD move, integer immed -def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>; - -// ASIMD move, FP immed -def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>; - -// ASIMD table lookup, D-form -def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>; -def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>; -def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>; -def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>; -def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>; -def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>; -def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>; -def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>; - -// ASIMD table lookup, Q-form -def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>; -def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>; -def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>; -def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>; -def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>; -def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>; -def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>; -def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>; - -// ASIMD transpose -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1", "^TRN2")>; - -// ASIMD unzip/zip -def : InstRW<[A64FXWrite_6Cyc_GI0], - (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>; - -// ASIMD reciprocal estimate, D-form -// ASIMD reciprocal estimate, Q-form -def : InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", - "^FRSQRTEv", "^URSQRTEv")>; - -// ASIMD reciprocal step, D-form, FZ -// ASIMD reciprocal step, D-form, no FZ -// ASIMD reciprocal step, Q-form, FZ -// ASIMD reciprocal step, Q-form, no FZ -def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>; - -// ASIMD reverse -def : 
InstRW<[A64FXWrite_4Cyc_GI03], - (instregex "^REV16v", "^REV32v", "^REV64v")>; - -// ASIMD table lookup, D-form -// ASIMD table lookup, Q-form -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>; - -// ASIMD transfer, element to word or word -def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>; - -// ASIMD transfer, element to gen reg -def : InstRW<[A64FXWrite_SMOV], (instregex "(S|U)MOVv.*")>; - -// ASIMD transfer gen reg to element -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>; - -// ASIMD transpose -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1v", "^TRN2v", - "^UZP1v", "^UZP2v")>; - -// ASIMD unzip/zip -def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>; - -//-- -// 3.15 ASIMD Load Instructions -//-- - -// ASIMD load, 1 element, multiple, 1 reg, D-form -// ASIMD load, 1 element, multiple, 1 reg, Q-form -def : InstRW<[A64FXWrite_8Cyc_GI56], - (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>; -def : InstRW<[A64FXWrite_11Cyc_GI56], - (instregex "^LD1Onev(16b|8h|4s)$")>; -def : InstRW<[A64FXWrite_LD108, WriteAdr], - (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>; -def : InstRW<[A64FXWrite_LD109, WriteAdr], - (instregex "^LD1Onev(16b|8h|4s)_POST$")>; - -// ASIMD load, 1 element, multiple, 2 reg, D-form -// ASIMD load, 1 element, multiple, 2 reg, Q-form -def : InstRW<[A64FXWrite_LD102], - (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>; -def : InstRW<[A64FXWrite_LD103], - (instregex "^LD1Twov(16b|8h|4s)$")>; -def : InstRW<[A64FXWrite_LD110, WriteAdr], - (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>; -def : InstRW<[A64FXWrite_LD111, WriteAdr], - (instregex "^LD1Twov(16b|8h|4s)_POST$")>; - -// ASIMD load, 1 element, multiple, 3 reg, D-form -// ASIMD load, 1 element, multiple, 3 reg, Q-form -def : InstRW<[A64FXWrite_LD104], - (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>; -def : InstRW<[A64FXWrite_LD105], - (instregex "^LD1Threev(16b|8h|4s)$")>; -def : InstRW<[A64FXWrite_LD112, WriteAdr], - (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>; -def : InstRW<[A64FXWrite_LD113, WriteAdr], - (instregex "^LD1Threev(16b|8h|4s)_POST$")>; - -// ASIMD load, 1 element, multiple, 4 reg, D-form -// ASIMD load, 1 element, multiple, 4 reg, Q-form -def : InstRW<[A64FXWrite_LD106], - (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>; -def : InstRW<[A64FXWrite_LD107], - (instregex "^LD1Fourv(16b|8h|4s)$")>; -def : InstRW<[A64FXWrite_LD114, WriteAdr], - (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>; -def : InstRW<[A64FXWrite_LD115, WriteAdr], - (instregex "^LD1Fourv(16b|8h|4s)_POST$")>; - -// ASIMD load, 1 element, one lane, B/H/S -// ASIMD load, 1 element, one lane, D -def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_LD1I1, WriteAdr], - (instregex "^LD1i(8|16|32|64)_POST$")>; - -// ASIMD load, 1 element, all lanes, D-form, B/H/S -// ASIMD load, 1 element, all lanes, D-form, D -// ASIMD load, 1 element, all lanes, Q-form -def : InstRW<[A64FXWrite_8Cyc_GI03], - (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD108, WriteAdr], - (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 2 element, multiple, D-form, B/H/S -// ASIMD load, 2 element, multiple, Q-form, D -def : InstRW<[A64FXWrite_LD103], - (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD111, WriteAdr], - (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 2 element, one lane, B/H -// ASIMD load, 2 element, one lane, S -// ASIMD load, 2 element, one lane, D -def : InstRW<[A64FXWrite_LD2I0], 
(instregex "^LD2i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_LD2I1, WriteAdr], - (instregex "^LD2i(8|16|32|64)_POST$")>; - -// ASIMD load, 2 element, all lanes, D-form, B/H/S -// ASIMD load, 2 element, all lanes, D-form, D -// ASIMD load, 2 element, all lanes, Q-form -def : InstRW<[A64FXWrite_LD102], - (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD110, WriteAdr], - (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 3 element, multiple, D-form, B/H/S -// ASIMD load, 3 element, multiple, Q-form, B/H/S -// ASIMD load, 3 element, multiple, Q-form, D -def : InstRW<[A64FXWrite_LD105], - (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD113, WriteAdr], - (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 3 element, one lone, B/H -// ASIMD load, 3 element, one lane, S -// ASIMD load, 3 element, one lane, D -def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_LD3I1, WriteAdr], - (instregex "^LD3i(8|16|32|64)_POST$")>; - -// ASIMD load, 3 element, all lanes, D-form, B/H/S -// ASIMD load, 3 element, all lanes, D-form, D -// ASIMD load, 3 element, all lanes, Q-form, B/H/S -// ASIMD load, 3 element, all lanes, Q-form, D -def : InstRW<[A64FXWrite_LD104], - (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD112, WriteAdr], - (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 4 element, multiple, D-form, B/H/S -// ASIMD load, 4 element, multiple, Q-form, B/H/S -// ASIMD load, 4 element, multiple, Q-form, D -def : InstRW<[A64FXWrite_LD107], - (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD115, WriteAdr], - (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD load, 4 element, one lane, B/H -// ASIMD load, 4 element, one lane, S -// ASIMD load, 4 element, one lane, D -def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_LD4I1, WriteAdr], - (instregex "^LD4i(8|16|32|64)_POST$")>; - -// ASIMD load, 4 element, all lanes, D-form, B/H/S -// ASIMD load, 4 element, all lanes, D-form, D -// ASIMD load, 4 element, all lanes, Q-form, B/H/S -// ASIMD load, 4 element, all lanes, Q-form, D -def : InstRW<[A64FXWrite_LD106], - (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_LD114, WriteAdr], - (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -//-- -// 3.16 ASIMD Store Instructions -//-- - -// ASIMD store, 1 element, multiple, 1 reg, D-form -// ASIMD store, 1 element, multiple, 1 reg, Q-form -def : InstRW<[A64FXWrite_ST10], - (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST14, WriteAdr], - (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 2 reg, D-form -// ASIMD store, 1 element, multiple, 2 reg, Q-form -def : InstRW<[A64FXWrite_ST11], - (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST15, WriteAdr], - (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 3 reg, D-form -// ASIMD store, 1 element, multiple, 3 reg, Q-form -def : InstRW<[A64FXWrite_ST12], - (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST16, WriteAdr], - (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, multiple, 4 reg, D-form -// ASIMD store, 1 element, multiple, 4 reg, Q-form -def : InstRW<[A64FXWrite_ST13], - (instregex 
"^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST17, WriteAdr], - (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 1 element, one lane, B/H/S -// ASIMD store, 1 element, one lane, D -def : InstRW<[A64FXWrite_ST10], - (instregex "^ST1i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_ST14, WriteAdr], - (instregex "^ST1i(8|16|32|64)_POST$")>; - -// ASIMD store, 2 element, multiple, D-form, B/H/S -// ASIMD store, 2 element, multiple, Q-form, B/H/S -// ASIMD store, 2 element, multiple, Q-form, D -def : InstRW<[A64FXWrite_ST11], - (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST15, WriteAdr], - (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 2 element, one lane, B/H/S -// ASIMD store, 2 element, one lane, D -def : InstRW<[A64FXWrite_ST11], - (instregex "^ST2i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_ST15, WriteAdr], - (instregex "^ST2i(8|16|32|64)_POST$")>; - -// ASIMD store, 3 element, multiple, D-form, B/H/S -// ASIMD store, 3 element, multiple, Q-form, B/H/S -// ASIMD store, 3 element, multiple, Q-form, D -def : InstRW<[A64FXWrite_ST12], - (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST16, WriteAdr], - (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 3 element, one lane, B/H -// ASIMD store, 3 element, one lane, S -// ASIMD store, 3 element, one lane, D -def : InstRW<[A64FXWrite_ST12], (instregex "^ST3i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_ST16, WriteAdr], - (instregex "^ST3i(8|16|32|64)_POST$")>; - -// ASIMD store, 4 element, multiple, D-form, B/H/S -// ASIMD store, 4 element, multiple, Q-form, B/H/S -// ASIMD store, 4 element, multiple, Q-form, D -def : InstRW<[A64FXWrite_ST13], - (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[A64FXWrite_ST17, WriteAdr], - (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; - -// ASIMD store, 4 element, one lane, B/H -// ASIMD store, 4 element, one lane, S -// ASIMD store, 4 element, one lane, D -def : InstRW<[A64FXWrite_ST13], (instregex "^ST4i(8|16|32|64)$")>; -def : InstRW<[A64FXWrite_ST17, WriteAdr], - (instregex "^ST4i(8|16|32|64)_POST$")>; - -// V8.1a Atomics (LSE) -def : InstRW<[A64FXWrite_CAS, WriteAtomic], - (instrs CASB, CASH, CASW, CASX)>; - -def : InstRW<[A64FXWrite_CAS, WriteAtomic], - (instrs CASAB, CASAH, CASAW, CASAX)>; - -def : InstRW<[A64FXWrite_CAS, WriteAtomic], - (instrs CASLB, CASLH, CASLW, CASLX)>; - -def : InstRW<[A64FXWrite_CAS, WriteAtomic], - (instrs CASALB, CASALH, CASALW, CASALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDLARB, LDLARH, LDLARW, LDLARX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDADDB, LDADDH, LDADDW, LDADDX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDEORB, LDEORH, LDEORW, 
LDEORX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDSETB, LDSETH, LDSETW, LDSETX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX, - LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX, - LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX, - LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX, - LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX, - LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX, - LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX, - LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX, - LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX, - LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>; - -def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], - (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX, - LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX, - LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX, - LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>; - -def : InstRW<[A64FXWrite_SWP, WriteAtomic], - (instrs SWPB, SWPH, SWPW, SWPX)>; - -def : InstRW<[A64FXWrite_SWP, WriteAtomic], - (instrs SWPAB, SWPAH, SWPAW, SWPAX)>; - -def : InstRW<[A64FXWrite_SWP, WriteAtomic], - (instrs SWPLB, SWPLH, SWPLW, SWPLX)>; - -def : InstRW<[A64FXWrite_SWP, WriteAtomic], - (instrs SWPALB, SWPALH, SWPALW, SWPALX)>; - -def : InstRW<[A64FXWrite_STUR, WriteAtomic], - (instrs STLLRB, STLLRH, STLLRW, STLLRX)>; - -// [ 1] "abs $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ABS_ZPmZ_B, ABS_ZPmZ_D, ABS_ZPmZ_H, ABS_ZPmZ_S)>; - -// [ 2] "add $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZZZ_B, ADD_ZZZ_D, ADD_ZZZ_H, ADD_ZZZ_S)>; - -// [ 3] "add $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZPmZ_B, ADD_ZPmZ_D, ADD_ZPmZ_H, ADD_ZPmZ_S)>; - -// [ 4] "add $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZI_B, ADD_ZI_D, ADD_ZI_H, ADD_ZI_S)>; - -// [ 5] "addpl $Rd, $Rn, $imm6"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs ADDPL_XXI)>; - -// [ 6] "addvl $Rd, $Rn, $imm6"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs ADDVL_XXI)>; - -// [ 7] "adr $Zd, [$Zn, $Zm]"; -def : InstRW<[A64FXWrite_5Cyc_GI0], (instrs ADR_LSL_ZZZ_D_0, ADR_LSL_ZZZ_D_1, ADR_LSL_ZZZ_D_2, ADR_LSL_ZZZ_D_3, ADR_LSL_ZZZ_S_0, ADR_LSL_ZZZ_S_1, ADR_LSL_ZZZ_S_2, ADR_LSL_ZZZ_S_3, ADR_SXTW_ZZZ_D_0, ADR_SXTW_ZZZ_D_1, ADR_SXTW_ZZZ_D_2, ADR_SXTW_ZZZ_D_3, ADR_UXTW_ZZZ_D_0, ADR_UXTW_ZZZ_D_1, ADR_UXTW_ZZZ_D_2, ADR_UXTW_ZZZ_D_3)>; - -// [ 8] "and $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs AND_PPzPP)>; - -// [ 9] "and $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZZZ)>; - -// [10] "and $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZPmZ_B, AND_ZPmZ_D, AND_ZPmZ_H, AND_ZPmZ_S)>; - -// [11] "and $Zdn, $_Zdn, $imms13"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs 
AND_ZI)>; - -// [12] "ands $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ANDS_PPzPP)>; - -// [13] "andv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs ANDV_VPZ_B, ANDV_VPZ_D, ANDV_VPZ_H, ANDV_VPZ_S)>; - -// [14] "asr $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_WIDE_ZZZ_B, ASR_WIDE_ZZZ_H, ASR_WIDE_ZZZ_S)>; - -// [15] "asr $Zd, $Zn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_ZZI_B, ASR_ZZI_D, ASR_ZZI_H, ASR_ZZI_S)>; - -// [16] "asr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_WIDE_ZPmZ_B, ASR_WIDE_ZPmZ_H, ASR_WIDE_ZPmZ_S, ASR_ZPmZ_B, ASR_ZPmZ_D, ASR_ZPmZ_H, ASR_ZPmZ_S)>; - -// [17] "asr $Zdn, $Pg/m, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_ZPmI_B, ASR_ZPmI_D, ASR_ZPmI_H, ASR_ZPmI_S)>; - -// [18] "asrd $Zdn, $Pg/m, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASRD_ZPmI_B, ASRD_ZPmI_D, ASRD_ZPmI_H, ASRD_ZPmI_S)>; - -// [19] "asrr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASRR_ZPmZ_B, ASRR_ZPmZ_D, ASRR_ZPmZ_H, ASRR_ZPmZ_S)>; - -// [20] "bic $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BIC_PPzPP)>; - -// [21] "bic $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs BIC_ZZZ)>; - -// [22] "bic $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs BIC_ZPmZ_B, BIC_ZPmZ_D, BIC_ZPmZ_H, BIC_ZPmZ_S)>; - -// [23] "bics $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BICS_PPzPP)>; - -// [24] "brka $Pd, $Pg/m, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKA_PPmP)>; - -// [25] "brka $Pd, $Pg/z, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKA_PPzP)>; - -// [26] "brkas $Pd, $Pg/z, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKAS_PPzP)>; - -// [27] "brkb $Pd, $Pg/m, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKB_PPmP)>; - -// [28] "brkb $Pd, $Pg/z, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKB_PPzP)>; - -// [29] "brkbs $Pd, $Pg/z, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKBS_PPzP)>; - -// [30] "brkn $Pdm, $Pg/z, $Pn, $_Pdm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKN_PPzP)>; - -// [31] "brkns $Pdm, $Pg/z, $Pn, $_Pdm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKNS_PPzP)>; - -// [32] "brkpa $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPA_PPzPP)>; - -// [33] "brkpas $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPAS_PPzPP)>; - -// [34] "brkpb $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPB_PPzPP)>; - -// [35] "brkpbs $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPBS_PPzPP)>; - -// [36] "clasta $Rdn, $Pg, $_Rdn, $Zm"; -def : InstRW<[A64FXWrite_29Cyc_GI0256], (instrs CLASTA_RPZ_B, CLASTA_RPZ_D, CLASTA_RPZ_H, CLASTA_RPZ_S)>; - -// [37] "clasta $Vdn, $Pg, $_Vdn, $Zm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTA_VPZ_B, CLASTA_VPZ_D, CLASTA_VPZ_H, CLASTA_VPZ_S)>; - -// [38] "clasta $Zdn, $Pg, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTA_ZPZ_B, CLASTA_ZPZ_D, CLASTA_ZPZ_H, CLASTA_ZPZ_S)>; - -// [39] "clastb $Rdn, $Pg, $_Rdn, $Zm"; -def : InstRW<[A64FXWrite_29Cyc_GI0256], (instrs CLASTB_RPZ_B, CLASTB_RPZ_D, CLASTB_RPZ_H, CLASTB_RPZ_S)>; - -// [40] "clastb $Vdn, $Pg, $_Vdn, $Zm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTB_VPZ_B, CLASTB_VPZ_D, CLASTB_VPZ_H, CLASTB_VPZ_S)>; - -// [41] "clastb $Zdn, $Pg, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTB_ZPZ_B, 
CLASTB_ZPZ_D, CLASTB_ZPZ_H, CLASTB_ZPZ_S)>; - -// [42] "cls $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs CLS_ZPmZ_B, CLS_ZPmZ_D, CLS_ZPmZ_H, CLS_ZPmZ_S)>; - -// [43] "clz $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs CLZ_ZPmZ_B, CLZ_ZPmZ_D, CLZ_ZPmZ_H, CLZ_ZPmZ_S)>; - -// [44] "cmpeq $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPEQ_PPzZZ_B, CMPEQ_PPzZZ_D, CMPEQ_PPzZZ_H, CMPEQ_PPzZZ_S, CMPEQ_WIDE_PPzZZ_B, CMPEQ_WIDE_PPzZZ_H, CMPEQ_WIDE_PPzZZ_S)>; - -// [45] "cmpeq $Pd, $Pg/z, $Zn, $imm5"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPEQ_PPzZI_B, CMPEQ_PPzZI_D, CMPEQ_PPzZI_H, CMPEQ_PPzZI_S)>; - -// [46] "cmpge $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGE_PPzZZ_B, CMPGE_PPzZZ_D, CMPGE_PPzZZ_H, CMPGE_PPzZZ_S, CMPGE_WIDE_PPzZZ_B, CMPGE_WIDE_PPzZZ_H, CMPGE_WIDE_PPzZZ_S)>; - -// [47] "cmpge $Pd, $Pg/z, $Zn, $imm5"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGE_PPzZI_B, CMPGE_PPzZI_D, CMPGE_PPzZI_H, CMPGE_PPzZI_S)>; - -// [48] "cmpgt $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGT_PPzZZ_B, CMPGT_PPzZZ_D, CMPGT_PPzZZ_H, CMPGT_PPzZZ_S, CMPGT_WIDE_PPzZZ_B, CMPGT_WIDE_PPzZZ_H, CMPGT_WIDE_PPzZZ_S)>; - -// [49] "cmpgt $Pd, $Pg/z, $Zn, $imm5"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGT_PPzZI_B, CMPGT_PPzZI_D, CMPGT_PPzZI_H, CMPGT_PPzZI_S)>; - -// [50] "cmphi $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHI_PPzZZ_B, CMPHI_PPzZZ_D, CMPHI_PPzZZ_H, CMPHI_PPzZZ_S, CMPHI_WIDE_PPzZZ_B, CMPHI_WIDE_PPzZZ_H, CMPHI_WIDE_PPzZZ_S)>; - -// [51] "cmphi $Pd, $Pg/z, $Zn, $imm7"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHI_PPzZI_B, CMPHI_PPzZI_D, CMPHI_PPzZI_H, CMPHI_PPzZI_S)>; - -// [52] "cmphs $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHS_PPzZZ_B, CMPHS_PPzZZ_D, CMPHS_PPzZZ_H, CMPHS_PPzZZ_S, CMPHS_WIDE_PPzZZ_B, CMPHS_WIDE_PPzZZ_H, CMPHS_WIDE_PPzZZ_S)>; - -// [53] "cmphs $Pd, $Pg/z, $Zn, $imm7"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHS_PPzZI_B, CMPHS_PPzZI_D, CMPHS_PPzZI_H, CMPHS_PPzZI_S)>; - -// [54] "cmple $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLE_WIDE_PPzZZ_B, CMPLE_WIDE_PPzZZ_H, CMPLE_WIDE_PPzZZ_S)>; - -// [55] "cmple $Pd, $Pg/z, $Zn, $imm5"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLE_PPzZI_B, CMPLE_PPzZI_D, CMPLE_PPzZI_H, CMPLE_PPzZI_S)>; - -// [56] "cmplo $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLO_WIDE_PPzZZ_B, CMPLO_WIDE_PPzZZ_H, CMPLO_WIDE_PPzZZ_S)>; - -// [57] "cmplo $Pd, $Pg/z, $Zn, $imm7"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLO_PPzZI_B, CMPLO_PPzZI_D, CMPLO_PPzZI_H, CMPLO_PPzZI_S)>; - -// [58] "cmpls $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLS_WIDE_PPzZZ_B, CMPLS_WIDE_PPzZZ_H, CMPLS_WIDE_PPzZZ_S)>; - -// [59] "cmpls $Pd, $Pg/z, $Zn, $imm7"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLS_PPzZI_B, CMPLS_PPzZI_D, CMPLS_PPzZI_H, CMPLS_PPzZI_S)>; - -// [60] "cmplt $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLT_WIDE_PPzZZ_B, CMPLT_WIDE_PPzZZ_H, CMPLT_WIDE_PPzZZ_S)>; - -// [61] "cmplt $Pd, $Pg/z, $Zn, $imm5"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLT_PPzZI_B, CMPLT_PPzZI_D, CMPLT_PPzZI_H, CMPLT_PPzZI_S)>; - -// [62] "cmpne $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPNE_PPzZZ_B, CMPNE_PPzZZ_D, CMPNE_PPzZZ_H, CMPNE_PPzZZ_S, CMPNE_WIDE_PPzZZ_B, CMPNE_WIDE_PPzZZ_H, CMPNE_WIDE_PPzZZ_S)>; - -// [63] "cmpne $Pd, $Pg/z, $Zn, $imm5"; 
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPNE_PPzZI_B, CMPNE_PPzZI_D, CMPNE_PPzZI_H, CMPNE_PPzZI_S)>; - -// [64] "cnot $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs CNOT_ZPmZ_B, CNOT_ZPmZ_D, CNOT_ZPmZ_H, CNOT_ZPmZ_S)>; - -// [65] "cnt $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI3], (instrs CNT_ZPmZ_B, CNT_ZPmZ_D, CNT_ZPmZ_H, CNT_ZPmZ_S)>; - -// [66] "cntb $Rd, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTB_XPiI)>; - -// [67] "cntd $Rd, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTD_XPiI)>; - -// [68] "cnth $Rd, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTH_XPiI)>; - -// [69] "cntp $Rd, $Pg, $Pn"; -def : InstRW<[A64FXWrite_6Cyc_GI01], (instrs CNTP_XPP_B, CNTP_XPP_D, CNTP_XPP_H, CNTP_XPP_S)>; - -// [70] "cntw $Rd, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTW_XPiI)>; - -// [71] "compact $Zd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs COMPACT_ZPZ_D, COMPACT_ZPZ_S)>; - -// [72] "cpy $Zd, $Pg/m, $Rn"; -//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmR_B, CPY_ZPmR_D, CPY_ZPmR_H, CPY_ZPmR_S)>; - -// [73] "cpy $Zd, $Pg/m, $Vn"; -//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmV_B, CPY_ZPmV_D, CPY_ZPmV_H, CPY_ZPmV_S)>; - -// [74] "cpy $Zd, $Pg/m, $imm"; -//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmI_B, CPY_ZPmI_D, CPY_ZPmI_H, CPY_ZPmI_S)>; - -// [75] "cpy $Zd, $Pg/z, $imm"; -//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPzI_B, CPY_ZPzI_D, CPY_ZPzI_H, CPY_ZPzI_S)>; - -// [76] "ctermeq $Rn, $Rm"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMEQ_WW, CTERMEQ_XX)>; - -// [77] "ctermne $Rn, $Rm"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMNE_WW, CTERMNE_XX)>; - -// [78] "decb $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECB_XPiI)>; - -// [79] "decd $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECD_XPiI)>; - -// [80] "decd $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECD_ZPiI)>; - -// [81] "dech $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECH_XPiI)>; - -// [82] "dech $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECH_ZPiI)>; - -// [83] "decp $Rdn, $Pg"; -def : InstRW<[A64FXWrite_6Cyc_GI124], (instrs DECP_XP_B, DECP_XP_D, DECP_XP_H, DECP_XP_S)>; - -// [84] "decp $Zdn, $Pg"; -def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs DECP_ZP_D, DECP_ZP_H, DECP_ZP_S)>; - -// [85] "decw $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECW_XPiI)>; - -// [86] "decw $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECW_ZPiI)>; - -// [87] "dup $Zd, $Rn"; -def : InstRW<[A64FXWrite_8Cyc_GI01], (instrs DUP_ZR_B, DUP_ZR_D, DUP_ZR_H, DUP_ZR_S)>; - -// [88] "dup $Zd, $Zn$idx"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs DUP_ZZI_B, DUP_ZZI_D, DUP_ZZI_H, DUP_ZZI_Q, DUP_ZZI_S)>; - -// [89] "dup $Zd, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs DUP_ZI_B, DUP_ZI_D, DUP_ZI_H, DUP_ZI_S)>; - -// [90] "dupm $Zd, $imms"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs DUPM_ZI)>; - -// [91] "eor $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs EOR_PPzPP)>; - -// [92] "eor $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs EOR_ZZZ)>; - -// [93] "eor $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs EOR_ZPmZ_B, EOR_ZPmZ_D, EOR_ZPmZ_H, EOR_ZPmZ_S)>; - -// [94] "eor $Zdn, $_Zdn, $imms13"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs EOR_ZI)>; - 
-// [95] "eors $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs EORS_PPzPP)>;
-
-// [96] "eorv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs EORV_VPZ_B, EORV_VPZ_D, EORV_VPZ_H, EORV_VPZ_S)>;
-
-// [97] "ext $Zdn, $_Zdn, $Zm, $imm8";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs EXT_ZZI)>;
-
-// [99] "fabd $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FABD_ZPmZ_D, FABD_ZPmZ_H, FABD_ZPmZ_S)>;
-
-// [100] "fabs $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FABS_ZPmZ_D, FABS_ZPmZ_H, FABS_ZPmZ_S)>;
-
-// [101] "facge $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FACGE_PPzZZ_D, FACGE_PPzZZ_H, FACGE_PPzZZ_S)>;
-
-// [102] "facgt $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FACGT_PPzZZ_D, FACGT_PPzZZ_H, FACGT_PPzZZ_S)>;
-
-// [103] "fadd $Zd, $Zn, $Zm"; def is line 1638
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZZZ_D, FADD_ZZZ_H, FADD_ZZZ_S)>;
-
-// [104] "fadd $Zdn, $Pg/m, $_Zdn, $Zm"; def is line 1638
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZPmZ_D, FADD_ZPmZ_H, FADD_ZPmZ_S)>;
-
-// [105] "fadd $Zdn, $Pg/m, $_Zdn, $i1"; def is line 1638
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZPmI_D, FADD_ZPmI_H, FADD_ZPmI_S)>;
-
-// [106] "fadda $Vdn, $Pg, $_Vdn, $Zm";
-def : InstRW<[A64FXWrite_18Cyc_GI03], (instrs FADDA_VPZ_D, FADDA_VPZ_H, FADDA_VPZ_S)>;
-
-// [107] "faddv $Vd, $Pg, $Zn";
-// H : 4 / 6 / ([1,2]9 / [1]6) x 4 / [1,2]9 = 75 cycle
-// S : 4 / 6 / ([1,2]9 / [1]6) x 3 / [1,2]9 = 60 cycle
-// D : 4 / 6 / ([1,2]9 / [1]6) x 2 / [1,2]9 = 45 cycle
-def : InstRW<[A64FXWrite_75Cyc_GI03], (instrs FADDV_VPZ_H)>;
-def : InstRW<[A64FXWrite_60Cyc_GI03], (instrs FADDV_VPZ_S)>;
-def : InstRW<[A64FXWrite_45Cyc_GI03], (instrs FADDV_VPZ_D)>;
-
-// [108] "fcadd $Zdn, $Pg/m, $_Zdn, $Zm, $imm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCADD_ZPmZ_D, FCADD_ZPmZ_H, FCADD_ZPmZ_S)>;
-
-// [109] "fcmeq $Pd, $Pg/z, $Zn, #0.0";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMEQ_PPzZ0_D, FCMEQ_PPzZ0_H, FCMEQ_PPzZ0_S)>;
-
-// [110] "fcmeq $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMEQ_PPzZZ_D, FCMEQ_PPzZZ_H, FCMEQ_PPzZZ_S)>;
-
-// [111] "fcmge $Pd, $Pg/z, $Zn, #0.0";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGE_PPzZ0_D, FCMGE_PPzZ0_H, FCMGE_PPzZ0_S)>;
-
-// [112] "fcmge $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGE_PPzZZ_D, FCMGE_PPzZZ_H, FCMGE_PPzZZ_S)>;
-
-// [113] "fcmgt $Pd, $Pg/z, $Zn, #0.0";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGT_PPzZ0_D, FCMGT_PPzZ0_H, FCMGT_PPzZ0_S)>;
-
-// [114] "fcmgt $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGT_PPzZZ_D, FCMGT_PPzZZ_H, FCMGT_PPzZZ_S)>;
-
-// [115] "fcmla $Zda, $Pg/m, $Zn, $Zm, $imm";
-def : InstRW<[A64FXWrite_15Cyc_GI03], (instrs FCMLA_ZPmZZ_D, FCMLA_ZPmZZ_H, FCMLA_ZPmZZ_S)>;
-
-// [116] "fcmla $Zda, $Zn, $Zm$iop, $imm";
-def : InstRW<[A64FXWrite_15Cyc_GI03], (instrs FCMLA_ZZZI_H, FCMLA_ZZZI_S)>;
-
-// [117] "fcmle $Pd, $Pg/z, $Zn, #0.0";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMLE_PPzZ0_D, FCMLE_PPzZ0_H, FCMLE_PPzZ0_S)>;
-
-// [118] "fcmlt $Pd, $Pg/z, $Zn, #0.0";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMLT_PPzZ0_D, FCMLT_PPzZ0_H, FCMLT_PPzZ0_S)>;
-
-// [119] "fcmne $Pd, $Pg/z, $Zn, #0.0";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMNE_PPzZ0_D, FCMNE_PPzZ0_H, FCMNE_PPzZ0_S)>;
-
-// [120] "fcmne $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMNE_PPzZZ_D, FCMNE_PPzZZ_H, FCMNE_PPzZZ_S)>;
-
-// [121] "fcmuo $Pd, $Pg/z, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMUO_PPzZZ_D, FCMUO_PPzZZ_H, FCMUO_PPzZZ_S)>; - -// [122] "fcpy $Zd, $Pg/m, $imm8"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCPY_ZPmI_D, FCPY_ZPmI_H, FCPY_ZPmI_S)>; - -// [123] "fcvt $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVT_ZPmZ_DtoH, FCVT_ZPmZ_DtoS, FCVT_ZPmZ_HtoD, FCVT_ZPmZ_HtoS, FCVT_ZPmZ_StoD, FCVT_ZPmZ_StoH)>; - -// [124] "fcvtzs $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVTZS_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoS, FCVTZS_ZPmZ_HtoD, FCVTZS_ZPmZ_HtoH, FCVTZS_ZPmZ_HtoS, FCVTZS_ZPmZ_StoD, FCVTZS_ZPmZ_StoS)>; - -// [125] "fcvtzu $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVTZU_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoS, FCVTZU_ZPmZ_HtoD, FCVTZU_ZPmZ_HtoH, FCVTZU_ZPmZ_HtoS, FCVTZU_ZPmZ_StoD, FCVTZU_ZPmZ_StoS)>; - -// [126] "fdiv $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FDIV_ZPmZ_D)>; -def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FDIV_ZPmZ_H)>; -def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FDIV_ZPmZ_S)>; - -// [127] "fdivr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FDIVR_ZPmZ_D)>; -def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FDIVR_ZPmZ_H)>; -def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FDIVR_ZPmZ_S)>; - -// [128] "fdup $Zd, $imm8"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FDUP_ZI_D, FDUP_ZI_H, FDUP_ZI_S)>; - -// [129] "fexpa $Zd, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FEXPA_ZZ_D, FEXPA_ZZ_H, FEXPA_ZZ_S)>; - -// [130] "fmad $Zdn, $Pg/m, $Zm, $Za"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMAD_ZPmZZ_D, FMAD_ZPmZZ_H, FMAD_ZPmZZ_S)>; - -// [131] "fmax $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMAX_ZPmZ_D, FMAX_ZPmZ_H, FMAX_ZPmZ_S)>; - -// [132] "fmax $Zdn, $Pg/m, $_Zdn, $i1"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMAX_ZPmI_D, FMAX_ZPmI_H, FMAX_ZPmI_S)>; - -// [133] "fmaxnm $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMAXNM_ZPmZ_D, FMAXNM_ZPmZ_H, FMAXNM_ZPmZ_S)>; - -// [134] "fmaxnm $Zdn, $Pg/m, $_Zdn, $i1"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMAXNM_ZPmI_D, FMAXNM_ZPmI_H, FMAXNM_ZPmI_S)>; - -// [135] "fmaxnmv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMAXNMV_VPZ_D, FMAXNMV_VPZ_H, FMAXNMV_VPZ_S)>; - -// [136] "fmaxv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMAXV_VPZ_D, FMAXV_VPZ_H, FMAXV_VPZ_S)>; - -// [137] "fmin $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMIN_ZPmZ_D, FMIN_ZPmZ_H, FMIN_ZPmZ_S)>; - -// [138] "fmin $Zdn, $Pg/m, $_Zdn, $i1"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMIN_ZPmI_D, FMIN_ZPmI_H, FMIN_ZPmI_S)>; - -// [139] "fminnm $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMINNM_ZPmZ_D, FMINNM_ZPmZ_H, FMINNM_ZPmZ_S)>; - -// [140] "fminnm $Zdn, $Pg/m, $_Zdn, $i1"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMINNM_ZPmI_D, FMINNM_ZPmI_H, FMINNM_ZPmI_S)>; - -// [141] "fminnmv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMINNMV_VPZ_D, FMINNMV_VPZ_H, FMINNMV_VPZ_S)>; - -// [142] "fminv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMINV_VPZ_D, FMINV_VPZ_H, FMINV_VPZ_S)>; - -// [143] "fmla $Zda, $Pg/m, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLA_ZPmZZ_D, FMLA_ZPmZZ_H, FMLA_ZPmZZ_S)>; - -// [144] "fmla $Zda, $Zn, $Zm$iop"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLA_ZZZI_D, FMLA_ZZZI_H, FMLA_ZZZI_S)>; - -// [145] "fmls $Zda, $Pg/m, 
$Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLS_ZPmZZ_D, FMLS_ZPmZZ_H, FMLS_ZPmZZ_S)>; - -// [146] "fmls $Zda, $Zn, $Zm$iop"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLS_ZZZI_D, FMLS_ZZZI_H, FMLS_ZZZI_S)>; - -// [147] "fmsb $Zdn, $Pg/m, $Zm, $Za"; - -// [148] "fmul $Zd, $Zn, $Zm"; - -// [149] "fmul $Zd, $Zn, $Zm$iop"; - -// [150] "fmul $Zdn, $Pg/m, $_Zdn, $Zm"; - -// [151] "fmul $Zdn, $Pg/m, $_Zdn, $i1"; - -// [152] "fmulx $Zdn, $Pg/m, $_Zdn, $Zm"; - -// [153] "fneg $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FNEG_ZPmZ_D, FNEG_ZPmZ_H, FNEG_ZPmZ_S)>; - -// [154] "fnmad $Zdn, $Pg/m, $Zm, $Za"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMAD_ZPmZZ_D, FNMAD_ZPmZZ_H, FNMAD_ZPmZZ_S)>; - -// [155] "fnmla $Zda, $Pg/m, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMLA_ZPmZZ_D, FNMLA_ZPmZZ_H, FNMLA_ZPmZZ_S)>; - -// [156] "fnmls $Zda, $Pg/m, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMLS_ZPmZZ_D, FNMLS_ZPmZZ_H, FNMLS_ZPmZZ_S)>; - -// [157] "fnmsb $Zdn, $Pg/m, $Zm, $Za"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMSB_ZPmZZ_D, FNMSB_ZPmZZ_H, FNMSB_ZPmZZ_S)>; - -// [158] "frecpe $Zd, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRECPE_ZZ_D, FRECPE_ZZ_H, FRECPE_ZZ_S)>; - -// [159] "frecps $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRECPS_ZZZ_D, FRECPS_ZZZ_H, FRECPS_ZZZ_S)>; - -// [160] "frecpx $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRECPX_ZPmZ_D, FRECPX_ZPmZ_H, FRECPX_ZPmZ_S)>; - -// [161] "frinta $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTA_ZPmZ_D, FRINTA_ZPmZ_H, FRINTA_ZPmZ_S)>; - -// [162] "frinti $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTI_ZPmZ_D, FRINTI_ZPmZ_H, FRINTI_ZPmZ_S)>; - -// [163] "frintm $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTM_ZPmZ_D, FRINTM_ZPmZ_H, FRINTM_ZPmZ_S)>; - -// [164] "frintn $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTN_ZPmZ_D, FRINTN_ZPmZ_H, FRINTN_ZPmZ_S)>; - -// [165] "frintp $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTP_ZPmZ_D, FRINTP_ZPmZ_H, FRINTP_ZPmZ_S)>; - -// [166] "frintx $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTX_ZPmZ_D, FRINTX_ZPmZ_H, FRINTX_ZPmZ_S)>; - -// [167] "frintz $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTZ_ZPmZ_D, FRINTZ_ZPmZ_H, FRINTZ_ZPmZ_S)>; - -// [168] "frsqrte $Zd, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRSQRTE_ZZ_D, FRSQRTE_ZZ_H, FRSQRTE_ZZ_S)>; - -// [169] "frsqrts $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRSQRTS_ZZZ_D, FRSQRTS_ZZZ_H, FRSQRTS_ZZZ_S)>; - -// [170] "fscale $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSCALE_ZPmZ_D, FSCALE_ZPmZ_H, FSCALE_ZPmZ_S)>; - -// [171] "fsqrt $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FSQRT_ZPmZ_D)>; -def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FSQRT_ZPmZ_H)>; -def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FSQRT_ZPmZ_S)>; - -// [172] "fsub $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUB_ZZZ_D, FSUB_ZZZ_H, FSUB_ZZZ_S)>; - -// [173] "fsub $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUB_ZPmZ_D, FSUB_ZPmZ_H, FSUB_ZPmZ_S)>; - -// [174] "fsub $Zdn, $Pg/m, $_Zdn, $i1"; -def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs FSUB_ZPmI_D, FSUB_ZPmI_H, FSUB_ZPmI_S)>; - -// [175] "fsubr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUBR_ZPmZ_D, FSUBR_ZPmZ_H, 
FSUBR_ZPmZ_S)>; - -// [176] "fsubr $Zdn, $Pg/m, $_Zdn, $i1"; -def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs FSUBR_ZPmI_D, FSUBR_ZPmI_H, FSUBR_ZPmI_S)>; - -// [177] "ftmad $Zdn, $_Zdn, $Zm, $imm3"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FTMAD_ZZI_D, FTMAD_ZZI_H, FTMAD_ZZI_S)>; - -// [178] "ftsmul $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FTSMUL_ZZZ_D, FTSMUL_ZZZ_H, FTSMUL_ZZZ_S)>; - -// [180] "incb $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCB_XPiI)>; - -// [181] "incd $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCD_XPiI)>; - -// [182] "incd $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCD_ZPiI)>; - -// [183] "inch $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCH_XPiI)>; - -// [184] "inch $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCH_ZPiI)>; - -// [185] "incp $Rdn, $Pg"; -def : InstRW<[A64FXWrite_6Cyc_GI124], (instrs INCP_XP_B, INCP_XP_D, INCP_XP_H, INCP_XP_S)>; - -// [186] "incp $Zdn, $Pg"; -def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs INCP_ZP_D, INCP_ZP_H, INCP_ZP_S)>; - -// [187] "incw $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCW_XPiI)>; - -// [188] "incw $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCW_ZPiI)>; - -// [189] "index $Zd, $Rn, $Rm"; -def : InstRW<[A64FXWrite_17Cyc_GI02], (instrs INDEX_RR_B, INDEX_RR_D, INDEX_RR_H, INDEX_RR_S)>; - -// [190] "index $Zd, $Rn, $imm5"; -def : InstRW<[A64FXWrite_21Cyc_GI02], (instrs INDEX_RI_B, INDEX_RI_D, INDEX_RI_H, INDEX_RI_S)>; - -// [191] "index $Zd, $imm5, $Rm"; -def : InstRW<[A64FXWrite_21Cyc_GI02], (instrs INDEX_IR_B, INDEX_IR_D, INDEX_IR_H, INDEX_IR_S)>; - -// [192] "index $Zd, $imm5, $imm5b"; -def : InstRW<[A64FXWrite_13Cyc_GI0], (instrs INDEX_II_B, INDEX_II_D, INDEX_II_H, INDEX_II_S)>; - -// [193] "insr $Zdn, $Rm"; -def : InstRW<[A64FXWrite_10Cyc_GI02], (instrs INSR_ZR_B, INSR_ZR_D, INSR_ZR_H, INSR_ZR_S)>; - -// [194] "insr $Zdn, $Vm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs INSR_ZV_B, INSR_ZV_D, INSR_ZV_H, INSR_ZV_S)>; - -// [195] "lasta $Rd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_25Cyc_GI056], (instrs LASTA_RPZ_B, LASTA_RPZ_D, LASTA_RPZ_H, LASTA_RPZ_S)>; - -// [196] "lasta $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs LASTA_VPZ_B, LASTA_VPZ_D, LASTA_VPZ_H, LASTA_VPZ_S)>; - -// [197] "lastb $Rd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_25Cyc_GI056], (instrs LASTB_RPZ_B, LASTB_RPZ_D, LASTB_RPZ_H, LASTB_RPZ_S)>; - -// [198] "lastb $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs LASTB_VPZ_B, LASTB_VPZ_D, LASTB_VPZ_H, LASTB_VPZ_S)>; - -// [199] "ld1b $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1B, LD1B_D, LD1B_H, LD1B_S)>; - -// [200] "ld1b $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1B_D_REAL, GLD1B_D_SXTW_REAL, GLD1B_D_UXTW_REAL, GLD1B_S_SXTW_REAL, GLD1B_S_UXTW_REAL)>; - -// [201] "ld1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1B_D_IMM_REAL, LD1B_H_IMM_REAL, LD1B_IMM_REAL, LD1B_S_IMM_REAL)>; - -// [202] "ld1b $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1B_D_IMM_REAL, GLD1B_S_IMM_REAL)>; - -// [203] "ld1d $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1D)>; - -// [204] "ld1d $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1D_REAL, GLD1D_SCALED_REAL, GLD1D_SXTW_REAL, 
GLD1D_SXTW_SCALED_REAL, GLD1D_UXTW_REAL, GLD1D_UXTW_SCALED_REAL)>; - -// [205] "ld1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1D_IMM_REAL)>; - -// [206] "ld1d $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1D_IMM_REAL)>; - -// [207] "ld1h $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1H, LD1H_D, LD1H_S)>; - -// [208] "ld1h $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1H_D_REAL, GLD1H_D_SCALED_REAL, GLD1H_D_SXTW_REAL, GLD1H_D_SXTW_SCALED_REAL, GLD1H_D_UXTW_REAL, GLD1H_D_UXTW_SCALED_REAL, GLD1H_S_SXTW_REAL, GLD1H_S_SXTW_SCALED_REAL, GLD1H_S_UXTW_REAL, GLD1H_S_UXTW_SCALED_REAL)>; - -// [209] "ld1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1H_D_IMM_REAL, LD1H_IMM_REAL, LD1H_S_IMM_REAL)>; - -// [210] "ld1h $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1H_D_IMM_REAL, GLD1H_S_IMM_REAL)>; - -// [211] "ld1rb $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RB_D_IMM, LD1RB_H_IMM, LD1RB_IMM, LD1RB_S_IMM)>; - -// [212] "ld1rd $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RD_IMM)>; - -// [213] "ld1rh $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RH_D_IMM, LD1RH_IMM, LD1RH_S_IMM)>; - -// [214] "ld1rqb $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_B)>; - -// [215] "ld1rqb $Zt, $Pg/z, [$Rn, $imm4]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_B_IMM)>; - -// [216] "ld1rqd $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_D)>; - -// [217] "ld1rqd $Zt, $Pg/z, [$Rn, $imm4]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_D_IMM)>; - -// [218] "ld1rqh $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_H)>; - -// [219] "ld1rqh $Zt, $Pg/z, [$Rn, $imm4]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_H_IMM)>; - -// [220] "ld1rqw $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_W)>; - -// [221] "ld1rqw $Zt, $Pg/z, [$Rn, $imm4]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_W_IMM)>; - -// [222] "ld1rsb $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSB_D_IMM, LD1RSB_H_IMM, LD1RSB_S_IMM)>; - -// [223] "ld1rsh $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSH_D_IMM, LD1RSH_S_IMM)>; - -// [224] "ld1rsw $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSW_IMM)>; - -// [225] "ld1rw $Zt, $Pg/z, [$Rn, $imm6]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RW_D_IMM, LD1RW_IMM)>; - -// [226] "ld1sb $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SB_D, LD1SB_H, LD1SB_S)>; - -// [227] "ld1sb $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SB_D_REAL, GLD1SB_D_SXTW_REAL, GLD1SB_D_UXTW_REAL, GLD1SB_S_SXTW_REAL, GLD1SB_S_UXTW_REAL)>; - -// [228] "ld1sb $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SB_D_IMM_REAL, LD1SB_H_IMM_REAL, LD1SB_S_IMM_REAL)>; - -// [229] "ld1sb $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SB_D_IMM_REAL, GLD1SB_S_IMM_REAL)>; - -// [230] "ld1sh $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SH_D, LD1SH_S)>; - -// [231] "ld1sh $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SH_D_REAL, 
GLD1SH_D_SCALED_REAL, GLD1SH_D_SXTW_REAL, GLD1SH_D_SXTW_SCALED_REAL, GLD1SH_D_UXTW_REAL, GLD1SH_D_UXTW_SCALED_REAL, GLD1SH_S_SXTW_REAL, GLD1SH_S_SXTW_SCALED_REAL, GLD1SH_S_UXTW_REAL, GLD1SH_S_UXTW_SCALED_REAL)>; - -// [232] "ld1sh $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SH_D_IMM_REAL, LD1SH_S_IMM_REAL)>; - -// [233] "ld1sh $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SH_D_IMM_REAL, GLD1SH_S_IMM_REAL)>; - -// [234] "ld1sw $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SW_D)>; - -// [235] "ld1sw $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SW_D_REAL, GLD1SW_D_SCALED_REAL, GLD1SW_D_SXTW_REAL, GLD1SW_D_SXTW_SCALED_REAL, GLD1SW_D_UXTW_REAL, GLD1SW_D_UXTW_SCALED_REAL)>; - -// [236] "ld1sw $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SW_D_IMM_REAL)>; - -// [237] "ld1sw $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SW_D_IMM_REAL)>; - -// [238] "ld1w $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1W, LD1W_D)>; - -// [239] "ld1w $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1W_D_REAL, GLD1W_D_SCALED_REAL, GLD1W_D_SXTW_REAL, GLD1W_D_SXTW_SCALED_REAL, GLD1W_D_UXTW_REAL, GLD1W_D_UXTW_SCALED_REAL, GLD1W_SXTW_REAL, GLD1W_SXTW_SCALED_REAL, GLD1W_UXTW_REAL, GLD1W_UXTW_SCALED_REAL)>; - -// [240] "ld1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1W_D_IMM_REAL, LD1W_IMM_REAL)>; - -// [241] "ld1w $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1W_D_IMM_REAL, GLD1W_IMM_REAL)>; - -// [242] "ld2b $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2B)>; - -// [243] "ld2b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2B_IMM)>; - -// [244] "ld2d $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2D)>; - -// [245] "ld2d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2D_IMM)>; - -// [246] "ld2h $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2H)>; - -// [247] "ld2h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2H_IMM)>; - -// [248] "ld2w $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2W)>; - -// [249] "ld2w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2W_IMM)>; - -// [250] "ld3b $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3B)>; - -// [251] "ld3b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3B_IMM)>; - -// [252] "ld3d $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3D)>; - -// [253] "ld3d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3D_IMM)>; - -// [254] "ld3h $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3H)>; - -// [255] "ld3h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3H_IMM)>; - -// [256] "ld3w $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3W)>; - -// [257] "ld3w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3W_IMM)>; - -// [258] "ld4b $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD4B)>; - -// [259] "ld4b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def 
: InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD4B_IMM)>; - -// [260] "ld4d $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4D)>; - -// [261] "ld4d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4D_IMM)>; - -// [262] "ld4h $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4H)>; - -// [263] "ld4h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4H_IMM)>; - -// [264] "ld4w $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4W)>; - -// [265] "ld4w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4W_IMM)>; - -// [266] "ldff1b $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1B_D_REAL, LDFF1B_H_REAL, LDFF1B_REAL, LDFF1B_S_REAL)>; - -// [267] "ldff1b $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1B_D_REAL, GLDFF1B_D_SXTW_REAL, GLDFF1B_D_UXTW_REAL, GLDFF1B_S_SXTW_REAL, GLDFF1B_S_UXTW_REAL)>; - -// [268] "ldff1b $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1B_D_IMM_REAL, GLDFF1B_S_IMM_REAL)>; - -// [269] "ldff1d $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1D_REAL)>; - -// [270] "ldff1d $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1D_REAL, GLDFF1D_SCALED_REAL, GLDFF1D_SXTW_REAL, GLDFF1D_SXTW_SCALED_REAL, GLDFF1D_UXTW_REAL, GLDFF1D_UXTW_SCALED_REAL)>; - -// [271] "ldff1d $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1D_IMM_REAL)>; - -// [272] "ldff1h $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1H_D_REAL, LDFF1H_REAL, LDFF1H_S_REAL)>; - -// [273] "ldff1h $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1H_D_REAL, GLDFF1H_D_SCALED_REAL, GLDFF1H_D_SXTW_REAL, GLDFF1H_D_SXTW_SCALED_REAL, GLDFF1H_D_UXTW_REAL, GLDFF1H_D_UXTW_SCALED_REAL, GLDFF1H_S_SXTW_REAL, GLDFF1H_S_SXTW_SCALED_REAL, GLDFF1H_S_UXTW_REAL, GLDFF1H_S_UXTW_SCALED_REAL)>; - -// [274] "ldff1h $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1H_D_IMM_REAL, GLDFF1H_S_IMM_REAL)>; - -// [275] "ldff1sb $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SB_D_REAL, LDFF1SB_H_REAL, LDFF1SB_S_REAL)>; - -// [276] "ldff1sb $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SB_D_REAL, GLDFF1SB_D_SXTW_REAL, GLDFF1SB_D_UXTW_REAL, GLDFF1SB_S_SXTW_REAL, GLDFF1SB_S_UXTW_REAL)>; - -// [277] "ldff1sb $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SB_D_IMM_REAL, GLDFF1SB_S_IMM_REAL)>; - -// [278] "ldff1sh $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SH_D_REAL, LDFF1SH_S_REAL)>; - -// [279] "ldff1sh $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SH_D_REAL, GLDFF1SH_D_SCALED_REAL, GLDFF1SH_D_SXTW_REAL, GLDFF1SH_D_SXTW_SCALED_REAL, GLDFF1SH_D_UXTW_REAL, GLDFF1SH_D_UXTW_SCALED_REAL, GLDFF1SH_S_SXTW_REAL, GLDFF1SH_S_SXTW_SCALED_REAL, GLDFF1SH_S_UXTW_REAL, GLDFF1SH_S_UXTW_SCALED_REAL)>; - -// [280] "ldff1sh $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SH_D_IMM_REAL, GLDFF1SH_S_IMM_REAL)>; - -// [281] "ldff1sw $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SW_D_REAL)>; - -// [282] "ldff1sw $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SW_D_REAL, 
GLDFF1SW_D_SCALED_REAL, GLDFF1SW_D_SXTW_REAL, GLDFF1SW_D_SXTW_SCALED_REAL, GLDFF1SW_D_UXTW_REAL, GLDFF1SW_D_UXTW_SCALED_REAL)>; - -// [283] "ldff1sw $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SW_D_IMM_REAL)>; - -// [284] "ldff1w $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1W_D_REAL, LDFF1W_REAL)>; - -// [285] "ldff1w $Zt, $Pg/z, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1W_D_REAL, GLDFF1W_D_SCALED_REAL, GLDFF1W_D_SXTW_REAL, GLDFF1W_D_SXTW_SCALED_REAL, GLDFF1W_D_UXTW_REAL, GLDFF1W_D_UXTW_SCALED_REAL, GLDFF1W_SXTW_REAL, GLDFF1W_SXTW_SCALED_REAL, GLDFF1W_UXTW_REAL, GLDFF1W_UXTW_SCALED_REAL)>; - -// [286] "ldff1w $Zt, $Pg/z, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1W_D_IMM_REAL, GLDFF1W_IMM_REAL)>; - -// [287] "ldnf1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1B_D_IMM_REAL, LDNF1B_H_IMM_REAL, LDNF1B_IMM_REAL, LDNF1B_S_IMM_REAL)>; - -// [288] "ldnf1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1D_IMM_REAL)>; - -// [289] "ldnf1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1H_D_IMM_REAL, LDNF1H_IMM_REAL, LDNF1H_S_IMM_REAL)>; - -// [290] "ldnf1sb $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SB_D_IMM_REAL, LDNF1SB_H_IMM_REAL, LDNF1SB_S_IMM_REAL)>; - -// [291] "ldnf1sh $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SH_D_IMM_REAL, LDNF1SH_S_IMM_REAL)>; - -// [292] "ldnf1sw $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SW_D_IMM_REAL)>; - -// [293] "ldnf1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1W_D_IMM_REAL, LDNF1W_IMM_REAL)>; - -// [294] "ldnt1b $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1B_ZRR)>; - -// [295] "ldnt1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1B_ZRI)>; - -// [296] "ldnt1d $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1D_ZRR)>; - -// [297] "ldnt1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1D_ZRI)>; - -// [298] "ldnt1h $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1H_ZRR)>; - -// [299] "ldnt1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1H_ZRI)>; - -// [300] "ldnt1w $Zt, $Pg/z, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1W_ZRR)>; - -// [301] "ldnt1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1W_ZRI)>; - -// [302] "ldr $Pt, [$Rn, $imm9, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI5], (instrs LDR_PXI)>; - -// [303] "ldr $Zt, [$Rn, $imm9, mul vl]"; -def : InstRW<[A64FXWrite_11Cyc_GI5], (instrs LDR_ZXI)>; - -// [304] "lsl $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_WIDE_ZZZ_B, LSL_WIDE_ZZZ_H, LSL_WIDE_ZZZ_S)>; - -// [305] "lsl $Zd, $Zn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_ZZI_B, LSL_ZZI_D, LSL_ZZI_H, LSL_ZZI_S)>; - -// [306] "lsl $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_WIDE_ZPmZ_B, LSL_WIDE_ZPmZ_H, LSL_WIDE_ZPmZ_S, LSL_ZPmZ_B, LSL_ZPmZ_D, LSL_ZPmZ_H, LSL_ZPmZ_S)>; - -// [307] "lsl $Zdn, $Pg/m, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_ZPmI_B, LSL_ZPmI_D, LSL_ZPmI_H, LSL_ZPmI_S)>; - -// [308] "lslr 
$Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSLR_ZPmZ_B, LSLR_ZPmZ_D, LSLR_ZPmZ_H, LSLR_ZPmZ_S)>; - -// [309] "lsr $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_WIDE_ZZZ_B, LSR_WIDE_ZZZ_H, LSR_WIDE_ZZZ_S)>; - -// [310] "lsr $Zd, $Zn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_ZZI_B, LSR_ZZI_D, LSR_ZZI_H, LSR_ZZI_S)>; - -// [311] "lsr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_WIDE_ZPmZ_B, LSR_WIDE_ZPmZ_H, LSR_WIDE_ZPmZ_S, LSR_ZPmZ_B, LSR_ZPmZ_D, LSR_ZPmZ_H, LSR_ZPmZ_S)>; - -// [312] "lsr $Zdn, $Pg/m, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_ZPmI_B, LSR_ZPmI_D, LSR_ZPmI_H, LSR_ZPmI_S)>; - -// [313] "lsrr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSRR_ZPmZ_B, LSRR_ZPmZ_D, LSRR_ZPmZ_H, LSRR_ZPmZ_S)>; - -// [314] "mad $Zdn, $Pg/m, $Zm, $Za"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MAD_ZPmZZ_B, MAD_ZPmZZ_D, MAD_ZPmZZ_H, MAD_ZPmZZ_S)>; - -// [315] "mla $Zda, $Pg/m, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MLA_ZPmZZ_B, MLA_ZPmZZ_D, MLA_ZPmZZ_H, MLA_ZPmZZ_S)>; - -// [316] "mls $Zda, $Pg/m, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MLS_ZPmZZ_B, MLS_ZPmZZ_D, MLS_ZPmZZ_H, MLS_ZPmZZ_S)>; - -// [317] "movprfx $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZPmZ_B, MOVPRFX_ZPmZ_D, MOVPRFX_ZPmZ_H, MOVPRFX_ZPmZ_S)>; - -// [318] "movprfx $Zd, $Pg/z, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZPzZ_B, MOVPRFX_ZPzZ_D, MOVPRFX_ZPzZ_H, MOVPRFX_ZPzZ_S)>; - -// [319] "movprfx $Zd, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZZ)>; - -// [320] "msb $Zdn, $Pg/m, $Zm, $Za"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MSB_ZPmZZ_B, MSB_ZPmZZ_D, MSB_ZPmZZ_H, MSB_ZPmZZ_S)>; - -// [321] "mul $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MUL_ZPmZ_B, MUL_ZPmZ_D, MUL_ZPmZ_H, MUL_ZPmZ_S)>; - -// [322] "mul $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs MUL_ZI_B, MUL_ZI_D, MUL_ZI_H, MUL_ZI_S)>; - -// [323] "nand $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NAND_PPzPP)>; - -// [324] "nands $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NANDS_PPzPP)>; - -// [325] "neg $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs NEG_ZPmZ_B, NEG_ZPmZ_D, NEG_ZPmZ_H, NEG_ZPmZ_S)>; - -// [326] "nor $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NOR_PPzPP)>; - -// [327] "nors $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NORS_PPzPP)>; - -// [328] "not $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs NOT_ZPmZ_B, NOT_ZPmZ_D, NOT_ZPmZ_H, NOT_ZPmZ_S)>; - -// [329] "orn $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORN_PPzPP)>; - -// [330] "orns $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORNS_PPzPP)>; - -// [331] "orr $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORR_PPzPP)>; - -// [332] "orr $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ORR_ZZZ)>; - -// [333] "orr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ORR_ZPmZ_B, ORR_ZPmZ_D, ORR_ZPmZ_H, ORR_ZPmZ_S)>; - -// [334] "orr $Zdn, $_Zdn, $imms13"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs ORR_ZI)>; - -// [335] "orrs $Pd, $Pg/z, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORRS_PPzPP)>; - -// [336] "orv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs ORV_VPZ_B, 
ORV_VPZ_D, ORV_VPZ_H, ORV_VPZ_S)>; - -// [337] "pfalse $Pd"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PFALSE)>; - -// [338] "pnext $Pdn, $Pg, $_Pdn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PNEXT_B, PNEXT_D, PNEXT_H, PNEXT_S)>; - -// [339] "prfb $prfop, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFB_PRR)>; - -// [340] "prfb $prfop, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFB_D_SCALED, PRFB_D_SXTW_SCALED, PRFB_D_UXTW_SCALED, PRFB_S_SXTW_SCALED, PRFB_S_UXTW_SCALED)>; - -// [341] "prfb $prfop, $Pg, [$Rn, $imm6, mul vl]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFB_PRI)>; - -// [342] "prfb $prfop, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFB_D_PZI, PRFB_S_PZI)>; - -// [343] "prfd $prfop, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFD_PRR)>; - -// [344] "prfd $prfop, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFD_D_SCALED, PRFD_D_SXTW_SCALED, PRFD_D_UXTW_SCALED, PRFD_S_SXTW_SCALED, PRFD_S_UXTW_SCALED)>; - -// [345] "prfd $prfop, $Pg, [$Rn, $imm6, mul vl]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFD_PRI)>; - -// [346] "prfd $prfop, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFD_D_PZI, PRFD_S_PZI)>; - -// [347] "prfh $prfop, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRR)>; - -// [348] "prfh $prfop, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFH_D_SCALED, PRFH_D_SXTW_SCALED, PRFH_D_UXTW_SCALED, PRFH_S_SXTW_SCALED, PRFH_S_UXTW_SCALED)>; - -// [349] "prfh $prfop, $Pg, [$Rn, $imm6, mul vl]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRI)>; - -// [350] "prfh $prfop, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFH_D_PZI, PRFH_S_PZI)>; - -// [351] "prfw $prfop, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFS_PRR)>; - -// [352] "prfw $prfop, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFW_D_SCALED, PRFW_D_SXTW_SCALED, PRFW_D_UXTW_SCALED, PRFW_S_SXTW_SCALED, PRFW_S_UXTW_SCALED)>; - -// [353] "prfw $prfop, $Pg, [$Rn, $imm6, mul vl]"; -def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFW_PRI)>; - -// [354] "prfw $prfop, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFW_D_PZI, PRFW_S_PZI)>; - -// [355] "ptest $Pg, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTEST_PP)>; - -// [356] "ptrue $Pd, $pattern"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTRUE_B, PTRUE_D, PTRUE_H, PTRUE_S)>; - -// [357] "ptrues $Pd, $pattern"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTRUES_B, PTRUES_D, PTRUES_H, PTRUES_S)>; - -// [358] "punpkhi $Pd, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PUNPKHI_PP)>; - -// [359] "punpklo $Pd, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PUNPKLO_PP)>; - -// [360] "rbit $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBIT_ZPmZ_B, RBIT_ZPmZ_D, RBIT_ZPmZ_H, RBIT_ZPmZ_S)>; - -// [361] "rdffr $Pd"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFR_P)>; - -// [362] "rdffr $Pd, $Pg/z"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFR_PPz)>; - -// [363] "rdffrs $Pd, $Pg/z"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFRS_PPz)>; - -// [364] "rdvl $Rd, $imm6"; -def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs RDVLI_XI)>; - -// [365] "rev $Pd, $Pn"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs REV_PP_B, REV_PP_D, REV_PP_H, REV_PP_S)>; - -// [366] "rev $Zd, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs REV_ZZ_B, REV_ZZ_D, REV_ZZ_H, 
REV_ZZ_S)>; - -// [367] "revb $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVB_ZPmZ_D, REVB_ZPmZ_H, REVB_ZPmZ_S)>; - -// [368] "revh $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVH_ZPmZ_D, REVH_ZPmZ_S)>; - -// [369] "revw $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVW_ZPmZ_D)>; - -// [370] "sabd $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SABD_ZPmZ_B, SABD_ZPmZ_D, SABD_ZPmZ_H, SABD_ZPmZ_S)>; - -// [371] "saddv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_12Cyc_GI03], (instrs SADDV_VPZ_B, SADDV_VPZ_H, SADDV_VPZ_S)>; - -// [372] "scvtf $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SCVTF_ZPmZ_DtoD, SCVTF_ZPmZ_DtoH, SCVTF_ZPmZ_DtoS, SCVTF_ZPmZ_HtoH, SCVTF_ZPmZ_StoD, SCVTF_ZPmZ_StoH, SCVTF_ZPmZ_StoS)>; - -// [373] "sdiv $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs SDIV_ZPmZ_D, SDIV_ZPmZ_S)>; - -// [374] "sdivr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs SDIVR_ZPmZ_D, SDIVR_ZPmZ_S)>; - -// [375] "sdot $Zda, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SDOT_ZZZ_D, SDOT_ZZZ_S)>; - -// [376] "sdot $Zda, $Zn, $Zm$iop"; -def : InstRW<[A64FXWrite_15Cyc_NGI03], (instrs SDOT_ZZZI_D, SDOT_ZZZI_S)>; - -// [377] "sel $Pd, $Pg, $Pn, $Pm"; -def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs SEL_PPPP)>; - -// [378] "sel $Zd, $Pg, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SEL_ZPZZ_B, SEL_ZPZZ_D, SEL_ZPZZ_H, SEL_ZPZZ_S)>; - -// [379] "setffr"; -def : InstRW<[A64FXWrite_6Cyc], (instrs SETFFR)>; - -// [380] "smax $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SMAX_ZPmZ_B, SMAX_ZPmZ_D, SMAX_ZPmZ_H, SMAX_ZPmZ_S)>; - -// [381] "smax $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SMAX_ZI_B, SMAX_ZI_D, SMAX_ZI_H, SMAX_ZI_S)>; - -// [382] "smaxv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs SMAXV_VPZ_B, SMAXV_VPZ_D, SMAXV_VPZ_H, SMAXV_VPZ_S)>; - -// [383] "smin $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SMIN_ZPmZ_B, SMIN_ZPmZ_D, SMIN_ZPmZ_H, SMIN_ZPmZ_S)>; - -// [384] "smin $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SMIN_ZI_B, SMIN_ZI_D, SMIN_ZI_H, SMIN_ZI_S)>; - -// [385] "sminv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs SMINV_VPZ_B, SMINV_VPZ_D, SMINV_VPZ_H, SMINV_VPZ_S)>; - -// [386] "smulh $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SMULH_ZPmZ_B, SMULH_ZPmZ_D, SMULH_ZPmZ_H, SMULH_ZPmZ_S)>; - -// [387] "splice $Zdn, $Pg, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SPLICE_ZPZ_B, SPLICE_ZPZ_D, SPLICE_ZPZ_H, SPLICE_ZPZ_S)>; - -// [388] "sqadd $Zd, $Zn, $Zm"; - -// [389] "sqadd $Zdn, $_Zdn, $imm"; - -// [390] "sqdecb $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECB_XPiWdI)>; - -// [391] "sqdecb $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECB_XPiI)>; - -// [392] "sqdecd $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECD_XPiWdI)>; - -// [393] "sqdecd $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECD_XPiI)>; - -// [394] "sqdecd $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECD_ZPiI)>; - -// [395] "sqdech $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECH_XPiWdI)>; - -// [396] "sqdech $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECH_XPiI)>; 
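For orientation while reading this block of removed InstRW lines: each bracketed, numbered comment carries the assembly syntax of a group of SVE forms, and the def that follows binds those instruction definitions to a write class whose latency, micro-op count and issue-port group are declared in the re-added half of this diff. A condensed restatement of that pattern — using only names that already appear in this hunk, and assuming the enclosing "let SchedModel = A64FXModel in { ... }" block — looks like this:

// Write class: result ready after 8 cycles, 2 micro-ops, issued on the
// EXA/EXB/PR port group (A64FXGI124, defined further down in this diff).
def A64FXWrite_8Cyc_GI124 : SchedWriteRes<[A64FXGI124]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

// Per-instruction override: the scalar sqdecp forms listed at [398] just
// below are attached to that write class.
def : InstRW<[A64FXWrite_8Cyc_GI124],
             (instrs SQDECP_XP_B, SQDECP_XP_D, SQDECP_XP_H, SQDECP_XP_S)>;

Comments with no def underneath (for example [388] and [389], the sqadd forms) are left on their default scheduling classes instead of receiving an A64FX-specific override.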
- -// [397] "sqdech $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECH_ZPiI)>; - -// [398] "sqdecp $Rdn, $Pg"; -def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQDECP_XP_B, SQDECP_XP_D, SQDECP_XP_H, SQDECP_XP_S)>; - -// [399] "sqdecp $Rdn, $Pg, $_Rdn"; -def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQDECP_XPWd_B, SQDECP_XPWd_D, SQDECP_XPWd_H, SQDECP_XPWd_S)>; - -// [400] "sqdecp $Zdn, $Pg"; -def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs SQDECP_ZP_D, SQDECP_ZP_H, SQDECP_ZP_S)>; - -// [401] "sqdecw $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECW_XPiWdI)>; - -// [402] "sqdecw $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECW_XPiI)>; - -// [403] "sqdecw $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECW_ZPiI)>; - -// [404] "sqincb $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCB_XPiWdI)>; - -// [405] "sqincb $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCB_XPiI)>; - -// [406] "sqincd $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCD_XPiWdI)>; - -// [407] "sqincd $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCD_XPiI)>; - -// [408] "sqincd $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCD_ZPiI)>; - -// [409] "sqinch $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCH_XPiWdI)>; - -// [410] "sqinch $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCH_XPiI)>; - -// [411] "sqinch $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCH_ZPiI)>; - -// [412] "sqincp $Rdn, $Pg"; -def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQINCP_XP_B, SQINCP_XP_D, SQINCP_XP_H, SQINCP_XP_S)>; - -// [413] "sqincp $Rdn, $Pg, $_Rdn"; -def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQINCP_XPWd_B, SQINCP_XPWd_D, SQINCP_XPWd_H, SQINCP_XPWd_S)>; - -// [414] "sqincp $Zdn, $Pg"; -def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs SQINCP_ZP_D, SQINCP_ZP_H, SQINCP_ZP_S)>; - -// [415] "sqincw $Rdn, $_Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCW_XPiWdI)>; - -// [416] "sqincw $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCW_XPiI)>; - -// [417] "sqincw $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCW_ZPiI)>; - -// [418] "sqsub $Zd, $Zn, $Zm"; - -// [419] "sqsub $Zdn, $_Zdn, $imm"; - -// [420] "st1b $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B, ST1B_D, ST1B_H, ST1B_S)>; - -// [421] "st1b $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1B_D_REAL, SST1B_D_SXTW, SST1B_D_UXTW, SST1B_S_SXTW, SST1B_S_UXTW)>; - -// [422] "st1b $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B_D_IMM, ST1B_H_IMM, ST1B_IMM, ST1B_S_IMM)>; - -// [423] "st1b $Zt, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1B_D_IMM, SST1B_S_IMM)>; - -// [424] "st1d $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D)>; - -// [425] "st1d $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1D_REAL, SST1D_SCALED_SCALED_REAL, SST1D_SXTW, SST1D_SXTW_SCALED, SST1D_UXTW, SST1D_UXTW_SCALED)>; - -// [426] "st1d $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D_IMM)>; - -// [427] "st1d $Zt, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_ST1W_15], 
(instrs SST1D_IMM)>; - -// [428] "st1h $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H, ST1H_D, ST1H_S)>; - -// [429] "st1h $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1H_D_REAL, SST1H_D_SCALED_SCALED_REAL, SST1H_D_SXTW, SST1H_D_SXTW_SCALED, SST1H_D_UXTW, SST1H_D_UXTW_SCALED, SST1H_S_SXTW, SST1H_S_SXTW_SCALED, SST1H_S_UXTW, SST1H_S_UXTW_SCALED)>; - -// [430] "st1h $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H_D_IMM, ST1H_IMM, ST1H_S_IMM)>; - -// [431] "st1h $Zt, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1H_D_IMM, SST1H_S_IMM)>; - -// [432] "st1w $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W, ST1W_D)>; - -// [433] "st1w $Zt, $Pg, [$Rn, $Zm]"; -def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1W_D_REAL, SST1W_D_SCALED_SCALED_REAL, SST1W_D_SXTW, SST1W_D_SXTW_SCALED, SST1W_D_UXTW, SST1W_D_UXTW_SCALED, SST1W_SXTW, SST1W_SXTW_SCALED, SST1W_UXTW, SST1W_UXTW_SCALED)>; - -// [434] "st1w $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W_D_IMM, ST1W_IMM)>; - -// [435] "st1w $Zt, $Pg, [$Zn, $imm5]"; -def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1W_D_IMM, SST1W_IMM)>; - -// [436] "st2b $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2B)>; - -// [437] "st2b $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2B_IMM)>; - -// [438] "st2d $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2D)>; - -// [439] "st2d $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2D_IMM)>; - -// [440] "st2h $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2H)>; - -// [441] "st2h $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2H_IMM)>; - -// [442] "st2w $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2W)>; - -// [443] "st2w $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2W_IMM)>; - -// [444] "st3b $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3B)>; - -// [445] "st3b $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3B_IMM)>; - -// [446] "st3d $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3D)>; - -// [447] "st3d $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3D_IMM)>; - -// [448] "st3h $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3H)>; - -// [449] "st3h $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3H_IMM)>; - -// [450] "st3w $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3W)>; - -// [451] "st3w $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3W_IMM)>; - -// [452] "st4b $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4B)>; - -// [453] "st4b $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4B_IMM)>; - -// [454] "st4d $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4D)>; - -// [455] "st4d $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4D_IMM)>; - -// [456] "st4h $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4H)>; - -// [457] "st4h $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4H_IMM)>; - -// [458] "st4w $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4W)>; - -// [459] "st4w $Zt, $Pg, [$Rn, $imm4, mul 
vl]"; -def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4W_IMM)>; - -// [460] "stnt1b $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1B_ZRR)>; - -// [461] "stnt1b $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1B_ZRI)>; - -// [462] "stnt1d $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1D_ZRR)>; - -// [463] "stnt1d $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1D_ZRI)>; - -// [464] "stnt1h $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1H_ZRR)>; - -// [465] "stnt1h $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1H_ZRI)>; - -// [466] "stnt1w $Zt, $Pg, [$Rn, $Rm]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1W_ZRR)>; - -// [467] "stnt1w $Zt, $Pg, [$Rn, $imm4, mul vl]"; -def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1W_ZRI)>; - -// [468] "str $Pt, [$Rn, $imm9, mul vl]"; -def : InstRW<[A64FXWrite_6Cyc_GI15], (instrs STR_PXI)>; - -// [469] "str $Zt, [$Rn, $imm9, mul vl]"; -def : InstRW<[A64FXWrite_6Cyc_GI05], (instrs STR_ZXI)>; - -// [470] "sub $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZZZ_B, SUB_ZZZ_D, SUB_ZZZ_H, SUB_ZZZ_S)>; - -// [471] "sub $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZPmZ_B, SUB_ZPmZ_D, SUB_ZPmZ_H, SUB_ZPmZ_S)>; - -// [472] "sub $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZI_B, SUB_ZI_D, SUB_ZI_H, SUB_ZI_S)>; - -// [473] "subr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUBR_ZPmZ_B, SUBR_ZPmZ_D, SUBR_ZPmZ_H, SUBR_ZPmZ_S)>; - -// [474] "subr $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SUBR_ZI_B, SUBR_ZI_D, SUBR_ZI_H, SUBR_ZI_S)>; - -// [475] "sunpkhi $Zd, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SUNPKHI_ZZ_D, SUNPKHI_ZZ_H, SUNPKHI_ZZ_S)>; - -// [476] "sunpklo $Zd, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SUNPKLO_ZZ_D, SUNPKLO_ZZ_H, SUNPKLO_ZZ_S)>; - -// [477] "sxtb $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTB_ZPmZ_D, SXTB_ZPmZ_H, SXTB_ZPmZ_S)>; - -// [478] "sxth $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTH_ZPmZ_D, SXTH_ZPmZ_S)>; - -// [479] "sxtw $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTW_ZPmZ_D)>; - -// [480] "tbl $Zd, $Zn, $Zm"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs TBL_ZZZ_B, TBL_ZZZ_D, TBL_ZZZ_H, TBL_ZZZ_S)>; - -// [481] "trn1 $Pd, $Pn, $Pm"; - -// [482] "trn1 $Zd, $Zn, $Zm"; - -// [483] "trn2 $Pd, $Pn, $Pm"; - -// [484] "trn2 $Zd, $Zn, $Zm"; - -// [486] "uabd $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UABD_ZPmZ_B, UABD_ZPmZ_D, UABD_ZPmZ_H, UABD_ZPmZ_S)>; - -// [487] "uaddv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_12Cyc_GI03], (instrs UADDV_VPZ_B, UADDV_VPZ_D, UADDV_VPZ_H, UADDV_VPZ_S)>; - -// [488] "ucvtf $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UCVTF_ZPmZ_DtoD, UCVTF_ZPmZ_DtoH, UCVTF_ZPmZ_DtoS, UCVTF_ZPmZ_HtoH, UCVTF_ZPmZ_StoD, UCVTF_ZPmZ_StoH, UCVTF_ZPmZ_StoS)>; - -// [489] "udiv $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs UDIV_ZPmZ_D, UDIV_ZPmZ_S)>; - -// [490] "udivr $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs UDIVR_ZPmZ_D, UDIVR_ZPmZ_S)>; - -// [491] "udot $Zda, $Zn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UDOT_ZZZ_D, UDOT_ZZZ_S)>; - -// [492] "udot $Zda, $Zn, $Zm$iop"; -def : InstRW<[A64FXWrite_15Cyc_NGI03], (instrs UDOT_ZZZI_D, UDOT_ZZZI_S)>; - -// 
[493] "umax $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UMAX_ZPmZ_B, UMAX_ZPmZ_D, UMAX_ZPmZ_H, UMAX_ZPmZ_S)>; - -// [494] "umax $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs UMAX_ZI_B, UMAX_ZI_D, UMAX_ZI_H, UMAX_ZI_S)>; - -// [495] "umaxv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs UMAXV_VPZ_B, UMAXV_VPZ_D, UMAXV_VPZ_H, UMAXV_VPZ_S)>; - -// [496] "umin $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UMIN_ZPmZ_B, UMIN_ZPmZ_D, UMIN_ZPmZ_H, UMIN_ZPmZ_S)>; - -// [497] "umin $Zdn, $_Zdn, $imm"; -def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs UMIN_ZI_B, UMIN_ZI_D, UMIN_ZI_H, UMIN_ZI_S)>; - -// [498] "uminv $Vd, $Pg, $Zn"; -def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs UMINV_VPZ_B, UMINV_VPZ_D, UMINV_VPZ_H, UMINV_VPZ_S)>; - -// [499] "umulh $Zdn, $Pg/m, $_Zdn, $Zm"; -def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UMULH_ZPmZ_B, UMULH_ZPmZ_D, UMULH_ZPmZ_H, UMULH_ZPmZ_S)>; - -// [500] "uqadd $Zd, $Zn, $Zm"; - -// [501] "uqadd $Zdn, $_Zdn, $imm"; - -// [502] "uqdecb $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECB_WPiI, UQDECB_XPiI)>; - -// [503] "uqdecd $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECD_WPiI, UQDECD_XPiI)>; - -// [504] "uqdecd $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECD_ZPiI)>; - -// [505] "uqdech $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECH_WPiI, UQDECH_XPiI)>; - -// [506] "uqdech $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECH_ZPiI)>; - -// [507] "uqdecp $Rdn, $Pg"; -def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs UQDECP_WP_B, UQDECP_WP_D, UQDECP_WP_H, UQDECP_WP_S, UQDECP_XP_B, UQDECP_XP_D, UQDECP_XP_H, UQDECP_XP_S)>; - -// [508] "uqdecp $Zdn, $Pg"; -def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs UQDECP_ZP_D, UQDECP_ZP_H, UQDECP_ZP_S)>; - -// [509] "uqdecw $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECW_WPiI, UQDECW_XPiI)>; - -// [510] "uqdecw $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECW_ZPiI)>; - -// [511] "uqincb $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCB_WPiI, UQINCB_XPiI)>; - -// [512] "uqincd $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCD_WPiI, UQINCD_XPiI)>; - -// [513] "uqincd $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCD_ZPiI)>; - -// [514] "uqinch $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCH_WPiI, UQINCH_XPiI)>; - -// [515] "uqinch $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCH_ZPiI)>; - -// [516] "uqincp $Rdn, $Pg"; -def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs UQINCP_WP_B, UQINCP_WP_D, UQINCP_WP_H, UQINCP_WP_S, UQINCP_XP_B, UQINCP_XP_D, UQINCP_XP_H, UQINCP_XP_S)>; - -// [517] "uqincp $Zdn, $Pg"; -def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs UQINCP_ZP_D, UQINCP_ZP_H, UQINCP_ZP_S)>; - -// [518] "uqincw $Rdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCW_WPiI, UQINCW_XPiI)>; - -// [519] "uqincw $Zdn, $pattern, mul $imm4"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCW_ZPiI)>; - -// [520] "uqsub $Zd, $Zn, $Zm"; -//@@@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQSUB_ZZZ_B, UQSUB_ZZZ_D, UQSUB_ZZZ_H, UQSUB_ZZZ_S)>; - -// [521] "uqsub $Zdn, $_Zdn, $imm"; -//@@@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQSUB_ZI_B, UQSUB_ZI_D, UQSUB_ZI_H, 
UQSUB_ZI_S)>; - -// [522] "uunpkhi $Zd, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs UUNPKHI_ZZ_D, UUNPKHI_ZZ_H, UUNPKHI_ZZ_S)>; - -// [523] "uunpklo $Zd, $Zn"; -def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs UUNPKLO_ZZ_D, UUNPKLO_ZZ_H, UUNPKLO_ZZ_S)>; - -// [524] "uxtb $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTB_ZPmZ_D, UXTB_ZPmZ_H, UXTB_ZPmZ_S)>; - -// [525] "uxth $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTH_ZPmZ_D, UXTH_ZPmZ_S)>; - -// [526] "uxtw $Zd, $Pg/m, $Zn"; -def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTW_ZPmZ_D)>; - -// [527] "uzp1 $Pd, $Pn, $Pm"; - -// [528] "uzp1 $Zd, $Zn, $Zm"; - -// [529] "uzp2 $Pd, $Pn, $Pm"; - -// [530] "uzp2 $Zd, $Zn, $Zm"; - -// [531] "whilele $Pd, $Rn, $Rm"; -def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELE_PWW_B, WHILELE_PWW_D, WHILELE_PWW_H, WHILELE_PWW_S, WHILELE_PXX_B, WHILELE_PXX_D, WHILELE_PXX_H, WHILELE_PXX_S)>; - -// [532] "whilelo $Pd, $Rn, $Rm"; -def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELO_PWW_B, WHILELO_PWW_D, WHILELO_PWW_H, WHILELO_PWW_S, WHILELO_PXX_B, WHILELO_PXX_D, WHILELO_PXX_H, WHILELO_PXX_S)>; - -// [533] "whilels $Pd, $Rn, $Rm"; -def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELS_PWW_B, WHILELS_PWW_D, WHILELS_PWW_H, WHILELS_PWW_S, WHILELS_PXX_B, WHILELS_PXX_D, WHILELS_PXX_H, WHILELS_PXX_S)>; - -// [534] "whilelt $Pd, $Rn, $Rm"; -def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELT_PWW_B, WHILELT_PWW_D, WHILELT_PWW_H, WHILELT_PWW_S, WHILELT_PXX_B, WHILELT_PXX_D, WHILELT_PXX_H, WHILELT_PXX_S)>; - -// [535] "wrffr $Pn"; -def : InstRW<[A64FXWrite_6Cyc_NGI1], (instrs WRFFR)>; - -// [536] "zip1 $Pd, $Pn, $Pm"; - -// [537] "zip1 $Zd, $Zn, $Zm"; - -// [538] "zip2 $Pd, $Pn, $Pm"; - -// [539] "zip2 $Zd, $Zn, $Zm"; - -} // SchedModel = A64FXModel +//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the scheduling model for the Fujitsu A64FX processors. +// +//===----------------------------------------------------------------------===// + +def A64FXModel : SchedMachineModel { + let IssueWidth = 6; // 6 micro-ops dispatched at a time. + let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer. + let LoadLatency = 5; // Optimistic load latency. + let MispredictPenalty = 12; // Extra cycles for mispredicted branch. + // Determined via a mix of micro-arch details and experimentation. + let LoopMicroOpBufferSize = 128; + let PostRAScheduler = 1; // Using PostRA sched. + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = + [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth]; + + let FullInstRWOverlapCheck = 0; +} + +let SchedModel = A64FXModel in { + +// Define the issue ports. + +// A64FXIP* + +// Port 0 +def A64FXIPFLA : ProcResource<1>; + +// Port 1 +def A64FXIPPR : ProcResource<1>; + +// Port 2 +def A64FXIPEXA : ProcResource<1>; + +// Port 3 +def A64FXIPFLB : ProcResource<1>; + +// Port 4 +def A64FXIPEXB : ProcResource<1>; + +// Port 5 +def A64FXIPEAGA : ProcResource<1>; + +// Port 6 +def A64FXIPEAGB : ProcResource<1>; + +// Port 7 +def A64FXIPBR : ProcResource<1>; + +// Define groups for the functional units on each issue port. Each group +// created will be used by a WriteRes later on. 
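As the comment above notes, the groups defined next are what the write classes consume. A condensed sketch of that chain, restating names defined just below rather than introducing new ones:

// Ports 0 and 3 (FLA/FLB, the two FP/SIMD pipelines on A64FX) combined into
// one group: an operation tied to the group may issue on either port.
def A64FXGI03 : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;

// A 9-cycle write class on that group; InstRW entries elsewhere in the file
// attach concrete instructions to it.
def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 9;
}

The digits in the GI names are the port numbers from the list above (GI03 = ports 0 and 3, GI56 = ports 5 and 6, GI2456 = ports 2, 4, 5 and 6), which is why the same suffixes recur in the write-class names throughout this file.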
+ +def A64FXGI7 : ProcResGroup<[A64FXIPBR]>; + +def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>; + +def A64FXGI1 : ProcResGroup<[A64FXIPPR]>; + +def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>; + +def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>; + +def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>; + +def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>; + +def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>; + +def A64FXGI03 : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>; + +def A64FXGI01 : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>; + +def A64FXGI02 : ProcResGroup<[A64FXIPFLA, A64FXIPEXA]>; + +def A64FXGI12 : ProcResGroup<[A64FXIPEXA, A64FXIPPR]>; + +def A64FXGI15 : ProcResGroup<[A64FXIPEAGA, A64FXIPPR]>; + +def A64FXGI05 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA]>; + +def A64FXGI24 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>; + +def A64FXGI124 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPPR]>; + +def A64FXGI056 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>; + +def A64FXGI0256 : ProcResGroup<[A64FXIPFLA, A64FXIPEXA, A64FXIPEAGA, A64FXIPEAGB]>; + +def A64FXGI56 : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>; + +def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB]>; + +def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB, + A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB, A64FXIPBR]> { + let BufferSize = 60; +} + +def A64FXWrite_6Cyc : SchedWriteRes<[]> { + let Latency = 6; +} + +def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> { + let Latency = 1; +} + +def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { + let Latency = 2; +} + +def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { + let Latency = 4; +} + +def A64FXWrite_5Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { + let Latency = 5; +} + +def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { + let Latency = 6; +} + +def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { + let Latency = 8; +} + +def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { + let Latency = 9; +} + +def A64FXWrite_13Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { + let Latency = 13; +} + +def A64FXWrite_37Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { + let Latency = 37; +} + +def A64FXWrite_98Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { + let Latency = 98; +} + +def A64FXWrite_134Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { + let Latency = 134; +} + +def A64FXWrite_154Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { + let Latency = 154; +} + +def A64FXWrite_4Cyc_GI01 : SchedWriteRes<[A64FXGI01]> { + let Latency = 4; +} + +def A64FXWrite_6Cyc_GI01 : SchedWriteRes<[A64FXGI01]> { + let Latency = 6; +} + +def A64FXWrite_8Cyc_GI01 : SchedWriteRes<[A64FXGI01]> { + let Latency = 8; +} + +def A64FXWrite_12Cyc_GI01 : SchedWriteRes<[A64FXGI01]> { + let Latency = 12; +} + +def A64FXWrite_10Cyc_GI02 : SchedWriteRes<[A64FXGI02]> { + let Latency = 10; +} + +def A64FXWrite_17Cyc_GI02 : SchedWriteRes<[A64FXGI02]> { + let Latency = 17; +} + +def A64FXWrite_21Cyc_GI02 : SchedWriteRes<[A64FXGI02]> { + let Latency = 21; +} + +def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> { + let Latency = 3; +} + +def A64FXWrite_6Cyc_NGI1 : SchedWriteRes<[A64FXGI1]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def A64FXWrite_4Cyc_GI12 : SchedWriteRes<[A64FXGI12]> { + let Latency = 4; +} + +def A64FXWrite_3Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { + let Latency = 3; +} + +def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { + let Latency = 5; +} + +def A64FXWrite_6Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { + let Latency = 6; +} + +def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { + let Latency = 4; +} + +def A64FXWrite_6Cyc_GI3 : 
SchedWriteRes<[A64FXGI3]> { + let Latency = 6; +} + +def A64FXWrite_6Cyc_GI15 : SchedWriteRes<[A64FXGI15]> { + let Latency = 6; +} + +def A64FXWrite_3Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 3; +} + +def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 4; +} + +def A64FXWrite_6Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 6; +} + +def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 8; +} + +def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 9; +} + +def A64FXWrite_10Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; +} + +def A64FXWrite_12Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 12; +} + +def A64FXWrite_14Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 14; +} + +def A64FXWrite_15Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 15; +} + +def A64FXWrite_15Cyc_NGI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 15; + let NumMicroOps = 2; +} + +def A64FXWrite_18Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 18; +} + +def A64FXWrite_45Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 45; +} + +def A64FXWrite_60Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 60; +} + +def A64FXWrite_75Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { + let Latency = 75; +} + +def A64FXWrite_6Cyc_GI05 : SchedWriteRes<[A64FXGI05]> { + let Latency = 6; +} + +def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { + let Latency = 10; +} + +def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { + let Latency = 12; +} + +def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { + let Latency = 20; +} + +def A64FXWrite_5Cyc_GI5 : SchedWriteRes<[A64FXGI5]> { + let Latency = 5; +} + +def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> { + let Latency = 11; +} + +def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> { + let Latency = 5; +} + +def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { + let Latency = 1; +} + +def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { + let Latency = 2; +} + +def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> { + let Latency = 4; + let NumMicroOps = 4; +} + +def A64FXWrite_6Cyc_GI124: SchedWriteRes<[A64FXGI124]> { + let Latency = 6; +} + +def A64FXWrite_8Cyc_GI124 : SchedWriteRes<[A64FXGI124]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def A64FXWrite_6Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { + let Latency = 0; +} + +def A64FXWrite_1Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { + let Latency = 1; +} + +def A64FXWrite_5Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { + let Latency = 5; +} + +def A64FXWrite_8Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { + let Latency = 8; +} + +def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { + let Latency = 11; +} + +def A64FXWrite_44Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { + let Latency = 44; +} + +def A64FXWrite_10Cyc_GI056 : SchedWriteRes<[A64FXGI056]> { + let Latency = 10; +} + +def A64FXWrite_15Cyc_GI056 : SchedWriteRes<[A64FXGI056]> { + let Latency = 15; +} + +def A64FXWrite_19Cyc_GI056 : SchedWriteRes<[A64FXGI056]> { + let Latency = 19; +} + +def A64FXWrite_25Cyc_GI056 : SchedWriteRes<[A64FXGI056]> { + let Latency = 25; +} + +def A64FXWrite_14Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> { + let Latency = 14; +} + +def A64FXWrite_19Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> { + let Latency = 19; +} + +def A64FXWrite_29Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> { + let Latency = 29; +} + +def A64FXWrite_LDNP: SchedWriteRes<[A64FXGI56]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def A64FXWrite_LDP01: 
SchedWriteRes<[A64FXGI2456]> { + let Latency = 5; + let NumMicroOps = 3; +} + +def A64FXWrite_LDR01: SchedWriteRes<[A64FXGI2456]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def A64FXWrite_LD102: SchedWriteRes<[A64FXGI56]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def A64FXWrite_LD103: SchedWriteRes<[A64FXGI56]> { + let Latency = 11; + let NumMicroOps = 2; + +} + +def A64FXWrite_LD104: SchedWriteRes<[A64FXGI56]> { + let Latency = 8; + let NumMicroOps = 3; +} + +def A64FXWrite_LD105: SchedWriteRes<[A64FXGI56]> { + let Latency = 11; + let NumMicroOps = 3; +} + +def A64FXWrite_LD106: SchedWriteRes<[A64FXGI56]> { + let Latency = 8; + let NumMicroOps = 4; +} + +def A64FXWrite_LD107: SchedWriteRes<[A64FXGI56]> { + let Latency = 11; + let NumMicroOps = 4; +} + +def A64FXWrite_LD108: SchedWriteRes<[A64FXGI56]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def A64FXWrite_LD109: SchedWriteRes<[A64FXGI56]> { + let Latency = 11; + let NumMicroOps = 2; +} + +def A64FXWrite_LD110: SchedWriteRes<[A64FXGI56]> { + let Latency = 8; + let NumMicroOps = 3; +} + +def A64FXWrite_LD111: SchedWriteRes<[A64FXGI56]> { + let Latency = 11; + let NumMicroOps = 3; +} + +def A64FXWrite_LD112: SchedWriteRes<[A64FXGI56]> { + let Latency = 8; + let NumMicroOps = 4; +} + +def A64FXWrite_LD113: SchedWriteRes<[A64FXGI56]> { + let Latency = 11; + let NumMicroOps = 4; +} + +def A64FXWrite_LD114: SchedWriteRes<[A64FXGI56]> { + let Latency = 8; + let NumMicroOps = 5; +} + +def A64FXWrite_LD115: SchedWriteRes<[A64FXGI56]> { + let Latency = 11; + let NumMicroOps = 5; +} + +def A64FXWrite_LD1I0: SchedWriteRes<[A64FXGI056]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def A64FXWrite_LD1I1: SchedWriteRes<[A64FXGI056]> { + let Latency = 8; + let NumMicroOps = 3; +} + +def A64FXWrite_LD2I0: SchedWriteRes<[A64FXGI056]> { + let Latency = 8; + let NumMicroOps = 4; +} + +def A64FXWrite_LD2I1: SchedWriteRes<[A64FXGI056]> { + let Latency = 8; + let NumMicroOps = 5; +} + +def A64FXWrite_LD3I0: SchedWriteRes<[A64FXGI056]> { + let Latency = 8; + let NumMicroOps = 6; +} + +def A64FXWrite_LD3I1: SchedWriteRes<[A64FXGI056]> { + let Latency = 8; + let NumMicroOps = 7; +} + +def A64FXWrite_LD4I0: SchedWriteRes<[A64FXGI056]> { + let Latency = 8; + let NumMicroOps = 8; +} + +def A64FXWrite_LD4I1: SchedWriteRes<[A64FXGI056]> { + let Latency = 8; + let NumMicroOps = 9; +} + +def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> { + let Latency = 1; +} + +def A64FXWrite_FMOV_GV : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; +} + +def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> { + let Latency = 14; +} + +def A64FXWrite_FMOV_VG : SchedWriteRes<[A64FXGI03]> { + let Latency = 25; +} + +def A64FXWrite_ADDLV : SchedWriteRes<[A64FXGI03]> { + let Latency = 12; +} + +def A64FXWrite_MULLE : SchedWriteRes<[A64FXGI03]> { + let Latency = 14; +} + +def A64FXWrite_MULLV : SchedWriteRes<[A64FXGI03]> { + let Latency = 14; +} + +def A64FXWrite_MADDL : SchedWriteRes<[A64FXGI03]> { + let Latency = 6; +} + +def A64FXWrite_ABA : SchedWriteRes<[A64FXGI03]> { + let Latency = 8; +} + +def A64FXWrite_ABAL : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; +} + +def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> { + let Latency = 12; + let NumMicroOps = 6; +} + +def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> { + let Latency = 14; + let NumMicroOps = 6; +} + +def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> { + let Latency = 9; +} + +def A64FXWrite_PMUL : SchedWriteRes<[A64FXGI03]> { + let Latency = 8; +} + + +def A64FXWrite_SRSRAV : 
SchedWriteRes<[A64FXGI03]> { + let Latency = 8; + let NumMicroOps = 3; +} + +def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; + let NumMicroOps = 3; +} + +def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; + let NumMicroOps = 2; +} + + +def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; + let NumMicroOps = 3; +} + +def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> { + let Latency = 15; + let NumMicroOps = 2; +} + +def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> { + let Latency = 15; + let NumMicroOps = 3; +} + +def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; + let NumMicroOps = 3; +} + +def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; + let NumMicroOps = 2; +} + +def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> { + let Latency = 15; + let NumMicroOps = 2; +} + +def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> { + let Latency = 14; + let NumMicroOps = 7; +} + +def A64FXWrite_FMAXVVS : SchedWriteRes<[A64FXGI03]> { + let Latency = 14; +} + +def A64FXWrite_BIF : SchedWriteRes<[A64FXGI03]> { + let Latency = 5; +} + +def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; +} + +def A64FXWrite_SHA00 : SchedWriteRes<[A64FXGI0]> { + let Latency = 9; +} + +def A64FXWrite_SHA01 : SchedWriteRes<[A64FXGI0]> { + let Latency = 12; +} + +def A64FXWrite_SMOV : SchedWriteRes<[A64FXGI03]> { + let Latency = 25; +} + +def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; + let NumMicroOps = 3; +} + +def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; + let NumMicroOps = 5; +} + +def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; + let NumMicroOps = 7; +} + +def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> { + let Latency = 10; + let NumMicroOps = 9; +} + +def A64FXWrite_PREF0: SchedWriteRes<[A64FXGI56]> { + let Latency = 0; +} + +def A64FXWrite_PREF1: SchedWriteRes<[A64FXGI56]> { + let Latency = 0; +} + +def A64FXWrite_SWP: SchedWriteRes<[A64FXGI56]> { + let Latency = 0; +} + +def A64FXWrite_STUR: SchedWriteRes<[A64FXGI56]> { + let Latency = 0; +} + +def A64FXWrite_STNP: SchedWriteRes<[A64FXGI56]> { + let Latency = 0; +} + +def A64FXWrite_STP01: SchedWriteRes<[A64FXGI56]> { + let Latency = 0; +} + +def A64FXWrite_ST10: SchedWriteRes<[A64FXGI56]> { + let Latency = 0; +} + +def A64FXWrite_ST11: SchedWriteRes<[A64FXGI56]> { + let Latency = 0; +} + +def A64FXWrite_ST12: SchedWriteRes<[A64FXGI56]> { + let Latency = 0; +} + +def A64FXWrite_ST13: SchedWriteRes<[A64FXGI56]> { + let Latency = 0; +} + +def A64FXWrite_ST14: SchedWriteRes<[A64FXGI56]> { + let Latency = 1; +} + +def A64FXWrite_ST15: SchedWriteRes<[A64FXGI56]> { + let Latency = 1; +} + +def A64FXWrite_ST16: SchedWriteRes<[A64FXGI56]> { + let Latency = 1; +} + +def A64FXWrite_ST17: SchedWriteRes<[A64FXGI56]> { + let Latency = 1; +} + +def A64FXWrite_ST1W_6: SchedWriteRes<[A64FXGI056]> { + let Latency = 6; +} + +def A64FXWrite_ST2W_7: SchedWriteRes<[A64FXGI056]> { + let Latency = 7; +} + +def A64FXWrite_ST3W_8: SchedWriteRes<[A64FXGI056]> { + let Latency = 8; +} + +def A64FXWrite_ST4W_9: SchedWriteRes<[A64FXGI056]> { + let Latency = 9; +} + +def A64FXWrite_ST1W_15: SchedWriteRes<[A64FXGI056]> { + let Latency = 15; +} + +def A64FXWrite_ST1W_19: SchedWriteRes<[A64FXGI056]> { + let Latency = 19; +} + +def A64FXWrite_CAS: SchedWriteRes<[A64FXGI56]> { + let Latency = 7; 
+} + +// Define commonly used read types. + +// No forwarding is provided for these types. +def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 0>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + +//===----------------------------------------------------------------------===// +// 3. Instruction Tables. + +//--- +// 3.1 Branch Instructions +//--- + +// Branch, immed +// Branch and link, immed +// Compare and branch +def : WriteRes<WriteBr, [A64FXGI7]> { + let Latency = 1; +} + +// Branch, register +// Branch and link, register != LR +// Branch and link, register = LR +def : WriteRes<WriteBrReg, [A64FXGI7]> { + let Latency = 1; +} + +def : WriteRes<WriteSys, []> { let Latency = 1; } +def : WriteRes<WriteBarrier, []> { let Latency = 1; } +def : WriteRes<WriteHint, []> { let Latency = 1; } + +def : WriteRes<WriteAtomic, []> { + let Latency = 4; +} + +//--- +// Branch +//--- +def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>; +def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>; +def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>; +def : InstRW<[A64FXWrite_1Cyc_GI7], + (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>; + +//--- +// 3.2 Arithmetic and Logical Instructions +// 3.3 Move and Shift Instructions +//--- + +// ALU, basic +// Conditional compare +// Conditional select +// Address generation +def : WriteRes<WriteI, [A64FXGI2456]> { + let Latency = 1; + let ResourceCycles = [1]; +} + +def : InstRW<[WriteI], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +def : InstRW<[WriteI], (instrs COPY)>; + +// ALU, extend and/or shift +def : WriteRes<WriteISReg, [A64FXGI2456]> { + let Latency = 2; + let ResourceCycles = [1]; +} + +def : InstRW<[WriteISReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +def : WriteRes<WriteIEReg, [A64FXGI2456]> { + let Latency = 1; + let ResourceCycles = [1]; +} + +def : InstRW<[WriteIEReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +// Move immed +def : WriteRes<WriteImm, [A64FXGI2456]> { + let Latency = 1; + let ResourceCycles = [1]; +} + +def : InstRW<[A64FXWrite_1Cyc_GI2456], + (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; + +def : InstRW<[A64FXWrite_2Cyc_GI24], + (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, 
RORVWr, RORVXr)>; + +// Variable shift +def : WriteRes<WriteIS, [A64FXGI2456]> { + let Latency = 1; + let ResourceCycles = [1]; +} + +//--- +// 3.4 Divide and Multiply Instructions +//--- + +// Divide, W-form +def : WriteRes<WriteID32, [A64FXGI4]> { + let Latency = 39; + let ResourceCycles = [39]; +} + +// Divide, X-form +def : WriteRes<WriteID64, [A64FXGI4]> { + let Latency = 23; + let ResourceCycles = [23]; +} + +// Multiply accumulate, W-form +def : WriteRes<WriteIM32, [A64FXGI2456]> { + let Latency = 5; + let ResourceCycles = [1]; +} + +// Multiply accumulate, X-form +def : WriteRes<WriteIM64, [A64FXGI2456]> { + let Latency = 5; + let ResourceCycles = [1]; +} + +def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>; +def : InstRW<[A64FXWrite_MADDL], + (instregex "(S|U)(MADDL|MSUBL)rrr")>; + +def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>; +def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>; + +// Bitfield extract, two reg +def : WriteRes<WriteExtr, [A64FXGI2456]> { + let Latency = 1; + let ResourceCycles = [1]; +} + +// Multiply high +def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>; + +// Miscellaneous Data-Processing Instructions +// Bitfield extract +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>; + +// Bitifield move - basic +def : InstRW<[A64FXWrite_1Cyc_GI24], + (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>; + +// Bitfield move, insert +def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>; + +// Count leading +def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$", + "^CLZ(W|X)r$")>; + +// Reverse bits +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>; + +// Cryptography Extensions +def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]")>; +def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AESI?MC")>; +def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^PMULL")>; +def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>; +def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>; +def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]")>; +def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU0")>; +def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU1")>; +def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA256(H|H2)")>; + +// CRC Instructions +def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr, CRC32Hrr)>; +def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr)>; +def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr)>; + +def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32CBrr, CRC32CHrr)>; +def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32CWrr)>; +def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32CXrr)>; + +// Reverse bits/bytes +// NOTE: Handled by WriteI. + +//--- +// 3.6 Load Instructions +// 3.10 FP Load Instructions +//--- + +// Load register, literal +// Load register, unscaled immed +// Load register, immed unprivileged +// Load register, unsigned immed +def : WriteRes<WriteLD, [A64FXGI56]> { + let Latency = 4; + let ResourceCycles = [3]; +} + +// Load register, immed post-index +// NOTE: Handled by WriteLD, WriteI. +// Load register, immed pre-index +// NOTE: Handled by WriteLD, WriteAdr. +def : WriteRes<WriteAdr, [A64FXGI2456]> { + let Latency = 1; + let ResourceCycles = [1]; +} + +// Load pair, immed offset, normal +// Load pair, immed offset, signed words, base != SP +// Load pair, immed offset signed words, base = SP +// LDP only breaks into *one* LS micro-op. 
Thus +// the resources are handled by WriteLD. +def : WriteRes<WriteLDHi, []> { + let Latency = 5; +} + +// Load register offset, basic +// Load register, register offset, scale by 4/8 +// Load register, register offset, scale by 2 +// Load register offset, extend +// Load register, register offset, extend, scale by 4/8 +// Load register, register offset, extend, scale by 2 +def A64FXWriteLDIdx : SchedWriteVariant<[ + SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>, + SchedVar<NoSchedPred, [A64FXWrite_1Cyc_GI56]>]>; +def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>; + +def A64FXReadAdrBase : SchedReadVariant<[ + SchedVar<ScaledIdxPred, [ReadDefault]>, + SchedVar<NoSchedPred, [ReadDefault]>]>; +def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>; + +// Load pair, immed pre-index, normal +// Load pair, immed pre-index, signed words +// Load pair, immed post-index, normal +// Load pair, immed post-index, signed words +// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. + +def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPDi)>; +def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPQi)>; +def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPSi)>; +def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPWi)>; +def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPXi)>; + +def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPDi)>; +def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPQi)>; +def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSi)>; +def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSWi)>; +def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPWi)>; +def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPXi)>; + +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRBui)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRDui)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRHui)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRQui)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRSui)>; + +def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRDl)>; +def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRQl)>; +def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRWl)>; +def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRXl)>; + +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRBi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRHi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRWi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRXi)>; + +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBWi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBXi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHWi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHXi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSWi)>; + +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; + +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>; + +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs 
LDRSBWpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpost)>; + +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpost)>; + +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpost)>; + +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpost)>; + +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPDpost)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPQpost)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPSpost)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPWpost)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPXpost)>; + +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>; + +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPXpre)>; + +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>; +def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>; + +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPDpost)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPQpost)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPSpost)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPWpost)>; +def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], + (instrs LDPXpost)>; + +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>; +def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>; + +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs 
LDRQroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroW)>; + +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRBroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRBroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRDroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRHroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRHHroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRQroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRSroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRSHWroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRSHXroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRWroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRXroW)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRBroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRDroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRHroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRHHroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRQroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRSroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRSHWroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRSHXroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRWroX)>; +def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], + (instrs LDRXroX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBBi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURDi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHHi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURQi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURXi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBWi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBXi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHWi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHXi)>; +def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSWi)>; + +//--- +// Prefetch +//--- +def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>; +def : InstRW<[A64FXWrite_PREF1], (instrs PRFUMi)>; +def : InstRW<[A64FXWrite_PREF1], (instrs PRFMui)>; +def : InstRW<[A64FXWrite_PREF1], 
(instrs PRFMroW)>; +def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroX)>; + +//-- +// 3.7 Store Instructions +// 3.11 FP Store Instructions +//-- + +// Store register, unscaled immed +// Store register, immed unprivileged +// Store register, unsigned immed +def : WriteRes<WriteST, [A64FXGI56]> { + let Latency = 1; +} + +// Store register, immed post-index +// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase + +// Store register, immed pre-index +// NOTE: Handled by WriteAdr, WriteST + +// Store register, register offset, basic +// Store register, register offset, scaled by 4/8 +// Store register, register offset, scaled by 2 +// Store register, register offset, extend +// Store register, register offset, extend, scale by 4/8 +// Store register, register offset, extend, scale by 1 +def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]> { + let Latency = 1; +} + +// Store pair, immed offset, W-form +// Store pair, immed offset, X-form +def : WriteRes<WriteSTP, [A64FXGI56]> { + let Latency = 1; +} + +// Store pair, immed post-index, W-form +// Store pair, immed post-index, X-form +// Store pair, immed pre-index, W-form +// Store pair, immed pre-index, X-form +// NOTE: Handled by WriteAdr, WriteSTP. + +def : InstRW<[A64FXWrite_STUR], (instrs STURBi)>; +def : InstRW<[A64FXWrite_STUR], (instrs STURBBi)>; +def : InstRW<[A64FXWrite_STUR], (instrs STURDi)>; +def : InstRW<[A64FXWrite_STUR], (instrs STURHi)>; +def : InstRW<[A64FXWrite_STUR], (instrs STURHHi)>; +def : InstRW<[A64FXWrite_STUR], (instrs STURQi)>; +def : InstRW<[A64FXWrite_STUR], (instrs STURSi)>; +def : InstRW<[A64FXWrite_STUR], (instrs STURWi)>; +def : InstRW<[A64FXWrite_STUR], (instrs STURXi)>; + +def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRBi)>; +def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRHi)>; +def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRWi)>; +def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRXi)>; + +def : InstRW<[A64FXWrite_STNP], (instrs STNPDi)>; +def : InstRW<[A64FXWrite_STNP], (instrs STNPQi)>; +def : InstRW<[A64FXWrite_STNP], (instrs STNPXi)>; +def : InstRW<[A64FXWrite_STNP], (instrs STNPWi)>; + +def : InstRW<[A64FXWrite_STNP], (instrs STPDi)>; +def : InstRW<[A64FXWrite_STNP], (instrs STPQi)>; +def : InstRW<[A64FXWrite_STNP], (instrs STPXi)>; +def : InstRW<[A64FXWrite_STNP], (instrs STPWi)>; + +def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>; +def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>; +def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>; +def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>; +def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>; +def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>; +def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>; +def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>; +def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>; +def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>; +def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>; +def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>; + +def : InstRW<[A64FXWrite_STP01], + (instrs STPDpre, STPDpost)>; +def : InstRW<[A64FXWrite_STP01, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[A64FXWrite_STP01], + (instrs STPDpre, STPDpost)>; +def : InstRW<[A64FXWrite_STP01, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[A64FXWrite_STP01], + (instrs STPQpre, STPQpost)>; +def : InstRW<[A64FXWrite_STP01, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[A64FXWrite_STP01], + (instrs STPQpre, STPQpost)>; +def : InstRW<[A64FXWrite_STP01, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[A64FXWrite_STP01], + 
(instrs STPSpre, STPSpost)>; +def : InstRW<[A64FXWrite_STP01, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[A64FXWrite_STP01], + (instrs STPSpre, STPSpost)>; +def : InstRW<[A64FXWrite_STP01, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[A64FXWrite_STP01], + (instrs STPWpre, STPWpost)>; +def : InstRW<[A64FXWrite_STP01, ReadAdrBase], + (instrs STPWpre, STPWpost)>; +def : InstRW<[A64FXWrite_STP01], + (instrs STPWpre, STPWpost)>; +def : InstRW<[A64FXWrite_STP01, ReadAdrBase], + (instrs STPWpre, STPWpost)>; +def : InstRW<[A64FXWrite_STP01], + (instrs STPXpre, STPXpost)>; +def : InstRW<[A64FXWrite_STP01, ReadAdrBase], + (instrs STPXpre, STPXpost)>; +def : InstRW<[A64FXWrite_STP01], + (instrs STPXpre, STPXpost)>; +def : InstRW<[A64FXWrite_STP01, ReadAdrBase], + (instrs STPXpre, STPXpost)>; + +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, 
A64FXWrite_STP01, ReadAdrBase], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], + (instrs STRXpre, STRXpost)>; + +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRXroW, STRXroX)>; +def : InstRW<[A64FXWrite_STUR, ReadAdrBase], + (instrs STRXroW, STRXroX)>; + +//--- +// 3.8 FP Data Processing Instructions +//--- + +// FP absolute value +// FP min/max +// FP negate +def : WriteRes<WriteF, [A64FXGI03]> { + let Latency = 4; + let ResourceCycles = [2]; +} + +// FP arithmetic + +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FADDDrr, FADDHrr)>; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FSUBDrr, FSUBHrr)>; + +// FP compare +def : WriteRes<WriteFCmp, [A64FXGI03]> { + let Latency = 4; + let ResourceCycles = [2]; +} + +// FP Div, Sqrt +def : WriteRes<WriteFDiv, [A64FXGI0]> { + let Latency = 43; +} + +def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]> { + let Latency = 38; +} + +def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]> { + let Latency = 29; +} + +def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]> { + let Latency = 43; +} + +def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]> { + let Latency = 29; +} + +def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]> { + let Latency = 43; +} + +// FP divide, S-form +// FP square root, S-form +def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>; +def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVSrr")>; +def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^FSQRTSr")>; + +// FP divide, D-form +// FP square root, D-form +def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>; +def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; +def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVDrr")>; +def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^FSQRTDr")>; + +// FP multiply +// FP multiply accumulate +def : WriteRes<WriteFMul, [A64FXGI03]> { + let Latency = 9; + let ResourceCycles = [2]; +} + +def A64FXXWriteFMul : SchedWriteRes<[A64FXGI03]> { + let 
Latency = 9; + let ResourceCycles = [2]; +} + +def A64FXXWriteFMulAcc : SchedWriteRes<[A64FXGI03]> { + let Latency = 9; + let ResourceCycles = [2]; +} + +def : InstRW<[A64FXXWriteFMul], (instregex "^FMUL", "^FNMUL")>; +def : InstRW<[A64FXXWriteFMulAcc], + (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>; + +// FP round to integral +def : InstRW<[A64FXWrite_9Cyc_GI03], + (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; + +// FP select +def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>; + +//--- +// 3.9 FP Miscellaneous Instructions +//--- + +// FP convert, from vec to vec reg +// FP convert, from gen to vec reg +// FP convert, from vec to gen reg +def : WriteRes<WriteFCvt, [A64FXGI03]> { + let Latency = 9; + let ResourceCycles = [2]; +} + +// FP move, immed +// FP move, register +def : WriteRes<WriteFImm, [A64FXGI0]> { + let Latency = 4; + let ResourceCycles = [2]; +} + +// FP transfer, from gen to vec reg +// FP transfer, from vec to gen reg +def : WriteRes<WriteFCopy, [A64FXGI0]> { + let Latency = 4; + let ResourceCycles = [2]; +} + +def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>; +def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>; + +//--- +// 3.12 ASIMD Integer Instructions +//--- + +// ASIMD absolute diff, D-form +// ASIMD absolute diff, Q-form +// ASIMD absolute diff accum, D-form +// ASIMD absolute diff accum, Q-form +// ASIMD absolute diff accum long +// ASIMD absolute diff long +// ASIMD arith, basic +// ASIMD arith, complex +// ASIMD compare +// ASIMD logical (AND, BIC, EOR) +// ASIMD max/min, basic +// ASIMD max/min, reduce, 4H/4S +// ASIMD max/min, reduce, 8B/8H +// ASIMD max/min, reduce, 16B +// ASIMD multiply, D-form +// ASIMD multiply, Q-form +// ASIMD multiply accumulate long +// ASIMD multiply accumulate saturating long +// ASIMD multiply long +// ASIMD pairwise add and accumulate +// ASIMD shift accumulate +// ASIMD shift by immed, basic +// ASIMD shift by immed and insert, basic, D-form +// ASIMD shift by immed and insert, basic, Q-form +// ASIMD shift by immed, complex +// ASIMD shift by register, basic, D-form +// ASIMD shift by register, basic, Q-form +// ASIMD shift by register, complex, D-form +// ASIMD shift by register, complex, Q-form +def : WriteRes<WriteV, [A64FXGI03]> { + let Latency = 4; + let ResourceCycles = [1]; +} + +// ASIMD arith, reduce, 4H/4S +// ASIMD arith, reduce, 8B/8H +// ASIMD arith, reduce, 16B + +// ASIMD logical (MVN (alias for NOT), ORN, ORR) +def : InstRW<[A64FXWrite_4Cyc_GI03], + (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; + +// ASIMD arith, reduce +def : InstRW<[A64FXWrite_ADDLV], + (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; + +// ASIMD polynomial (8x8) multiply long +def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>; +def : InstRW<[A64FXWrite_MULLV], + (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; +def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>; +def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>; + +// ASIMD absolute diff accum, D-form +def : InstRW<[A64FXWrite_ABA], + (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; +// ASIMD absolute diff accum, Q-form +def : InstRW<[A64FXWrite_ABA], + (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; +// ASIMD absolute diff accum long +def : InstRW<[A64FXWrite_ABAL], + (instregex "^[SU]ABAL")>; +// ASIMD arith, reduce, 4H/4S +def : InstRW<[A64FXWrite_ADDLV1], + (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; +// ASIMD arith, reduce, 8B +def : InstRW<[A64FXWrite_ADDLV1], + (instregex 
"^[SU]?ADDL?V(v8i16|v4i32)v$")>; +// ASIMD arith, reduce, 16B/16H +def : InstRW<[A64FXWrite_ADDLV1], + (instregex "^[SU]?ADDL?Vv16i8v$")>; +// ASIMD max/min, reduce, 4H/4S +def : InstRW<[A64FXWrite_MINMAXV], + (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; +// ASIMD max/min, reduce, 8B/8H +def : InstRW<[A64FXWrite_MINMAXV], + (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; +// ASIMD max/min, reduce, 16B/16H +def : InstRW<[A64FXWrite_MINMAXV], + (instregex "^[SU](MIN|MAX)Vv16i8v$")>; +// ASIMD multiply, D-form +def : InstRW<[A64FXWrite_PMUL], + (instregex "^(P?MUL|SQR?DMUL)" # + "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" # + "(_indexed)?$")>; + +// ASIMD multiply, Q-form +def : InstRW<[A64FXWrite_PMUL], + (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>; + +// ASIMD multiply, Q-form +def : InstRW<[A64FXWrite_SQRDMULH], + (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; + +// ASIMD multiply accumulate, D-form +def : InstRW<[A64FXWrite_9Cyc_GI03], + (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; +// ASIMD multiply accumulate, Q-form +def : InstRW<[A64FXWrite_9Cyc_GI03], + (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD shift accumulate +def : InstRW<[A64FXWrite_SRSRAV], + (instregex "SRSRAv", "URSRAv")>; +def : InstRW<[A64FXWrite_SSRAV], + (instregex "SSRAv", "USRAv")>; + +// ASIMD shift by immed, basic +def : InstRW<[A64FXWrite_RSHRN], + (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>; +def : InstRW<[A64FXWrite_SHRN], + (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>; + +def : InstRW<[A64FXWrite_6Cyc_GI3], + (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>; + +// ASIMD shift by immed, complex +def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>; +def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>; +// ASIMD shift by register, basic, Q-form +def : InstRW<[A64FXWrite_6Cyc_GI3], + (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; +// ASIMD shift by register, complex, D-form +def : InstRW<[A64FXWrite_6Cyc_GI3], + (instregex "^[SU][QR]{1,2}SHL" # + "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; +// ASIMD shift by register, complex, Q-form +def : InstRW<[A64FXWrite_6Cyc_GI3], + (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD Arithmetic +def : InstRW<[A64FXWrite_4Cyc_GI03], + (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; +def : InstRW<[A64FXWrite_4Cyc_GI03], + (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>; +def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>; +def : InstRW<[A64FXWrite_4Cyc_GI03], + (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD", + "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[A64FXWrite_ADDP], + (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[A64FXWrite_4Cyc_GI03], + (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" # + "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; +def : InstRW<[A64FXWrite_4Cyc_GI0], + (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; +def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>; +def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>; +def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>; +def : InstRW<[A64FXWrite_MINMAXV], + (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>; +def : InstRW<[A64FXWrite_ABA], + (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>; +def : InstRW<[A64FXWrite_4Cyc_GI03], + (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv")>; 
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SUQADDv", "^USQADDv")>; +def : InstRW<[A64FXWrite_SHRN], + (instregex "^ADDHNv", "^SUBHNv")>; +def : InstRW<[A64FXWrite_RSHRN], + (instregex "^RADDHNv", "^RSUBHNv")>; +def : InstRW<[A64FXWrite_4Cyc_GI03], + (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", + "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB", + "^URHADD", "^USQADD")>; + +def : InstRW<[A64FXWrite_4Cyc_GI03], + (instregex "^CMEQv", "^CMGEv", "^CMGTv", + "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>; +def : InstRW<[A64FXWrite_MINMAXV], + (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>; +def : InstRW<[A64FXWrite_ADDP], + (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>; +def : InstRW<[A64FXWrite_4Cyc_GI03], + (instregex "^SABDv", "^UABDv")>; +def : InstRW<[A64FXWrite_TBX1], + (instregex "^SABDLv", "^UABDLv")>; + +//--- +// 3.13 ASIMD Floating-point Instructions +//--- + +// ASIMD FP absolute value +def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>; + +// ASIMD FP arith, normal, D-form +// ASIMD FP arith, normal, Q-form +def : InstRW<[A64FXWrite_9Cyc_GI03], + (instregex "^FABDv", "^FADDv", "^FSUBv")>; + +// ASIMD FP arith, pairwise, D-form +// ASIMD FP arith, pairwise, Q-form +def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>; + +// ASIMD FP compare, D-form +// ASIMD FP compare, Q-form +def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCMEQv", "^FCMGEv", + "^FCMGTv", "^FCMLEv", + "^FCMLTv")>; +// ASIMD FP round, D-form +def : InstRW<[A64FXWrite_9Cyc_GI03], + (instregex "^FRINT[AIMNPXZ](v2f32)")>; +// ASIMD FP round, Q-form +def : InstRW<[A64FXWrite_9Cyc_GI03], + (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; + +// ASIMD FP convert, long +// ASIMD FP convert, narrow +// ASIMD FP convert, other, D-form +// ASIMD FP convert, other, Q-form + +// ASIMD FP convert, long and narrow +def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>; +// ASIMD FP convert, other, D-form +def : InstRW<[A64FXWrite_FCVTXNV], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; +// ASIMD FP convert, other, Q-form +def : InstRW<[A64FXWrite_FCVTXNV], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP divide, D-form, F32 +def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>; +def : InstRW<[A64FXXWriteFDivSP], (instregex "FDIVv2f32")>; + +// ASIMD FP divide, Q-form, F32 +def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>; +def : InstRW<[A64FXXWriteFDiv], (instregex "FDIVv4f32")>; + +// ASIMD FP divide, Q-form, F64 +def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>; +def : InstRW<[A64FXXWriteFDivDP], (instregex "FDIVv2f64")>; + +// ASIMD FP max/min, normal, D-form +// ASIMD FP max/min, normal, Q-form +def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMAXv", "^FMAXNMv", + "^FMINv", "^FMINNMv")>; + +// ASIMD FP max/min, pairwise, D-form +// ASIMD FP max/min, pairwise, Q-form +def : InstRW<[A64FXWrite_ADDP], (instregex "^FMAXPv", "^FMAXNMPv", + "^FMINPv", "^FMINNMPv")>; + +// ASIMD FP max/min, reduce +def : InstRW<[A64FXWrite_FMAXVVH], (instregex "^FMAXVv", "^FMAXNMVv", + "^FMINVv", "^FMINNMVv")>; + +// ASIMD FP multiply, D-form, FZ +// ASIMD FP multiply, D-form, no FZ +// ASIMD FP multiply, Q-form, FZ +// ASIMD FP multiply, Q-form, no FZ +def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>; +def : InstRW<[A64FXWrite_FMULXE], + (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; +def : InstRW<[A64FXWrite_FMULXE], + (instregex 
"^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP multiply accumulate, Dform, FZ +// ASIMD FP multiply accumulate, Dform, no FZ +// ASIMD FP multiply accumulate, Qform, FZ +// ASIMD FP multiply accumulate, Qform, no FZ +def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>; +def : InstRW<[A64FXWrite_FMULXE], + (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; +def : InstRW<[A64FXWrite_FMULXE], + (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP negate +def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>; + +//-- +// 3.14 ASIMD Miscellaneous Instructions +//-- + +// ASIMD bit reverse +def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>; + +// ASIMD bitwise insert, D-form +// ASIMD bitwise insert, Q-form +def : InstRW<[A64FXWrite_BIF], + (instregex "^BIFv", "^BITv", "^BSLv")>; + +// ASIMD count, D-form +// ASIMD count, Q-form +def : InstRW<[A64FXWrite_4Cyc_GI0], + (instregex "^CLSv", "^CLZv", "^CNTv")>; + +// ASIMD duplicate, gen reg +// ASIMD duplicate, element +def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^CPY")>; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>; + +// ASIMD extract +def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>; + +// ASIMD extract narrow +def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>; + +// ASIMD extract narrow, saturating +def : InstRW<[A64FXWrite_6Cyc_GI3], + (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>; + +// ASIMD insert, element to element +def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>; + +// ASIMD move, integer immed +def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>; + +// ASIMD move, FP immed +def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>; + +// ASIMD table lookup, D-form +def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>; +def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>; +def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>; +def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>; +def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>; +def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>; +def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>; +def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>; + +// ASIMD table lookup, Q-form +def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>; +def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>; +def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>; +def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>; +def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>; +def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>; +def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>; +def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>; + +// ASIMD transpose +def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1", "^TRN2")>; + +// ASIMD unzip/zip +def : InstRW<[A64FXWrite_6Cyc_GI0], + (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>; + +// ASIMD reciprocal estimate, D-form +// ASIMD reciprocal estimate, Q-form +def : InstRW<[A64FXWrite_4Cyc_GI03], + (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", + "^FRSQRTEv", "^URSQRTEv")>; + +// ASIMD reciprocal step, D-form, FZ +// ASIMD reciprocal step, D-form, no FZ +// ASIMD reciprocal step, Q-form, FZ +// ASIMD reciprocal step, Q-form, no FZ +def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex 
"^FRECPSv", "^FRSQRTSv")>; + +// ASIMD reverse +def : InstRW<[A64FXWrite_4Cyc_GI03], + (instregex "^REV16v", "^REV32v", "^REV64v")>; + +// ASIMD table lookup, D-form +// ASIMD table lookup, Q-form +def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>; + +// ASIMD transfer, element to word or word +def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[A64FXWrite_SMOV], (instregex "(S|U)MOVv.*")>; + +// ASIMD transfer gen reg to element +def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>; + +// ASIMD transpose +def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1v", "^TRN2v", + "^UZP1v", "^UZP2v")>; + +// ASIMD unzip/zip +def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>; + +//-- +// 3.15 ASIMD Load Instructions +//-- + +// ASIMD load, 1 element, multiple, 1 reg, D-form +// ASIMD load, 1 element, multiple, 1 reg, Q-form +def : InstRW<[A64FXWrite_8Cyc_GI56], + (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>; +def : InstRW<[A64FXWrite_11Cyc_GI56], + (instregex "^LD1Onev(16b|8h|4s)$")>; +def : InstRW<[A64FXWrite_LD108, WriteAdr], + (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>; +def : InstRW<[A64FXWrite_LD109, WriteAdr], + (instregex "^LD1Onev(16b|8h|4s)_POST$")>; + +// ASIMD load, 1 element, multiple, 2 reg, D-form +// ASIMD load, 1 element, multiple, 2 reg, Q-form +def : InstRW<[A64FXWrite_LD102], + (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>; +def : InstRW<[A64FXWrite_LD103], + (instregex "^LD1Twov(16b|8h|4s)$")>; +def : InstRW<[A64FXWrite_LD110, WriteAdr], + (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>; +def : InstRW<[A64FXWrite_LD111, WriteAdr], + (instregex "^LD1Twov(16b|8h|4s)_POST$")>; + +// ASIMD load, 1 element, multiple, 3 reg, D-form +// ASIMD load, 1 element, multiple, 3 reg, Q-form +def : InstRW<[A64FXWrite_LD104], + (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>; +def : InstRW<[A64FXWrite_LD105], + (instregex "^LD1Threev(16b|8h|4s)$")>; +def : InstRW<[A64FXWrite_LD112, WriteAdr], + (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>; +def : InstRW<[A64FXWrite_LD113, WriteAdr], + (instregex "^LD1Threev(16b|8h|4s)_POST$")>; + +// ASIMD load, 1 element, multiple, 4 reg, D-form +// ASIMD load, 1 element, multiple, 4 reg, Q-form +def : InstRW<[A64FXWrite_LD106], + (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>; +def : InstRW<[A64FXWrite_LD107], + (instregex "^LD1Fourv(16b|8h|4s)$")>; +def : InstRW<[A64FXWrite_LD114, WriteAdr], + (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>; +def : InstRW<[A64FXWrite_LD115, WriteAdr], + (instregex "^LD1Fourv(16b|8h|4s)_POST$")>; + +// ASIMD load, 1 element, one lane, B/H/S +// ASIMD load, 1 element, one lane, D +def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>; +def : InstRW<[A64FXWrite_LD1I1, WriteAdr], + (instregex "^LD1i(8|16|32|64)_POST$")>; + +// ASIMD load, 1 element, all lanes, D-form, B/H/S +// ASIMD load, 1 element, all lanes, D-form, D +// ASIMD load, 1 element, all lanes, Q-form +def : InstRW<[A64FXWrite_8Cyc_GI03], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_LD108, WriteAdr], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, multiple, D-form, B/H/S +// ASIMD load, 2 element, multiple, Q-form, D +def : InstRW<[A64FXWrite_LD103], + (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_LD111, WriteAdr], + (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, one lane, B/H +// ASIMD load, 2 element, one lane, S +// ASIMD load, 2 
element, one lane, D +def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>; +def : InstRW<[A64FXWrite_LD2I1, WriteAdr], + (instregex "^LD2i(8|16|32|64)_POST$")>; + +// ASIMD load, 2 element, all lanes, D-form, B/H/S +// ASIMD load, 2 element, all lanes, D-form, D +// ASIMD load, 2 element, all lanes, Q-form +def : InstRW<[A64FXWrite_LD102], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_LD110, WriteAdr], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, multiple, D-form, B/H/S +// ASIMD load, 3 element, multiple, Q-form, B/H/S +// ASIMD load, 3 element, multiple, Q-form, D +def : InstRW<[A64FXWrite_LD105], + (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_LD113, WriteAdr], + (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, one lone, B/H +// ASIMD load, 3 element, one lane, S +// ASIMD load, 3 element, one lane, D +def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>; +def : InstRW<[A64FXWrite_LD3I1, WriteAdr], + (instregex "^LD3i(8|16|32|64)_POST$")>; + +// ASIMD load, 3 element, all lanes, D-form, B/H/S +// ASIMD load, 3 element, all lanes, D-form, D +// ASIMD load, 3 element, all lanes, Q-form, B/H/S +// ASIMD load, 3 element, all lanes, Q-form, D +def : InstRW<[A64FXWrite_LD104], + (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_LD112, WriteAdr], + (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, multiple, D-form, B/H/S +// ASIMD load, 4 element, multiple, Q-form, B/H/S +// ASIMD load, 4 element, multiple, Q-form, D +def : InstRW<[A64FXWrite_LD107], + (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_LD115, WriteAdr], + (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, one lane, B/H +// ASIMD load, 4 element, one lane, S +// ASIMD load, 4 element, one lane, D +def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>; +def : InstRW<[A64FXWrite_LD4I1, WriteAdr], + (instregex "^LD4i(8|16|32|64)_POST$")>; + +// ASIMD load, 4 element, all lanes, D-form, B/H/S +// ASIMD load, 4 element, all lanes, D-form, D +// ASIMD load, 4 element, all lanes, Q-form, B/H/S +// ASIMD load, 4 element, all lanes, Q-form, D +def : InstRW<[A64FXWrite_LD106], + (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_LD114, WriteAdr], + (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +//-- +// 3.16 ASIMD Store Instructions +//-- + +// ASIMD store, 1 element, multiple, 1 reg, D-form +// ASIMD store, 1 element, multiple, 1 reg, Q-form +def : InstRW<[A64FXWrite_ST10], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_ST14, WriteAdr], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 2 reg, D-form +// ASIMD store, 1 element, multiple, 2 reg, Q-form +def : InstRW<[A64FXWrite_ST11], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_ST15, WriteAdr], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 3 reg, D-form +// ASIMD store, 1 element, multiple, 3 reg, Q-form +def : InstRW<[A64FXWrite_ST12], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_ST16, WriteAdr], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 4 reg, D-form +// ASIMD store, 1 element, multiple, 4 reg, 
Q-form +def : InstRW<[A64FXWrite_ST13], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_ST17, WriteAdr], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, one lane, B/H/S +// ASIMD store, 1 element, one lane, D +def : InstRW<[A64FXWrite_ST10], + (instregex "^ST1i(8|16|32|64)$")>; +def : InstRW<[A64FXWrite_ST14, WriteAdr], + (instregex "^ST1i(8|16|32|64)_POST$")>; + +// ASIMD store, 2 element, multiple, D-form, B/H/S +// ASIMD store, 2 element, multiple, Q-form, B/H/S +// ASIMD store, 2 element, multiple, Q-form, D +def : InstRW<[A64FXWrite_ST11], + (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_ST15, WriteAdr], + (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 2 element, one lane, B/H/S +// ASIMD store, 2 element, one lane, D +def : InstRW<[A64FXWrite_ST11], + (instregex "^ST2i(8|16|32|64)$")>; +def : InstRW<[A64FXWrite_ST15, WriteAdr], + (instregex "^ST2i(8|16|32|64)_POST$")>; + +// ASIMD store, 3 element, multiple, D-form, B/H/S +// ASIMD store, 3 element, multiple, Q-form, B/H/S +// ASIMD store, 3 element, multiple, Q-form, D +def : InstRW<[A64FXWrite_ST12], + (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_ST16, WriteAdr], + (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 3 element, one lane, B/H +// ASIMD store, 3 element, one lane, S +// ASIMD store, 3 element, one lane, D +def : InstRW<[A64FXWrite_ST12], (instregex "^ST3i(8|16|32|64)$")>; +def : InstRW<[A64FXWrite_ST16, WriteAdr], + (instregex "^ST3i(8|16|32|64)_POST$")>; + +// ASIMD store, 4 element, multiple, D-form, B/H/S +// ASIMD store, 4 element, multiple, Q-form, B/H/S +// ASIMD store, 4 element, multiple, Q-form, D +def : InstRW<[A64FXWrite_ST13], + (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[A64FXWrite_ST17, WriteAdr], + (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 4 element, one lane, B/H +// ASIMD store, 4 element, one lane, S +// ASIMD store, 4 element, one lane, D +def : InstRW<[A64FXWrite_ST13], (instregex "^ST4i(8|16|32|64)$")>; +def : InstRW<[A64FXWrite_ST17, WriteAdr], + (instregex "^ST4i(8|16|32|64)_POST$")>; + +// V8.1a Atomics (LSE) +def : InstRW<[A64FXWrite_CAS, WriteAtomic], + (instrs CASB, CASH, CASW, CASX)>; + +def : InstRW<[A64FXWrite_CAS, WriteAtomic], + (instrs CASAB, CASAH, CASAW, CASAX)>; + +def : InstRW<[A64FXWrite_CAS, WriteAtomic], + (instrs CASLB, CASLH, CASLW, CASLX)>; + +def : InstRW<[A64FXWrite_CAS, WriteAtomic], + (instrs CASALB, CASALH, CASALW, CASALX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDLARB, LDLARH, LDLARW, LDLARX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDADDB, LDADDH, LDADDW, LDADDX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>; + +def : 
InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDEORB, LDEORH, LDEORW, LDEORX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDSETB, LDSETH, LDSETW, LDSETX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX, + LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX, + LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX, + LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX, + LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX, + LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX, + LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX, + LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX, + LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX, + LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>; + +def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], + (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX, + LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX, + LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX, + LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>; + +def : InstRW<[A64FXWrite_SWP, WriteAtomic], + (instrs SWPB, SWPH, SWPW, SWPX)>; + +def : InstRW<[A64FXWrite_SWP, WriteAtomic], + (instrs SWPAB, SWPAH, SWPAW, SWPAX)>; + +def : InstRW<[A64FXWrite_SWP, WriteAtomic], + (instrs SWPLB, SWPLH, SWPLW, SWPLX)>; + +def : InstRW<[A64FXWrite_SWP, WriteAtomic], + (instrs SWPALB, SWPALH, SWPALW, SWPALX)>; + +def : InstRW<[A64FXWrite_STUR, WriteAtomic], + (instrs STLLRB, STLLRH, STLLRW, STLLRX)>; + +// [ 1] "abs $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ABS_ZPmZ_B, ABS_ZPmZ_D, ABS_ZPmZ_H, ABS_ZPmZ_S)>; + +// [ 2] "add $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZZZ_B, ADD_ZZZ_D, ADD_ZZZ_H, ADD_ZZZ_S)>; + +// [ 3] "add $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZPmZ_B, ADD_ZPmZ_D, ADD_ZPmZ_H, ADD_ZPmZ_S)>; + +// [ 4] "add $Zdn, $_Zdn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZI_B, ADD_ZI_D, ADD_ZI_H, ADD_ZI_S)>; + +// [ 5] "addpl $Rd, $Rn, $imm6"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs ADDPL_XXI)>; + +// [ 6] "addvl $Rd, $Rn, $imm6"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs ADDVL_XXI)>; + +// [ 7] "adr $Zd, [$Zn, $Zm]"; +def : InstRW<[A64FXWrite_5Cyc_GI0], (instrs ADR_LSL_ZZZ_D_0, ADR_LSL_ZZZ_D_1, ADR_LSL_ZZZ_D_2, ADR_LSL_ZZZ_D_3, ADR_LSL_ZZZ_S_0, ADR_LSL_ZZZ_S_1, ADR_LSL_ZZZ_S_2, ADR_LSL_ZZZ_S_3, ADR_SXTW_ZZZ_D_0, ADR_SXTW_ZZZ_D_1, ADR_SXTW_ZZZ_D_2, ADR_SXTW_ZZZ_D_3, ADR_UXTW_ZZZ_D_0, ADR_UXTW_ZZZ_D_1, ADR_UXTW_ZZZ_D_2, ADR_UXTW_ZZZ_D_3)>; + +// [ 8] "and $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs AND_PPzPP)>; + +// [ 9] "and $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZZZ)>; + +// [10] "and $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZPmZ_B, AND_ZPmZ_D, AND_ZPmZ_H, AND_ZPmZ_S)>; + +// [11] 
"and $Zdn, $_Zdn, $imms13"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZI)>; + +// [12] "ands $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ANDS_PPzPP)>; + +// [13] "andv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs ANDV_VPZ_B, ANDV_VPZ_D, ANDV_VPZ_H, ANDV_VPZ_S)>; + +// [14] "asr $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_WIDE_ZZZ_B, ASR_WIDE_ZZZ_H, ASR_WIDE_ZZZ_S)>; + +// [15] "asr $Zd, $Zn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_ZZI_B, ASR_ZZI_D, ASR_ZZI_H, ASR_ZZI_S)>; + +// [16] "asr $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_WIDE_ZPmZ_B, ASR_WIDE_ZPmZ_H, ASR_WIDE_ZPmZ_S, ASR_ZPmZ_B, ASR_ZPmZ_D, ASR_ZPmZ_H, ASR_ZPmZ_S)>; + +// [17] "asr $Zdn, $Pg/m, $_Zdn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_ZPmI_B, ASR_ZPmI_D, ASR_ZPmI_H, ASR_ZPmI_S)>; + +// [18] "asrd $Zdn, $Pg/m, $_Zdn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASRD_ZPmI_B, ASRD_ZPmI_D, ASRD_ZPmI_H, ASRD_ZPmI_S)>; + +// [19] "asrr $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASRR_ZPmZ_B, ASRR_ZPmZ_D, ASRR_ZPmZ_H, ASRR_ZPmZ_S)>; + +// [20] "bic $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BIC_PPzPP)>; + +// [21] "bic $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs BIC_ZZZ)>; + +// [22] "bic $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs BIC_ZPmZ_B, BIC_ZPmZ_D, BIC_ZPmZ_H, BIC_ZPmZ_S)>; + +// [23] "bics $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BICS_PPzPP)>; + +// [24] "brka $Pd, $Pg/m, $Pn"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKA_PPmP)>; + +// [25] "brka $Pd, $Pg/z, $Pn"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKA_PPzP)>; + +// [26] "brkas $Pd, $Pg/z, $Pn"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKAS_PPzP)>; + +// [27] "brkb $Pd, $Pg/m, $Pn"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKB_PPmP)>; + +// [28] "brkb $Pd, $Pg/z, $Pn"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKB_PPzP)>; + +// [29] "brkbs $Pd, $Pg/z, $Pn"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKBS_PPzP)>; + +// [30] "brkn $Pdm, $Pg/z, $Pn, $_Pdm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKN_PPzP)>; + +// [31] "brkns $Pdm, $Pg/z, $Pn, $_Pdm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKNS_PPzP)>; + +// [32] "brkpa $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPA_PPzPP)>; + +// [33] "brkpas $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPAS_PPzPP)>; + +// [34] "brkpb $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPB_PPzPP)>; + +// [35] "brkpbs $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPBS_PPzPP)>; + +// [36] "clasta $Rdn, $Pg, $_Rdn, $Zm"; +def : InstRW<[A64FXWrite_29Cyc_GI0256], (instrs CLASTA_RPZ_B, CLASTA_RPZ_D, CLASTA_RPZ_H, CLASTA_RPZ_S)>; + +// [37] "clasta $Vdn, $Pg, $_Vdn, $Zm"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTA_VPZ_B, CLASTA_VPZ_D, CLASTA_VPZ_H, CLASTA_VPZ_S)>; + +// [38] "clasta $Zdn, $Pg, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTA_ZPZ_B, CLASTA_ZPZ_D, CLASTA_ZPZ_H, CLASTA_ZPZ_S)>; + +// [39] "clastb $Rdn, $Pg, $_Rdn, $Zm"; +def : InstRW<[A64FXWrite_29Cyc_GI0256], (instrs CLASTB_RPZ_B, CLASTB_RPZ_D, CLASTB_RPZ_H, CLASTB_RPZ_S)>; + +// [40] "clastb $Vdn, $Pg, $_Vdn, $Zm"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTB_VPZ_B, CLASTB_VPZ_D, CLASTB_VPZ_H, CLASTB_VPZ_S)>; + +// [41] "clastb $Zdn, $Pg, $_Zdn, 
$Zm"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTB_ZPZ_B, CLASTB_ZPZ_D, CLASTB_ZPZ_H, CLASTB_ZPZ_S)>; + +// [42] "cls $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs CLS_ZPmZ_B, CLS_ZPmZ_D, CLS_ZPmZ_H, CLS_ZPmZ_S)>; + +// [43] "clz $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs CLZ_ZPmZ_B, CLZ_ZPmZ_D, CLZ_ZPmZ_H, CLZ_ZPmZ_S)>; + +// [44] "cmpeq $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPEQ_PPzZZ_B, CMPEQ_PPzZZ_D, CMPEQ_PPzZZ_H, CMPEQ_PPzZZ_S, CMPEQ_WIDE_PPzZZ_B, CMPEQ_WIDE_PPzZZ_H, CMPEQ_WIDE_PPzZZ_S)>; + +// [45] "cmpeq $Pd, $Pg/z, $Zn, $imm5"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPEQ_PPzZI_B, CMPEQ_PPzZI_D, CMPEQ_PPzZI_H, CMPEQ_PPzZI_S)>; + +// [46] "cmpge $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGE_PPzZZ_B, CMPGE_PPzZZ_D, CMPGE_PPzZZ_H, CMPGE_PPzZZ_S, CMPGE_WIDE_PPzZZ_B, CMPGE_WIDE_PPzZZ_H, CMPGE_WIDE_PPzZZ_S)>; + +// [47] "cmpge $Pd, $Pg/z, $Zn, $imm5"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGE_PPzZI_B, CMPGE_PPzZI_D, CMPGE_PPzZI_H, CMPGE_PPzZI_S)>; + +// [48] "cmpgt $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGT_PPzZZ_B, CMPGT_PPzZZ_D, CMPGT_PPzZZ_H, CMPGT_PPzZZ_S, CMPGT_WIDE_PPzZZ_B, CMPGT_WIDE_PPzZZ_H, CMPGT_WIDE_PPzZZ_S)>; + +// [49] "cmpgt $Pd, $Pg/z, $Zn, $imm5"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGT_PPzZI_B, CMPGT_PPzZI_D, CMPGT_PPzZI_H, CMPGT_PPzZI_S)>; + +// [50] "cmphi $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHI_PPzZZ_B, CMPHI_PPzZZ_D, CMPHI_PPzZZ_H, CMPHI_PPzZZ_S, CMPHI_WIDE_PPzZZ_B, CMPHI_WIDE_PPzZZ_H, CMPHI_WIDE_PPzZZ_S)>; + +// [51] "cmphi $Pd, $Pg/z, $Zn, $imm7"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHI_PPzZI_B, CMPHI_PPzZI_D, CMPHI_PPzZI_H, CMPHI_PPzZI_S)>; + +// [52] "cmphs $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHS_PPzZZ_B, CMPHS_PPzZZ_D, CMPHS_PPzZZ_H, CMPHS_PPzZZ_S, CMPHS_WIDE_PPzZZ_B, CMPHS_WIDE_PPzZZ_H, CMPHS_WIDE_PPzZZ_S)>; + +// [53] "cmphs $Pd, $Pg/z, $Zn, $imm7"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHS_PPzZI_B, CMPHS_PPzZI_D, CMPHS_PPzZI_H, CMPHS_PPzZI_S)>; + +// [54] "cmple $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLE_WIDE_PPzZZ_B, CMPLE_WIDE_PPzZZ_H, CMPLE_WIDE_PPzZZ_S)>; + +// [55] "cmple $Pd, $Pg/z, $Zn, $imm5"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLE_PPzZI_B, CMPLE_PPzZI_D, CMPLE_PPzZI_H, CMPLE_PPzZI_S)>; + +// [56] "cmplo $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLO_WIDE_PPzZZ_B, CMPLO_WIDE_PPzZZ_H, CMPLO_WIDE_PPzZZ_S)>; + +// [57] "cmplo $Pd, $Pg/z, $Zn, $imm7"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLO_PPzZI_B, CMPLO_PPzZI_D, CMPLO_PPzZI_H, CMPLO_PPzZI_S)>; + +// [58] "cmpls $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLS_WIDE_PPzZZ_B, CMPLS_WIDE_PPzZZ_H, CMPLS_WIDE_PPzZZ_S)>; + +// [59] "cmpls $Pd, $Pg/z, $Zn, $imm7"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLS_PPzZI_B, CMPLS_PPzZI_D, CMPLS_PPzZI_H, CMPLS_PPzZI_S)>; + +// [60] "cmplt $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLT_WIDE_PPzZZ_B, CMPLT_WIDE_PPzZZ_H, CMPLT_WIDE_PPzZZ_S)>; + +// [61] "cmplt $Pd, $Pg/z, $Zn, $imm5"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLT_PPzZI_B, CMPLT_PPzZI_D, CMPLT_PPzZI_H, CMPLT_PPzZI_S)>; + +// [62] "cmpne $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPNE_PPzZZ_B, CMPNE_PPzZZ_D, CMPNE_PPzZZ_H, CMPNE_PPzZZ_S, CMPNE_WIDE_PPzZZ_B, CMPNE_WIDE_PPzZZ_H, 
CMPNE_WIDE_PPzZZ_S)>; + +// [63] "cmpne $Pd, $Pg/z, $Zn, $imm5"; +def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPNE_PPzZI_B, CMPNE_PPzZI_D, CMPNE_PPzZI_H, CMPNE_PPzZI_S)>; + +// [64] "cnot $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs CNOT_ZPmZ_B, CNOT_ZPmZ_D, CNOT_ZPmZ_H, CNOT_ZPmZ_S)>; + +// [65] "cnt $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI3], (instrs CNT_ZPmZ_B, CNT_ZPmZ_D, CNT_ZPmZ_H, CNT_ZPmZ_S)>; + +// [66] "cntb $Rd, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTB_XPiI)>; + +// [67] "cntd $Rd, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTD_XPiI)>; + +// [68] "cnth $Rd, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTH_XPiI)>; + +// [69] "cntp $Rd, $Pg, $Pn"; +def : InstRW<[A64FXWrite_6Cyc_GI01], (instrs CNTP_XPP_B, CNTP_XPP_D, CNTP_XPP_H, CNTP_XPP_S)>; + +// [70] "cntw $Rd, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTW_XPiI)>; + +// [71] "compact $Zd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs COMPACT_ZPZ_D, COMPACT_ZPZ_S)>; + +// [72] "cpy $Zd, $Pg/m, $Rn"; +//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmR_B, CPY_ZPmR_D, CPY_ZPmR_H, CPY_ZPmR_S)>; + +// [73] "cpy $Zd, $Pg/m, $Vn"; +//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmV_B, CPY_ZPmV_D, CPY_ZPmV_H, CPY_ZPmV_S)>; + +// [74] "cpy $Zd, $Pg/m, $imm"; +//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmI_B, CPY_ZPmI_D, CPY_ZPmI_H, CPY_ZPmI_S)>; + +// [75] "cpy $Zd, $Pg/z, $imm"; +//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPzI_B, CPY_ZPzI_D, CPY_ZPzI_H, CPY_ZPzI_S)>; + +// [76] "ctermeq $Rn, $Rm"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMEQ_WW, CTERMEQ_XX)>; + +// [77] "ctermne $Rn, $Rm"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMNE_WW, CTERMNE_XX)>; + +// [78] "decb $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECB_XPiI)>; + +// [79] "decd $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECD_XPiI)>; + +// [80] "decd $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECD_ZPiI)>; + +// [81] "dech $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECH_XPiI)>; + +// [82] "dech $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECH_ZPiI)>; + +// [83] "decp $Rdn, $Pg"; +def : InstRW<[A64FXWrite_6Cyc_GI124], (instrs DECP_XP_B, DECP_XP_D, DECP_XP_H, DECP_XP_S)>; + +// [84] "decp $Zdn, $Pg"; +def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs DECP_ZP_D, DECP_ZP_H, DECP_ZP_S)>; + +// [85] "decw $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECW_XPiI)>; + +// [86] "decw $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECW_ZPiI)>; + +// [87] "dup $Zd, $Rn"; +def : InstRW<[A64FXWrite_8Cyc_GI01], (instrs DUP_ZR_B, DUP_ZR_D, DUP_ZR_H, DUP_ZR_S)>; + +// [88] "dup $Zd, $Zn$idx"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs DUP_ZZI_B, DUP_ZZI_D, DUP_ZZI_H, DUP_ZZI_Q, DUP_ZZI_S)>; + +// [89] "dup $Zd, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs DUP_ZI_B, DUP_ZI_D, DUP_ZI_H, DUP_ZI_S)>; + +// [90] "dupm $Zd, $imms"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs DUPM_ZI)>; + +// [91] "eor $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs EOR_PPzPP)>; + +// [92] "eor $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs EOR_ZZZ)>; + +// [93] "eor $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs EOR_ZPmZ_B, EOR_ZPmZ_D, EOR_ZPmZ_H, EOR_ZPmZ_S)>; + +// [94] "eor $Zdn, $_Zdn, 
$imms13"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs EOR_ZI)>; + +// [95] "eors $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs EORS_PPzPP)>; + +// [96] "eorv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs EORV_VPZ_B, EORV_VPZ_D, EORV_VPZ_H, EORV_VPZ_S)>; + +// [97] "ext $Zdn, $_Zdn, $Zm, $imm8"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs EXT_ZZI)>; + +// [99] "fabd $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FABD_ZPmZ_D, FABD_ZPmZ_H, FABD_ZPmZ_S)>; + +// [100] "fabs $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FABS_ZPmZ_D, FABS_ZPmZ_H, FABS_ZPmZ_S)>; + +// [101] "facge $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FACGE_PPzZZ_D, FACGE_PPzZZ_H, FACGE_PPzZZ_S)>; + +// [102] "facgt $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FACGT_PPzZZ_D, FACGT_PPzZZ_H, FACGT_PPzZZ_S)>; + +// [103] "fadd $Zd, $Zn, $Zm"; def is line 1638 +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZZZ_D, FADD_ZZZ_H, FADD_ZZZ_S)>; + +// [104] "fadd $Zdn, $Pg/m, $_Zdn, $Zm"; def is line 1638 +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZPmZ_D, FADD_ZPmZ_H, FADD_ZPmZ_S)>; + +// [105] "fadd $Zdn, $Pg/m, $_Zdn, $i1"; def is line 1638 +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZPmI_D, FADD_ZPmI_H, FADD_ZPmI_S)>; + +// [106] "fadda $Vdn, $Pg, $_Vdn, $Zm"; +def : InstRW<[A64FXWrite_18Cyc_GI03], (instrs FADDA_VPZ_D, FADDA_VPZ_H, FADDA_VPZ_S)>; + +// [107] "faddv $Vd, $Pg, $Zn"; +// H : 4 / 6 / ([1,2]9 / [1]6) x 4 / [1,2]9 = 75 cycle +// S : 4 / 6 / ([1,2]9 / [1]6) x 3 / [1,2]9 = 60 cycle +// D : 4 / 6 / ([1,2]9 / [1]6) x 2 / [1,2]9 = 45 cycle +def : InstRW<[A64FXWrite_75Cyc_GI03], (instrs FADDV_VPZ_H)>; +def : InstRW<[A64FXWrite_60Cyc_GI03], (instrs FADDV_VPZ_S)>; +def : InstRW<[A64FXWrite_45Cyc_GI03], (instrs FADDV_VPZ_D)>; + +// [108] "fcadd $Zdn, $Pg/m, $_Zdn, $Zm, $imm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCADD_ZPmZ_D, FCADD_ZPmZ_H, FCADD_ZPmZ_S)>; + +// [109] "fcmeq $Pd, $Pg/z, $Zn, #0.0"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMEQ_PPzZ0_D, FCMEQ_PPzZ0_H, FCMEQ_PPzZ0_S)>; + +// [110] "fcmeq $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMEQ_PPzZZ_D, FCMEQ_PPzZZ_H, FCMEQ_PPzZZ_S)>; + +// [111] "fcmge $Pd, $Pg/z, $Zn, #0.0"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGE_PPzZ0_D, FCMGE_PPzZ0_H, FCMGE_PPzZ0_S)>; + +// [112] "fcmge $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGE_PPzZZ_D, FCMGE_PPzZZ_H, FCMGE_PPzZZ_S)>; + +// [113] "fcmgt $Pd, $Pg/z, $Zn, #0.0"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGT_PPzZ0_D, FCMGT_PPzZ0_H, FCMGT_PPzZ0_S)>; + +// [114] "fcmgt $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGT_PPzZZ_D, FCMGT_PPzZZ_H, FCMGT_PPzZZ_S)>; + +// [115] "fcmla $Zda, $Pg/m, $Zn, $Zm, $imm"; +def : InstRW<[A64FXWrite_15Cyc_GI03], (instrs FCMLA_ZPmZZ_D, FCMLA_ZPmZZ_H, FCMLA_ZPmZZ_S)>; + +// [116] "fcmla $Zda, $Zn, $Zm$iop, $imm"; +def : InstRW<[A64FXWrite_15Cyc_GI03], (instrs FCMLA_ZZZI_H, FCMLA_ZZZI_S)>; + +// [117] "fcmle $Pd, $Pg/z, $Zn, #0.0"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMLE_PPzZ0_D, FCMLE_PPzZ0_H, FCMLE_PPzZ0_S)>; + +// [118] "fcmlt $Pd, $Pg/z, $Zn, #0.0"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMLT_PPzZ0_D, FCMLT_PPzZ0_H, FCMLT_PPzZ0_S)>; + +// [119] "fcmne $Pd, $Pg/z, $Zn, #0.0"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMNE_PPzZ0_D, FCMNE_PPzZ0_H, FCMNE_PPzZ0_S)>; + +// [120] "fcmne $Pd, $Pg/z, $Zn, $Zm"; +def : 
InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMNE_PPzZZ_D, FCMNE_PPzZZ_H, FCMNE_PPzZZ_S)>; + +// [121] "fcmuo $Pd, $Pg/z, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMUO_PPzZZ_D, FCMUO_PPzZZ_H, FCMUO_PPzZZ_S)>; + +// [122] "fcpy $Zd, $Pg/m, $imm8"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCPY_ZPmI_D, FCPY_ZPmI_H, FCPY_ZPmI_S)>; + +// [123] "fcvt $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVT_ZPmZ_DtoH, FCVT_ZPmZ_DtoS, FCVT_ZPmZ_HtoD, FCVT_ZPmZ_HtoS, FCVT_ZPmZ_StoD, FCVT_ZPmZ_StoH)>; + +// [124] "fcvtzs $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVTZS_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoS, FCVTZS_ZPmZ_HtoD, FCVTZS_ZPmZ_HtoH, FCVTZS_ZPmZ_HtoS, FCVTZS_ZPmZ_StoD, FCVTZS_ZPmZ_StoS)>; + +// [125] "fcvtzu $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVTZU_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoS, FCVTZU_ZPmZ_HtoD, FCVTZU_ZPmZ_HtoH, FCVTZU_ZPmZ_HtoS, FCVTZU_ZPmZ_StoD, FCVTZU_ZPmZ_StoS)>; + +// [126] "fdiv $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FDIV_ZPmZ_D)>; +def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FDIV_ZPmZ_H)>; +def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FDIV_ZPmZ_S)>; + +// [127] "fdivr $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FDIVR_ZPmZ_D)>; +def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FDIVR_ZPmZ_H)>; +def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FDIVR_ZPmZ_S)>; + +// [128] "fdup $Zd, $imm8"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FDUP_ZI_D, FDUP_ZI_H, FDUP_ZI_S)>; + +// [129] "fexpa $Zd, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FEXPA_ZZ_D, FEXPA_ZZ_H, FEXPA_ZZ_S)>; + +// [130] "fmad $Zdn, $Pg/m, $Zm, $Za"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMAD_ZPmZZ_D, FMAD_ZPmZZ_H, FMAD_ZPmZZ_S)>; + +// [131] "fmax $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMAX_ZPmZ_D, FMAX_ZPmZ_H, FMAX_ZPmZ_S)>; + +// [132] "fmax $Zdn, $Pg/m, $_Zdn, $i1"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMAX_ZPmI_D, FMAX_ZPmI_H, FMAX_ZPmI_S)>; + +// [133] "fmaxnm $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMAXNM_ZPmZ_D, FMAXNM_ZPmZ_H, FMAXNM_ZPmZ_S)>; + +// [134] "fmaxnm $Zdn, $Pg/m, $_Zdn, $i1"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMAXNM_ZPmI_D, FMAXNM_ZPmI_H, FMAXNM_ZPmI_S)>; + +// [135] "fmaxnmv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMAXNMV_VPZ_D, FMAXNMV_VPZ_H, FMAXNMV_VPZ_S)>; + +// [136] "fmaxv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMAXV_VPZ_D, FMAXV_VPZ_H, FMAXV_VPZ_S)>; + +// [137] "fmin $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMIN_ZPmZ_D, FMIN_ZPmZ_H, FMIN_ZPmZ_S)>; + +// [138] "fmin $Zdn, $Pg/m, $_Zdn, $i1"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMIN_ZPmI_D, FMIN_ZPmI_H, FMIN_ZPmI_S)>; + +// [139] "fminnm $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMINNM_ZPmZ_D, FMINNM_ZPmZ_H, FMINNM_ZPmZ_S)>; + +// [140] "fminnm $Zdn, $Pg/m, $_Zdn, $i1"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMINNM_ZPmI_D, FMINNM_ZPmI_H, FMINNM_ZPmI_S)>; + +// [141] "fminnmv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMINNMV_VPZ_D, FMINNMV_VPZ_H, FMINNMV_VPZ_S)>; + +// [142] "fminv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMINV_VPZ_D, FMINV_VPZ_H, FMINV_VPZ_S)>; + +// [143] "fmla $Zda, $Pg/m, $Zn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLA_ZPmZZ_D, FMLA_ZPmZZ_H, FMLA_ZPmZZ_S)>; + +// [144] "fmla $Zda, $Zn, $Zm$iop"; +def : 
InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLA_ZZZI_D, FMLA_ZZZI_H, FMLA_ZZZI_S)>; + +// [145] "fmls $Zda, $Pg/m, $Zn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLS_ZPmZZ_D, FMLS_ZPmZZ_H, FMLS_ZPmZZ_S)>; + +// [146] "fmls $Zda, $Zn, $Zm$iop"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLS_ZZZI_D, FMLS_ZZZI_H, FMLS_ZZZI_S)>; + +// [147] "fmsb $Zdn, $Pg/m, $Zm, $Za"; + +// [148] "fmul $Zd, $Zn, $Zm"; + +// [149] "fmul $Zd, $Zn, $Zm$iop"; + +// [150] "fmul $Zdn, $Pg/m, $_Zdn, $Zm"; + +// [151] "fmul $Zdn, $Pg/m, $_Zdn, $i1"; + +// [152] "fmulx $Zdn, $Pg/m, $_Zdn, $Zm"; + +// [153] "fneg $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FNEG_ZPmZ_D, FNEG_ZPmZ_H, FNEG_ZPmZ_S)>; + +// [154] "fnmad $Zdn, $Pg/m, $Zm, $Za"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMAD_ZPmZZ_D, FNMAD_ZPmZZ_H, FNMAD_ZPmZZ_S)>; + +// [155] "fnmla $Zda, $Pg/m, $Zn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMLA_ZPmZZ_D, FNMLA_ZPmZZ_H, FNMLA_ZPmZZ_S)>; + +// [156] "fnmls $Zda, $Pg/m, $Zn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMLS_ZPmZZ_D, FNMLS_ZPmZZ_H, FNMLS_ZPmZZ_S)>; + +// [157] "fnmsb $Zdn, $Pg/m, $Zm, $Za"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMSB_ZPmZZ_D, FNMSB_ZPmZZ_H, FNMSB_ZPmZZ_S)>; + +// [158] "frecpe $Zd, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRECPE_ZZ_D, FRECPE_ZZ_H, FRECPE_ZZ_S)>; + +// [159] "frecps $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRECPS_ZZZ_D, FRECPS_ZZZ_H, FRECPS_ZZZ_S)>; + +// [160] "frecpx $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRECPX_ZPmZ_D, FRECPX_ZPmZ_H, FRECPX_ZPmZ_S)>; + +// [161] "frinta $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTA_ZPmZ_D, FRINTA_ZPmZ_H, FRINTA_ZPmZ_S)>; + +// [162] "frinti $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTI_ZPmZ_D, FRINTI_ZPmZ_H, FRINTI_ZPmZ_S)>; + +// [163] "frintm $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTM_ZPmZ_D, FRINTM_ZPmZ_H, FRINTM_ZPmZ_S)>; + +// [164] "frintn $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTN_ZPmZ_D, FRINTN_ZPmZ_H, FRINTN_ZPmZ_S)>; + +// [165] "frintp $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTP_ZPmZ_D, FRINTP_ZPmZ_H, FRINTP_ZPmZ_S)>; + +// [166] "frintx $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTX_ZPmZ_D, FRINTX_ZPmZ_H, FRINTX_ZPmZ_S)>; + +// [167] "frintz $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTZ_ZPmZ_D, FRINTZ_ZPmZ_H, FRINTZ_ZPmZ_S)>; + +// [168] "frsqrte $Zd, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRSQRTE_ZZ_D, FRSQRTE_ZZ_H, FRSQRTE_ZZ_S)>; + +// [169] "frsqrts $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRSQRTS_ZZZ_D, FRSQRTS_ZZZ_H, FRSQRTS_ZZZ_S)>; + +// [170] "fscale $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSCALE_ZPmZ_D, FSCALE_ZPmZ_H, FSCALE_ZPmZ_S)>; + +// [171] "fsqrt $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FSQRT_ZPmZ_D)>; +def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FSQRT_ZPmZ_H)>; +def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FSQRT_ZPmZ_S)>; + +// [172] "fsub $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUB_ZZZ_D, FSUB_ZZZ_H, FSUB_ZZZ_S)>; + +// [173] "fsub $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUB_ZPmZ_D, FSUB_ZPmZ_H, FSUB_ZPmZ_S)>; + +// [174] "fsub $Zdn, $Pg/m, $_Zdn, $i1"; +def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs FSUB_ZPmI_D, FSUB_ZPmI_H, FSUB_ZPmI_S)>; + +// [175] 
"fsubr $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUBR_ZPmZ_D, FSUBR_ZPmZ_H, FSUBR_ZPmZ_S)>; + +// [176] "fsubr $Zdn, $Pg/m, $_Zdn, $i1"; +def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs FSUBR_ZPmI_D, FSUBR_ZPmI_H, FSUBR_ZPmI_S)>; + +// [177] "ftmad $Zdn, $_Zdn, $Zm, $imm3"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FTMAD_ZZI_D, FTMAD_ZZI_H, FTMAD_ZZI_S)>; + +// [178] "ftsmul $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FTSMUL_ZZZ_D, FTSMUL_ZZZ_H, FTSMUL_ZZZ_S)>; + +// [180] "incb $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCB_XPiI)>; + +// [181] "incd $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCD_XPiI)>; + +// [182] "incd $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCD_ZPiI)>; + +// [183] "inch $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCH_XPiI)>; + +// [184] "inch $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCH_ZPiI)>; + +// [185] "incp $Rdn, $Pg"; +def : InstRW<[A64FXWrite_6Cyc_GI124], (instrs INCP_XP_B, INCP_XP_D, INCP_XP_H, INCP_XP_S)>; + +// [186] "incp $Zdn, $Pg"; +def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs INCP_ZP_D, INCP_ZP_H, INCP_ZP_S)>; + +// [187] "incw $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCW_XPiI)>; + +// [188] "incw $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCW_ZPiI)>; + +// [189] "index $Zd, $Rn, $Rm"; +def : InstRW<[A64FXWrite_17Cyc_GI02], (instrs INDEX_RR_B, INDEX_RR_D, INDEX_RR_H, INDEX_RR_S)>; + +// [190] "index $Zd, $Rn, $imm5"; +def : InstRW<[A64FXWrite_21Cyc_GI02], (instrs INDEX_RI_B, INDEX_RI_D, INDEX_RI_H, INDEX_RI_S)>; + +// [191] "index $Zd, $imm5, $Rm"; +def : InstRW<[A64FXWrite_21Cyc_GI02], (instrs INDEX_IR_B, INDEX_IR_D, INDEX_IR_H, INDEX_IR_S)>; + +// [192] "index $Zd, $imm5, $imm5b"; +def : InstRW<[A64FXWrite_13Cyc_GI0], (instrs INDEX_II_B, INDEX_II_D, INDEX_II_H, INDEX_II_S)>; + +// [193] "insr $Zdn, $Rm"; +def : InstRW<[A64FXWrite_10Cyc_GI02], (instrs INSR_ZR_B, INSR_ZR_D, INSR_ZR_H, INSR_ZR_S)>; + +// [194] "insr $Zdn, $Vm"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs INSR_ZV_B, INSR_ZV_D, INSR_ZV_H, INSR_ZV_S)>; + +// [195] "lasta $Rd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_25Cyc_GI056], (instrs LASTA_RPZ_B, LASTA_RPZ_D, LASTA_RPZ_H, LASTA_RPZ_S)>; + +// [196] "lasta $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs LASTA_VPZ_B, LASTA_VPZ_D, LASTA_VPZ_H, LASTA_VPZ_S)>; + +// [197] "lastb $Rd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_25Cyc_GI056], (instrs LASTB_RPZ_B, LASTB_RPZ_D, LASTB_RPZ_H, LASTB_RPZ_S)>; + +// [198] "lastb $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs LASTB_VPZ_B, LASTB_VPZ_D, LASTB_VPZ_H, LASTB_VPZ_S)>; + +// [199] "ld1b $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1B, LD1B_D, LD1B_H, LD1B_S)>; + +// [200] "ld1b $Zt, $Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1B_D_REAL, GLD1B_D_SXTW_REAL, GLD1B_D_UXTW_REAL, GLD1B_S_SXTW_REAL, GLD1B_S_UXTW_REAL)>; + +// [201] "ld1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1B_D_IMM_REAL, LD1B_H_IMM_REAL, LD1B_IMM_REAL, LD1B_S_IMM_REAL)>; + +// [202] "ld1b $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1B_D_IMM_REAL, GLD1B_S_IMM_REAL)>; + +// [203] "ld1d $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1D)>; + +// [204] "ld1d $Zt, $Pg/z, [$Rn, $Zm]"; 
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1D_REAL, GLD1D_SCALED_REAL, GLD1D_SXTW_REAL, GLD1D_SXTW_SCALED_REAL, GLD1D_UXTW_REAL, GLD1D_UXTW_SCALED_REAL)>; + +// [205] "ld1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1D_IMM_REAL)>; + +// [206] "ld1d $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1D_IMM_REAL)>; + +// [207] "ld1h $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1H, LD1H_D, LD1H_S)>; + +// [208] "ld1h $Zt, $Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1H_D_REAL, GLD1H_D_SCALED_REAL, GLD1H_D_SXTW_REAL, GLD1H_D_SXTW_SCALED_REAL, GLD1H_D_UXTW_REAL, GLD1H_D_UXTW_SCALED_REAL, GLD1H_S_SXTW_REAL, GLD1H_S_SXTW_SCALED_REAL, GLD1H_S_UXTW_REAL, GLD1H_S_UXTW_SCALED_REAL)>; + +// [209] "ld1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1H_D_IMM_REAL, LD1H_IMM_REAL, LD1H_S_IMM_REAL)>; + +// [210] "ld1h $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1H_D_IMM_REAL, GLD1H_S_IMM_REAL)>; + +// [211] "ld1rb $Zt, $Pg/z, [$Rn, $imm6]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RB_D_IMM, LD1RB_H_IMM, LD1RB_IMM, LD1RB_S_IMM)>; + +// [212] "ld1rd $Zt, $Pg/z, [$Rn, $imm6]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RD_IMM)>; + +// [213] "ld1rh $Zt, $Pg/z, [$Rn, $imm6]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RH_D_IMM, LD1RH_IMM, LD1RH_S_IMM)>; + +// [214] "ld1rqb $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_B)>; + +// [215] "ld1rqb $Zt, $Pg/z, [$Rn, $imm4]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_B_IMM)>; + +// [216] "ld1rqd $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_D)>; + +// [217] "ld1rqd $Zt, $Pg/z, [$Rn, $imm4]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_D_IMM)>; + +// [218] "ld1rqh $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_H)>; + +// [219] "ld1rqh $Zt, $Pg/z, [$Rn, $imm4]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_H_IMM)>; + +// [220] "ld1rqw $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_W)>; + +// [221] "ld1rqw $Zt, $Pg/z, [$Rn, $imm4]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_W_IMM)>; + +// [222] "ld1rsb $Zt, $Pg/z, [$Rn, $imm6]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSB_D_IMM, LD1RSB_H_IMM, LD1RSB_S_IMM)>; + +// [223] "ld1rsh $Zt, $Pg/z, [$Rn, $imm6]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSH_D_IMM, LD1RSH_S_IMM)>; + +// [224] "ld1rsw $Zt, $Pg/z, [$Rn, $imm6]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSW_IMM)>; + +// [225] "ld1rw $Zt, $Pg/z, [$Rn, $imm6]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RW_D_IMM, LD1RW_IMM)>; + +// [226] "ld1sb $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SB_D, LD1SB_H, LD1SB_S)>; + +// [227] "ld1sb $Zt, $Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SB_D_REAL, GLD1SB_D_SXTW_REAL, GLD1SB_D_UXTW_REAL, GLD1SB_S_SXTW_REAL, GLD1SB_S_UXTW_REAL)>; + +// [228] "ld1sb $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SB_D_IMM_REAL, LD1SB_H_IMM_REAL, LD1SB_S_IMM_REAL)>; + +// [229] "ld1sb $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SB_D_IMM_REAL, GLD1SB_S_IMM_REAL)>; + +// [230] "ld1sh $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SH_D, LD1SH_S)>; + +// [231] "ld1sh $Zt, 
$Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SH_D_REAL, GLD1SH_D_SCALED_REAL, GLD1SH_D_SXTW_REAL, GLD1SH_D_SXTW_SCALED_REAL, GLD1SH_D_UXTW_REAL, GLD1SH_D_UXTW_SCALED_REAL, GLD1SH_S_SXTW_REAL, GLD1SH_S_SXTW_SCALED_REAL, GLD1SH_S_UXTW_REAL, GLD1SH_S_UXTW_SCALED_REAL)>; + +// [232] "ld1sh $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SH_D_IMM_REAL, LD1SH_S_IMM_REAL)>; + +// [233] "ld1sh $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SH_D_IMM_REAL, GLD1SH_S_IMM_REAL)>; + +// [234] "ld1sw $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SW_D)>; + +// [235] "ld1sw $Zt, $Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SW_D_REAL, GLD1SW_D_SCALED_REAL, GLD1SW_D_SXTW_REAL, GLD1SW_D_SXTW_SCALED_REAL, GLD1SW_D_UXTW_REAL, GLD1SW_D_UXTW_SCALED_REAL)>; + +// [236] "ld1sw $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SW_D_IMM_REAL)>; + +// [237] "ld1sw $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SW_D_IMM_REAL)>; + +// [238] "ld1w $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1W, LD1W_D)>; + +// [239] "ld1w $Zt, $Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1W_D_REAL, GLD1W_D_SCALED_REAL, GLD1W_D_SXTW_REAL, GLD1W_D_SXTW_SCALED_REAL, GLD1W_D_UXTW_REAL, GLD1W_D_UXTW_SCALED_REAL, GLD1W_SXTW_REAL, GLD1W_SXTW_SCALED_REAL, GLD1W_UXTW_REAL, GLD1W_UXTW_SCALED_REAL)>; + +// [240] "ld1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1W_D_IMM_REAL, LD1W_IMM_REAL)>; + +// [241] "ld1w $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1W_D_IMM_REAL, GLD1W_IMM_REAL)>; + +// [242] "ld2b $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2B)>; + +// [243] "ld2b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2B_IMM)>; + +// [244] "ld2d $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2D)>; + +// [245] "ld2d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2D_IMM)>; + +// [246] "ld2h $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2H)>; + +// [247] "ld2h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2H_IMM)>; + +// [248] "ld2w $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2W)>; + +// [249] "ld2w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2W_IMM)>; + +// [250] "ld3b $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3B)>; + +// [251] "ld3b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3B_IMM)>; + +// [252] "ld3d $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3D)>; + +// [253] "ld3d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3D_IMM)>; + +// [254] "ld3h $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3H)>; + +// [255] "ld3h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3H_IMM)>; + +// [256] "ld3w $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3W)>; + +// [257] "ld3w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3W_IMM)>; + +// [258] "ld4b $Zt, $Pg/z, [$Rn, $Rm]"; +def : 
InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD4B)>; + +// [259] "ld4b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD4B_IMM)>; + +// [260] "ld4d $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4D)>; + +// [261] "ld4d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4D_IMM)>; + +// [262] "ld4h $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4H)>; + +// [263] "ld4h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4H_IMM)>; + +// [264] "ld4w $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4W)>; + +// [265] "ld4w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4W_IMM)>; + +// [266] "ldff1b $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1B_D_REAL, LDFF1B_H_REAL, LDFF1B_REAL, LDFF1B_S_REAL)>; + +// [267] "ldff1b $Zt, $Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1B_D_REAL, GLDFF1B_D_SXTW_REAL, GLDFF1B_D_UXTW_REAL, GLDFF1B_S_SXTW_REAL, GLDFF1B_S_UXTW_REAL)>; + +// [268] "ldff1b $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1B_D_IMM_REAL, GLDFF1B_S_IMM_REAL)>; + +// [269] "ldff1d $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1D_REAL)>; + +// [270] "ldff1d $Zt, $Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1D_REAL, GLDFF1D_SCALED_REAL, GLDFF1D_SXTW_REAL, GLDFF1D_SXTW_SCALED_REAL, GLDFF1D_UXTW_REAL, GLDFF1D_UXTW_SCALED_REAL)>; + +// [271] "ldff1d $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1D_IMM_REAL)>; + +// [272] "ldff1h $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1H_D_REAL, LDFF1H_REAL, LDFF1H_S_REAL)>; + +// [273] "ldff1h $Zt, $Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1H_D_REAL, GLDFF1H_D_SCALED_REAL, GLDFF1H_D_SXTW_REAL, GLDFF1H_D_SXTW_SCALED_REAL, GLDFF1H_D_UXTW_REAL, GLDFF1H_D_UXTW_SCALED_REAL, GLDFF1H_S_SXTW_REAL, GLDFF1H_S_SXTW_SCALED_REAL, GLDFF1H_S_UXTW_REAL, GLDFF1H_S_UXTW_SCALED_REAL)>; + +// [274] "ldff1h $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1H_D_IMM_REAL, GLDFF1H_S_IMM_REAL)>; + +// [275] "ldff1sb $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SB_D_REAL, LDFF1SB_H_REAL, LDFF1SB_S_REAL)>; + +// [276] "ldff1sb $Zt, $Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SB_D_REAL, GLDFF1SB_D_SXTW_REAL, GLDFF1SB_D_UXTW_REAL, GLDFF1SB_S_SXTW_REAL, GLDFF1SB_S_UXTW_REAL)>; + +// [277] "ldff1sb $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SB_D_IMM_REAL, GLDFF1SB_S_IMM_REAL)>; + +// [278] "ldff1sh $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SH_D_REAL, LDFF1SH_S_REAL)>; + +// [279] "ldff1sh $Zt, $Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SH_D_REAL, GLDFF1SH_D_SCALED_REAL, GLDFF1SH_D_SXTW_REAL, GLDFF1SH_D_SXTW_SCALED_REAL, GLDFF1SH_D_UXTW_REAL, GLDFF1SH_D_UXTW_SCALED_REAL, GLDFF1SH_S_SXTW_REAL, GLDFF1SH_S_SXTW_SCALED_REAL, GLDFF1SH_S_UXTW_REAL, GLDFF1SH_S_UXTW_SCALED_REAL)>; + +// [280] "ldff1sh $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SH_D_IMM_REAL, GLDFF1SH_S_IMM_REAL)>; + +// [281] "ldff1sw $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SW_D_REAL)>; + +// [282] "ldff1sw $Zt, 
$Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SW_D_REAL, GLDFF1SW_D_SCALED_REAL, GLDFF1SW_D_SXTW_REAL, GLDFF1SW_D_SXTW_SCALED_REAL, GLDFF1SW_D_UXTW_REAL, GLDFF1SW_D_UXTW_SCALED_REAL)>; + +// [283] "ldff1sw $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SW_D_IMM_REAL)>; + +// [284] "ldff1w $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1W_D_REAL, LDFF1W_REAL)>; + +// [285] "ldff1w $Zt, $Pg/z, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1W_D_REAL, GLDFF1W_D_SCALED_REAL, GLDFF1W_D_SXTW_REAL, GLDFF1W_D_SXTW_SCALED_REAL, GLDFF1W_D_UXTW_REAL, GLDFF1W_D_UXTW_SCALED_REAL, GLDFF1W_SXTW_REAL, GLDFF1W_SXTW_SCALED_REAL, GLDFF1W_UXTW_REAL, GLDFF1W_UXTW_SCALED_REAL)>; + +// [286] "ldff1w $Zt, $Pg/z, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1W_D_IMM_REAL, GLDFF1W_IMM_REAL)>; + +// [287] "ldnf1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1B_D_IMM_REAL, LDNF1B_H_IMM_REAL, LDNF1B_IMM_REAL, LDNF1B_S_IMM_REAL)>; + +// [288] "ldnf1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1D_IMM_REAL)>; + +// [289] "ldnf1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1H_D_IMM_REAL, LDNF1H_IMM_REAL, LDNF1H_S_IMM_REAL)>; + +// [290] "ldnf1sb $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SB_D_IMM_REAL, LDNF1SB_H_IMM_REAL, LDNF1SB_S_IMM_REAL)>; + +// [291] "ldnf1sh $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SH_D_IMM_REAL, LDNF1SH_S_IMM_REAL)>; + +// [292] "ldnf1sw $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SW_D_IMM_REAL)>; + +// [293] "ldnf1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1W_D_IMM_REAL, LDNF1W_IMM_REAL)>; + +// [294] "ldnt1b $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1B_ZRR)>; + +// [295] "ldnt1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1B_ZRI)>; + +// [296] "ldnt1d $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1D_ZRR)>; + +// [297] "ldnt1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1D_ZRI)>; + +// [298] "ldnt1h $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1H_ZRR)>; + +// [299] "ldnt1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1H_ZRI)>; + +// [300] "ldnt1w $Zt, $Pg/z, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1W_ZRR)>; + +// [301] "ldnt1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1W_ZRI)>; + +// [302] "ldr $Pt, [$Rn, $imm9, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI5], (instrs LDR_PXI)>; + +// [303] "ldr $Zt, [$Rn, $imm9, mul vl]"; +def : InstRW<[A64FXWrite_11Cyc_GI5], (instrs LDR_ZXI)>; + +// [304] "lsl $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_WIDE_ZZZ_B, LSL_WIDE_ZZZ_H, LSL_WIDE_ZZZ_S)>; + +// [305] "lsl $Zd, $Zn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_ZZI_B, LSL_ZZI_D, LSL_ZZI_H, LSL_ZZI_S)>; + +// [306] "lsl $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_WIDE_ZPmZ_B, LSL_WIDE_ZPmZ_H, LSL_WIDE_ZPmZ_S, LSL_ZPmZ_B, LSL_ZPmZ_D, LSL_ZPmZ_H, LSL_ZPmZ_S)>; + +// [307] "lsl $Zdn, $Pg/m, $_Zdn, $imm"; +def : 
InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_ZPmI_B, LSL_ZPmI_D, LSL_ZPmI_H, LSL_ZPmI_S)>; + +// [308] "lslr $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSLR_ZPmZ_B, LSLR_ZPmZ_D, LSLR_ZPmZ_H, LSLR_ZPmZ_S)>; + +// [309] "lsr $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_WIDE_ZZZ_B, LSR_WIDE_ZZZ_H, LSR_WIDE_ZZZ_S)>; + +// [310] "lsr $Zd, $Zn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_ZZI_B, LSR_ZZI_D, LSR_ZZI_H, LSR_ZZI_S)>; + +// [311] "lsr $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_WIDE_ZPmZ_B, LSR_WIDE_ZPmZ_H, LSR_WIDE_ZPmZ_S, LSR_ZPmZ_B, LSR_ZPmZ_D, LSR_ZPmZ_H, LSR_ZPmZ_S)>; + +// [312] "lsr $Zdn, $Pg/m, $_Zdn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_ZPmI_B, LSR_ZPmI_D, LSR_ZPmI_H, LSR_ZPmI_S)>; + +// [313] "lsrr $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSRR_ZPmZ_B, LSRR_ZPmZ_D, LSRR_ZPmZ_H, LSRR_ZPmZ_S)>; + +// [314] "mad $Zdn, $Pg/m, $Zm, $Za"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MAD_ZPmZZ_B, MAD_ZPmZZ_D, MAD_ZPmZZ_H, MAD_ZPmZZ_S)>; + +// [315] "mla $Zda, $Pg/m, $Zn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MLA_ZPmZZ_B, MLA_ZPmZZ_D, MLA_ZPmZZ_H, MLA_ZPmZZ_S)>; + +// [316] "mls $Zda, $Pg/m, $Zn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MLS_ZPmZZ_B, MLS_ZPmZZ_D, MLS_ZPmZZ_H, MLS_ZPmZZ_S)>; + +// [317] "movprfx $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZPmZ_B, MOVPRFX_ZPmZ_D, MOVPRFX_ZPmZ_H, MOVPRFX_ZPmZ_S)>; + +// [318] "movprfx $Zd, $Pg/z, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZPzZ_B, MOVPRFX_ZPzZ_D, MOVPRFX_ZPzZ_H, MOVPRFX_ZPzZ_S)>; + +// [319] "movprfx $Zd, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZZ)>; + +// [320] "msb $Zdn, $Pg/m, $Zm, $Za"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MSB_ZPmZZ_B, MSB_ZPmZZ_D, MSB_ZPmZZ_H, MSB_ZPmZZ_S)>; + +// [321] "mul $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MUL_ZPmZ_B, MUL_ZPmZ_D, MUL_ZPmZ_H, MUL_ZPmZ_S)>; + +// [322] "mul $Zdn, $_Zdn, $imm"; +def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs MUL_ZI_B, MUL_ZI_D, MUL_ZI_H, MUL_ZI_S)>; + +// [323] "nand $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NAND_PPzPP)>; + +// [324] "nands $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NANDS_PPzPP)>; + +// [325] "neg $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs NEG_ZPmZ_B, NEG_ZPmZ_D, NEG_ZPmZ_H, NEG_ZPmZ_S)>; + +// [326] "nor $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NOR_PPzPP)>; + +// [327] "nors $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NORS_PPzPP)>; + +// [328] "not $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs NOT_ZPmZ_B, NOT_ZPmZ_D, NOT_ZPmZ_H, NOT_ZPmZ_S)>; + +// [329] "orn $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORN_PPzPP)>; + +// [330] "orns $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORNS_PPzPP)>; + +// [331] "orr $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORR_PPzPP)>; + +// [332] "orr $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ORR_ZZZ)>; + +// [333] "orr $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ORR_ZPmZ_B, ORR_ZPmZ_D, ORR_ZPmZ_H, ORR_ZPmZ_S)>; + +// [334] "orr $Zdn, $_Zdn, $imms13"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs ORR_ZI)>; + +// [335] "orrs $Pd, $Pg/z, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs 
ORRS_PPzPP)>; + +// [336] "orv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs ORV_VPZ_B, ORV_VPZ_D, ORV_VPZ_H, ORV_VPZ_S)>; + +// [337] "pfalse $Pd"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PFALSE)>; + +// [338] "pnext $Pdn, $Pg, $_Pdn"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PNEXT_B, PNEXT_D, PNEXT_H, PNEXT_S)>; + +// [339] "prfb $prfop, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFB_PRR)>; + +// [340] "prfb $prfop, $Pg, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFB_D_SCALED, PRFB_D_SXTW_SCALED, PRFB_D_UXTW_SCALED, PRFB_S_SXTW_SCALED, PRFB_S_UXTW_SCALED)>; + +// [341] "prfb $prfop, $Pg, [$Rn, $imm6, mul vl]"; +def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFB_PRI)>; + +// [342] "prfb $prfop, $Pg, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFB_D_PZI, PRFB_S_PZI)>; + +// [343] "prfd $prfop, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFD_PRR)>; + +// [344] "prfd $prfop, $Pg, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFD_D_SCALED, PRFD_D_SXTW_SCALED, PRFD_D_UXTW_SCALED, PRFD_S_SXTW_SCALED, PRFD_S_UXTW_SCALED)>; + +// [345] "prfd $prfop, $Pg, [$Rn, $imm6, mul vl]"; +def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFD_PRI)>; + +// [346] "prfd $prfop, $Pg, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFD_D_PZI, PRFD_S_PZI)>; + +// [347] "prfh $prfop, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRR)>; + +// [348] "prfh $prfop, $Pg, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFH_D_SCALED, PRFH_D_SXTW_SCALED, PRFH_D_UXTW_SCALED, PRFH_S_SXTW_SCALED, PRFH_S_UXTW_SCALED)>; + +// [349] "prfh $prfop, $Pg, [$Rn, $imm6, mul vl]"; +def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRI)>; + +// [350] "prfh $prfop, $Pg, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFH_D_PZI, PRFH_S_PZI)>; + +// [351] "prfw $prfop, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFS_PRR)>; + +// [352] "prfw $prfop, $Pg, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFW_D_SCALED, PRFW_D_SXTW_SCALED, PRFW_D_UXTW_SCALED, PRFW_S_SXTW_SCALED, PRFW_S_UXTW_SCALED)>; + +// [353] "prfw $prfop, $Pg, [$Rn, $imm6, mul vl]"; +def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFW_PRI)>; + +// [354] "prfw $prfop, $Pg, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFW_D_PZI, PRFW_S_PZI)>; + +// [355] "ptest $Pg, $Pn"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTEST_PP)>; + +// [356] "ptrue $Pd, $pattern"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTRUE_B, PTRUE_D, PTRUE_H, PTRUE_S)>; + +// [357] "ptrues $Pd, $pattern"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTRUES_B, PTRUES_D, PTRUES_H, PTRUES_S)>; + +// [358] "punpkhi $Pd, $Pn"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PUNPKHI_PP)>; + +// [359] "punpklo $Pd, $Pn"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PUNPKLO_PP)>; + +// [360] "rbit $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBIT_ZPmZ_B, RBIT_ZPmZ_D, RBIT_ZPmZ_H, RBIT_ZPmZ_S)>; + +// [361] "rdffr $Pd"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFR_P)>; + +// [362] "rdffr $Pd, $Pg/z"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFR_PPz)>; + +// [363] "rdffrs $Pd, $Pg/z"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFRS_PPz)>; + +// [364] "rdvl $Rd, $imm6"; +def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs RDVLI_XI)>; + +// [365] "rev $Pd, $Pn"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs REV_PP_B, REV_PP_D, REV_PP_H, REV_PP_S)>; + 
+// [366] "rev $Zd, $Zn"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs REV_ZZ_B, REV_ZZ_D, REV_ZZ_H, REV_ZZ_S)>; + +// [367] "revb $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVB_ZPmZ_D, REVB_ZPmZ_H, REVB_ZPmZ_S)>; + +// [368] "revh $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVH_ZPmZ_D, REVH_ZPmZ_S)>; + +// [369] "revw $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVW_ZPmZ_D)>; + +// [370] "sabd $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SABD_ZPmZ_B, SABD_ZPmZ_D, SABD_ZPmZ_H, SABD_ZPmZ_S)>; + +// [371] "saddv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_12Cyc_GI03], (instrs SADDV_VPZ_B, SADDV_VPZ_H, SADDV_VPZ_S)>; + +// [372] "scvtf $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SCVTF_ZPmZ_DtoD, SCVTF_ZPmZ_DtoH, SCVTF_ZPmZ_DtoS, SCVTF_ZPmZ_HtoH, SCVTF_ZPmZ_StoD, SCVTF_ZPmZ_StoH, SCVTF_ZPmZ_StoS)>; + +// [373] "sdiv $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs SDIV_ZPmZ_D, SDIV_ZPmZ_S)>; + +// [374] "sdivr $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs SDIVR_ZPmZ_D, SDIVR_ZPmZ_S)>; + +// [375] "sdot $Zda, $Zn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SDOT_ZZZ_D, SDOT_ZZZ_S)>; + +// [376] "sdot $Zda, $Zn, $Zm$iop"; +def : InstRW<[A64FXWrite_15Cyc_NGI03], (instrs SDOT_ZZZI_D, SDOT_ZZZI_S)>; + +// [377] "sel $Pd, $Pg, $Pn, $Pm"; +def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs SEL_PPPP)>; + +// [378] "sel $Zd, $Pg, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SEL_ZPZZ_B, SEL_ZPZZ_D, SEL_ZPZZ_H, SEL_ZPZZ_S)>; + +// [379] "setffr"; +def : InstRW<[A64FXWrite_6Cyc], (instrs SETFFR)>; + +// [380] "smax $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SMAX_ZPmZ_B, SMAX_ZPmZ_D, SMAX_ZPmZ_H, SMAX_ZPmZ_S)>; + +// [381] "smax $Zdn, $_Zdn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SMAX_ZI_B, SMAX_ZI_D, SMAX_ZI_H, SMAX_ZI_S)>; + +// [382] "smaxv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs SMAXV_VPZ_B, SMAXV_VPZ_D, SMAXV_VPZ_H, SMAXV_VPZ_S)>; + +// [383] "smin $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SMIN_ZPmZ_B, SMIN_ZPmZ_D, SMIN_ZPmZ_H, SMIN_ZPmZ_S)>; + +// [384] "smin $Zdn, $_Zdn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SMIN_ZI_B, SMIN_ZI_D, SMIN_ZI_H, SMIN_ZI_S)>; + +// [385] "sminv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs SMINV_VPZ_B, SMINV_VPZ_D, SMINV_VPZ_H, SMINV_VPZ_S)>; + +// [386] "smulh $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SMULH_ZPmZ_B, SMULH_ZPmZ_D, SMULH_ZPmZ_H, SMULH_ZPmZ_S)>; + +// [387] "splice $Zdn, $Pg, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SPLICE_ZPZ_B, SPLICE_ZPZ_D, SPLICE_ZPZ_H, SPLICE_ZPZ_S)>; + +// [388] "sqadd $Zd, $Zn, $Zm"; + +// [389] "sqadd $Zdn, $_Zdn, $imm"; + +// [390] "sqdecb $Rdn, $_Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECB_XPiWdI)>; + +// [391] "sqdecb $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECB_XPiI)>; + +// [392] "sqdecd $Rdn, $_Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECD_XPiWdI)>; + +// [393] "sqdecd $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECD_XPiI)>; + +// [394] "sqdecd $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECD_ZPiI)>; + +// [395] "sqdech $Rdn, $_Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECH_XPiWdI)>; + +// 
[396] "sqdech $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECH_XPiI)>; + +// [397] "sqdech $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECH_ZPiI)>; + +// [398] "sqdecp $Rdn, $Pg"; +def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQDECP_XP_B, SQDECP_XP_D, SQDECP_XP_H, SQDECP_XP_S)>; + +// [399] "sqdecp $Rdn, $Pg, $_Rdn"; +def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQDECP_XPWd_B, SQDECP_XPWd_D, SQDECP_XPWd_H, SQDECP_XPWd_S)>; + +// [400] "sqdecp $Zdn, $Pg"; +def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs SQDECP_ZP_D, SQDECP_ZP_H, SQDECP_ZP_S)>; + +// [401] "sqdecw $Rdn, $_Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECW_XPiWdI)>; + +// [402] "sqdecw $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECW_XPiI)>; + +// [403] "sqdecw $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECW_ZPiI)>; + +// [404] "sqincb $Rdn, $_Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCB_XPiWdI)>; + +// [405] "sqincb $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCB_XPiI)>; + +// [406] "sqincd $Rdn, $_Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCD_XPiWdI)>; + +// [407] "sqincd $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCD_XPiI)>; + +// [408] "sqincd $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCD_ZPiI)>; + +// [409] "sqinch $Rdn, $_Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCH_XPiWdI)>; + +// [410] "sqinch $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCH_XPiI)>; + +// [411] "sqinch $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCH_ZPiI)>; + +// [412] "sqincp $Rdn, $Pg"; +def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQINCP_XP_B, SQINCP_XP_D, SQINCP_XP_H, SQINCP_XP_S)>; + +// [413] "sqincp $Rdn, $Pg, $_Rdn"; +def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQINCP_XPWd_B, SQINCP_XPWd_D, SQINCP_XPWd_H, SQINCP_XPWd_S)>; + +// [414] "sqincp $Zdn, $Pg"; +def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs SQINCP_ZP_D, SQINCP_ZP_H, SQINCP_ZP_S)>; + +// [415] "sqincw $Rdn, $_Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCW_XPiWdI)>; + +// [416] "sqincw $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCW_XPiI)>; + +// [417] "sqincw $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCW_ZPiI)>; + +// [418] "sqsub $Zd, $Zn, $Zm"; + +// [419] "sqsub $Zdn, $_Zdn, $imm"; + +// [420] "st1b $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B, ST1B_D, ST1B_H, ST1B_S)>; + +// [421] "st1b $Zt, $Pg, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1B_D_REAL, SST1B_D_SXTW, SST1B_D_UXTW, SST1B_S_SXTW, SST1B_S_UXTW)>; + +// [422] "st1b $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B_D_IMM, ST1B_H_IMM, ST1B_IMM, ST1B_S_IMM)>; + +// [423] "st1b $Zt, $Pg, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1B_D_IMM, SST1B_S_IMM)>; + +// [424] "st1d $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D)>; + +// [425] "st1d $Zt, $Pg, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1D_REAL, SST1D_SCALED_SCALED_REAL, SST1D_SXTW, SST1D_SXTW_SCALED, SST1D_UXTW, SST1D_UXTW_SCALED)>; + +// [426] "st1d $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST1W_6], 
(instrs ST1D_IMM)>; + +// [427] "st1d $Zt, $Pg, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1D_IMM)>; + +// [428] "st1h $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H, ST1H_D, ST1H_S)>; + +// [429] "st1h $Zt, $Pg, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1H_D_REAL, SST1H_D_SCALED_SCALED_REAL, SST1H_D_SXTW, SST1H_D_SXTW_SCALED, SST1H_D_UXTW, SST1H_D_UXTW_SCALED, SST1H_S_SXTW, SST1H_S_SXTW_SCALED, SST1H_S_UXTW, SST1H_S_UXTW_SCALED)>; + +// [430] "st1h $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H_D_IMM, ST1H_IMM, ST1H_S_IMM)>; + +// [431] "st1h $Zt, $Pg, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1H_D_IMM, SST1H_S_IMM)>; + +// [432] "st1w $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W, ST1W_D)>; + +// [433] "st1w $Zt, $Pg, [$Rn, $Zm]"; +def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1W_D_REAL, SST1W_D_SCALED_SCALED_REAL, SST1W_D_SXTW, SST1W_D_SXTW_SCALED, SST1W_D_UXTW, SST1W_D_UXTW_SCALED, SST1W_SXTW, SST1W_SXTW_SCALED, SST1W_UXTW, SST1W_UXTW_SCALED)>; + +// [434] "st1w $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W_D_IMM, ST1W_IMM)>; + +// [435] "st1w $Zt, $Pg, [$Zn, $imm5]"; +def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1W_D_IMM, SST1W_IMM)>; + +// [436] "st2b $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2B)>; + +// [437] "st2b $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2B_IMM)>; + +// [438] "st2d $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2D)>; + +// [439] "st2d $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2D_IMM)>; + +// [440] "st2h $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2H)>; + +// [441] "st2h $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2H_IMM)>; + +// [442] "st2w $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2W)>; + +// [443] "st2w $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2W_IMM)>; + +// [444] "st3b $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3B)>; + +// [445] "st3b $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3B_IMM)>; + +// [446] "st3d $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3D)>; + +// [447] "st3d $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3D_IMM)>; + +// [448] "st3h $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3H)>; + +// [449] "st3h $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3H_IMM)>; + +// [450] "st3w $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3W)>; + +// [451] "st3w $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3W_IMM)>; + +// [452] "st4b $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4B)>; + +// [453] "st4b $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4B_IMM)>; + +// [454] "st4d $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4D)>; + +// [455] "st4d $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4D_IMM)>; + +// [456] "st4h $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4H)>; + +// [457] "st4h $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4H_IMM)>; + +// [458] "st4w $Zt, $Pg, [$Rn, $Rm]"; 
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4W)>; + +// [459] "st4w $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4W_IMM)>; + +// [460] "stnt1b $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1B_ZRR)>; + +// [461] "stnt1b $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1B_ZRI)>; + +// [462] "stnt1d $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1D_ZRR)>; + +// [463] "stnt1d $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1D_ZRI)>; + +// [464] "stnt1h $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1H_ZRR)>; + +// [465] "stnt1h $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1H_ZRI)>; + +// [466] "stnt1w $Zt, $Pg, [$Rn, $Rm]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1W_ZRR)>; + +// [467] "stnt1w $Zt, $Pg, [$Rn, $imm4, mul vl]"; +def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1W_ZRI)>; + +// [468] "str $Pt, [$Rn, $imm9, mul vl]"; +def : InstRW<[A64FXWrite_6Cyc_GI15], (instrs STR_PXI)>; + +// [469] "str $Zt, [$Rn, $imm9, mul vl]"; +def : InstRW<[A64FXWrite_6Cyc_GI05], (instrs STR_ZXI)>; + +// [470] "sub $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZZZ_B, SUB_ZZZ_D, SUB_ZZZ_H, SUB_ZZZ_S)>; + +// [471] "sub $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZPmZ_B, SUB_ZPmZ_D, SUB_ZPmZ_H, SUB_ZPmZ_S)>; + +// [472] "sub $Zdn, $_Zdn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZI_B, SUB_ZI_D, SUB_ZI_H, SUB_ZI_S)>; + +// [473] "subr $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUBR_ZPmZ_B, SUBR_ZPmZ_D, SUBR_ZPmZ_H, SUBR_ZPmZ_S)>; + +// [474] "subr $Zdn, $_Zdn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SUBR_ZI_B, SUBR_ZI_D, SUBR_ZI_H, SUBR_ZI_S)>; + +// [475] "sunpkhi $Zd, $Zn"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SUNPKHI_ZZ_D, SUNPKHI_ZZ_H, SUNPKHI_ZZ_S)>; + +// [476] "sunpklo $Zd, $Zn"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SUNPKLO_ZZ_D, SUNPKLO_ZZ_H, SUNPKLO_ZZ_S)>; + +// [477] "sxtb $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTB_ZPmZ_D, SXTB_ZPmZ_H, SXTB_ZPmZ_S)>; + +// [478] "sxth $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTH_ZPmZ_D, SXTH_ZPmZ_S)>; + +// [479] "sxtw $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTW_ZPmZ_D)>; + +// [480] "tbl $Zd, $Zn, $Zm"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs TBL_ZZZ_B, TBL_ZZZ_D, TBL_ZZZ_H, TBL_ZZZ_S)>; + +// [481] "trn1 $Pd, $Pn, $Pm"; + +// [482] "trn1 $Zd, $Zn, $Zm"; + +// [483] "trn2 $Pd, $Pn, $Pm"; + +// [484] "trn2 $Zd, $Zn, $Zm"; + +// [486] "uabd $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UABD_ZPmZ_B, UABD_ZPmZ_D, UABD_ZPmZ_H, UABD_ZPmZ_S)>; + +// [487] "uaddv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_12Cyc_GI03], (instrs UADDV_VPZ_B, UADDV_VPZ_D, UADDV_VPZ_H, UADDV_VPZ_S)>; + +// [488] "ucvtf $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UCVTF_ZPmZ_DtoD, UCVTF_ZPmZ_DtoH, UCVTF_ZPmZ_DtoS, UCVTF_ZPmZ_HtoH, UCVTF_ZPmZ_StoD, UCVTF_ZPmZ_StoH, UCVTF_ZPmZ_StoS)>; + +// [489] "udiv $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs UDIV_ZPmZ_D, UDIV_ZPmZ_S)>; + +// [490] "udivr $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs UDIVR_ZPmZ_D, UDIVR_ZPmZ_S)>; + +// [491] "udot $Zda, $Zn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UDOT_ZZZ_D, UDOT_ZZZ_S)>; + +// [492] "udot $Zda, $Zn, 
$Zm$iop"; +def : InstRW<[A64FXWrite_15Cyc_NGI03], (instrs UDOT_ZZZI_D, UDOT_ZZZI_S)>; + +// [493] "umax $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UMAX_ZPmZ_B, UMAX_ZPmZ_D, UMAX_ZPmZ_H, UMAX_ZPmZ_S)>; + +// [494] "umax $Zdn, $_Zdn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs UMAX_ZI_B, UMAX_ZI_D, UMAX_ZI_H, UMAX_ZI_S)>; + +// [495] "umaxv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs UMAXV_VPZ_B, UMAXV_VPZ_D, UMAXV_VPZ_H, UMAXV_VPZ_S)>; + +// [496] "umin $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UMIN_ZPmZ_B, UMIN_ZPmZ_D, UMIN_ZPmZ_H, UMIN_ZPmZ_S)>; + +// [497] "umin $Zdn, $_Zdn, $imm"; +def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs UMIN_ZI_B, UMIN_ZI_D, UMIN_ZI_H, UMIN_ZI_S)>; + +// [498] "uminv $Vd, $Pg, $Zn"; +def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs UMINV_VPZ_B, UMINV_VPZ_D, UMINV_VPZ_H, UMINV_VPZ_S)>; + +// [499] "umulh $Zdn, $Pg/m, $_Zdn, $Zm"; +def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UMULH_ZPmZ_B, UMULH_ZPmZ_D, UMULH_ZPmZ_H, UMULH_ZPmZ_S)>; + +// [500] "uqadd $Zd, $Zn, $Zm"; + +// [501] "uqadd $Zdn, $_Zdn, $imm"; + +// [502] "uqdecb $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECB_WPiI, UQDECB_XPiI)>; + +// [503] "uqdecd $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECD_WPiI, UQDECD_XPiI)>; + +// [504] "uqdecd $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECD_ZPiI)>; + +// [505] "uqdech $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECH_WPiI, UQDECH_XPiI)>; + +// [506] "uqdech $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECH_ZPiI)>; + +// [507] "uqdecp $Rdn, $Pg"; +def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs UQDECP_WP_B, UQDECP_WP_D, UQDECP_WP_H, UQDECP_WP_S, UQDECP_XP_B, UQDECP_XP_D, UQDECP_XP_H, UQDECP_XP_S)>; + +// [508] "uqdecp $Zdn, $Pg"; +def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs UQDECP_ZP_D, UQDECP_ZP_H, UQDECP_ZP_S)>; + +// [509] "uqdecw $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECW_WPiI, UQDECW_XPiI)>; + +// [510] "uqdecw $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECW_ZPiI)>; + +// [511] "uqincb $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCB_WPiI, UQINCB_XPiI)>; + +// [512] "uqincd $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCD_WPiI, UQINCD_XPiI)>; + +// [513] "uqincd $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCD_ZPiI)>; + +// [514] "uqinch $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCH_WPiI, UQINCH_XPiI)>; + +// [515] "uqinch $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCH_ZPiI)>; + +// [516] "uqincp $Rdn, $Pg"; +def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs UQINCP_WP_B, UQINCP_WP_D, UQINCP_WP_H, UQINCP_WP_S, UQINCP_XP_B, UQINCP_XP_D, UQINCP_XP_H, UQINCP_XP_S)>; + +// [517] "uqincp $Zdn, $Pg"; +def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs UQINCP_ZP_D, UQINCP_ZP_H, UQINCP_ZP_S)>; + +// [518] "uqincw $Rdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCW_WPiI, UQINCW_XPiI)>; + +// [519] "uqincw $Zdn, $pattern, mul $imm4"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCW_ZPiI)>; + +// [520] "uqsub $Zd, $Zn, $Zm"; +//@@@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQSUB_ZZZ_B, UQSUB_ZZZ_D, UQSUB_ZZZ_H, UQSUB_ZZZ_S)>; + +// [521] "uqsub $Zdn, $_Zdn, $imm"; +//@@@ 
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQSUB_ZI_B, UQSUB_ZI_D, UQSUB_ZI_H, UQSUB_ZI_S)>; + +// [522] "uunpkhi $Zd, $Zn"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs UUNPKHI_ZZ_D, UUNPKHI_ZZ_H, UUNPKHI_ZZ_S)>; + +// [523] "uunpklo $Zd, $Zn"; +def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs UUNPKLO_ZZ_D, UUNPKLO_ZZ_H, UUNPKLO_ZZ_S)>; + +// [524] "uxtb $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTB_ZPmZ_D, UXTB_ZPmZ_H, UXTB_ZPmZ_S)>; + +// [525] "uxth $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTH_ZPmZ_D, UXTH_ZPmZ_S)>; + +// [526] "uxtw $Zd, $Pg/m, $Zn"; +def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTW_ZPmZ_D)>; + +// [527] "uzp1 $Pd, $Pn, $Pm"; + +// [528] "uzp1 $Zd, $Zn, $Zm"; + +// [529] "uzp2 $Pd, $Pn, $Pm"; + +// [530] "uzp2 $Zd, $Zn, $Zm"; + +// [531] "whilele $Pd, $Rn, $Rm"; +def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELE_PWW_B, WHILELE_PWW_D, WHILELE_PWW_H, WHILELE_PWW_S, WHILELE_PXX_B, WHILELE_PXX_D, WHILELE_PXX_H, WHILELE_PXX_S)>; + +// [532] "whilelo $Pd, $Rn, $Rm"; +def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELO_PWW_B, WHILELO_PWW_D, WHILELO_PWW_H, WHILELO_PWW_S, WHILELO_PXX_B, WHILELO_PXX_D, WHILELO_PXX_H, WHILELO_PXX_S)>; + +// [533] "whilels $Pd, $Rn, $Rm"; +def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELS_PWW_B, WHILELS_PWW_D, WHILELS_PWW_H, WHILELS_PWW_S, WHILELS_PXX_B, WHILELS_PXX_D, WHILELS_PXX_H, WHILELS_PXX_S)>; + +// [534] "whilelt $Pd, $Rn, $Rm"; +def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELT_PWW_B, WHILELT_PWW_D, WHILELT_PWW_H, WHILELT_PWW_S, WHILELT_PXX_B, WHILELT_PXX_D, WHILELT_PXX_H, WHILELT_PXX_S)>; + +// [535] "wrffr $Pn"; +def : InstRW<[A64FXWrite_6Cyc_NGI1], (instrs WRFFR)>; + +// [536] "zip1 $Pd, $Pn, $Pm"; + +// [537] "zip1 $Zd, $Zn, $Zm"; + +// [538] "zip2 $Pd, $Pn, $Pm"; + +// [539] "zip2 $Zd, $Zn, $Zm"; + +} // SchedModel = A64FXModel diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedTSV110.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedTSV110.td index 438371c1b6..0828d8a8c9 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedTSV110.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedTSV110.td @@ -1,745 +1,745 @@ -//==- AArch64SchedTSV110.td - Huawei TSV110 Scheduling Definitions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for Huawei TSV110 to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -// ===---------------------------------------------------------------------===// -// The following definitions describe the simpler per-operand machine model. -// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details. - -// Huawei TSV110 scheduling machine model. -def TSV110Model : SchedMachineModel { - let IssueWidth = 4; // 4 micro-ops dispatched per cycle. - let MicroOpBufferSize = 128; // 128 micro-op re-order buffer - let LoopMicroOpBufferSize = 16; - let LoadLatency = 4; // Optimistic load latency. 
- let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch - let CompleteModel = 1; - - list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, - PAUnsupported.F); -} - -// Define each kind of processor resource and number available on the TSV110, -// which has 8 pipelines, each with its own queue where micro-ops wait for -// their operands and issue out-of-order to one of eight execution pipelines. -let SchedModel = TSV110Model in { - def TSV110UnitALU : ProcResource<1>; // Int ALU - def TSV110UnitAB : ProcResource<2>; // Int ALU/BRU - def TSV110UnitMDU : ProcResource<1>; // Multi-Cycle - def TSV110UnitFSU1 : ProcResource<1>; // FP/ASIMD - def TSV110UnitFSU2 : ProcResource<1>; // FP/ASIMD - def TSV110UnitLdSt : ProcResource<2>; // Load/Store - - def TSV110UnitF : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2]>; - def TSV110UnitALUAB : ProcResGroup<[TSV110UnitALU, TSV110UnitAB]>; - def TSV110UnitFLdSt : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2, TSV110UnitLdSt]>; -} - -let SchedModel = TSV110Model in { - -//===----------------------------------------------------------------------===// -// Map the target-defined scheduler read/write resources and latency for -// TSV110 - -// Integer ALU -def : WriteRes<WriteImm, [TSV110UnitALUAB]> { let Latency = 1; } -def : WriteRes<WriteI, [TSV110UnitALUAB]> { let Latency = 1; } -def : WriteRes<WriteISReg, [TSV110UnitMDU]> { let Latency = 2; } -def : WriteRes<WriteIEReg, [TSV110UnitMDU]> { let Latency = 2; } -def : WriteRes<WriteExtr, [TSV110UnitALUAB]> { let Latency = 1; } -def : WriteRes<WriteIS, [TSV110UnitALUAB]> { let Latency = 1; } - -// Integer Mul/MAC/Div -def : WriteRes<WriteID32, [TSV110UnitMDU]> { let Latency = 12; - let ResourceCycles = [12]; } -def : WriteRes<WriteID64, [TSV110UnitMDU]> { let Latency = 20; - let ResourceCycles = [20]; } -def : WriteRes<WriteIM32, [TSV110UnitMDU]> { let Latency = 3; } -def : WriteRes<WriteIM64, [TSV110UnitMDU]> { let Latency = 4; } - -// Load -def : WriteRes<WriteLD, [TSV110UnitLdSt]> { let Latency = 4; } -def : WriteRes<WriteLDIdx, [TSV110UnitLdSt]> { let Latency = 4; } -def : WriteRes<WriteLDHi, []> { let Latency = 4; } - -// Pre/Post Indexing -def : WriteRes<WriteAdr, [TSV110UnitALUAB]> { let Latency = 1; } - -// Store -def : WriteRes<WriteST, [TSV110UnitLdSt]> { let Latency = 1; } -def : WriteRes<WriteSTP, [TSV110UnitLdSt]> { let Latency = 1; } -def : WriteRes<WriteSTIdx, [TSV110UnitLdSt]> { let Latency = 1; } - -// FP -def : WriteRes<WriteF, [TSV110UnitF]> { let Latency = 2; } -def : WriteRes<WriteFCmp, [TSV110UnitF]> { let Latency = 3; } -def : WriteRes<WriteFCvt, [TSV110UnitF]> { let Latency = 3; } -def : WriteRes<WriteFCopy, [TSV110UnitF]> { let Latency = 2; } -def : WriteRes<WriteFImm, [TSV110UnitF]> { let Latency = 2; } -def : WriteRes<WriteFMul, [TSV110UnitF]> { let Latency = 5; } - -// FP Div, Sqrt -def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; } - -def : WriteRes<WriteV, [TSV110UnitF]> { let Latency = 4; } -def : WriteRes<WriteVLD, [TSV110UnitFLdSt]> { let Latency = 5; } -def : WriteRes<WriteVST, [TSV110UnitF]> { let Latency = 1; } - -// Branch -def : WriteRes<WriteBr, [TSV110UnitAB]> { let Latency = 1; } -def : WriteRes<WriteBrReg, [TSV110UnitAB]> { let Latency = 1; } -def : WriteRes<WriteSys, []> { let Latency = 1; } -def : WriteRes<WriteBarrier, []> { let Latency = 1; } -def : WriteRes<WriteHint, []> { let Latency = 1; } - -def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } - -// Forwarding logic is modeled only for multiply and accumulate. 
-def : ReadAdvance<ReadI, 0>; -def : ReadAdvance<ReadISReg, 0>; -def : ReadAdvance<ReadIEReg, 0>; -def : ReadAdvance<ReadIM, 0>; -def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>; -def : ReadAdvance<ReadID, 0>; -def : ReadAdvance<ReadExtrHi, 0>; -def : ReadAdvance<ReadAdrBase, 0>; -def : ReadAdvance<ReadVLD, 0>; - -def : InstRW<[WriteI], (instrs COPY)>; - -// Detailed Refinements -//===----------------------------------------------------------------------===// - -// Contains all of the TSV110 specific SchedWriteRes types. The approach -// below is to define a generic SchedWriteRes for every combination of -// latency and microOps. The naming conventions is to use a prefix, one field -// for latency, and one or more microOp count/type designators. -// Prefix: TSV110Wr -// Latency: #cyc -// MicroOp Count/Types: #(ALU|AB|MDU|FSU1|FSU2|LdSt|ALUAB|F|FLdSt) -// -// e.g. TSV110Wr_6cyc_1ALU_6MDU_4LdSt means the total latency is 6 and there are -// 1 micro-ops to be issued down one ALU pipe, six MDU pipes and four LdSt pipes. -// - -//===----------------------------------------------------------------------===// -// Define Generic 1 micro-op types - -def TSV110Wr_1cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 1; } -def TSV110Wr_1cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 1; } -def TSV110Wr_1cyc_1ALUAB : SchedWriteRes<[TSV110UnitALUAB]> { let Latency = 1; } -def TSV110Wr_1cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 1; } - -def TSV110Wr_2cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 2; } -def TSV110Wr_2cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 2; } -def TSV110Wr_2cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 2; } -def TSV110Wr_2cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 2; } -def TSV110Wr_2cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 2; } -def TSV110Wr_2cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 2; } - -def TSV110Wr_3cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 3; } -def TSV110Wr_3cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 3; } -def TSV110Wr_3cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 3; } - -def TSV110Wr_4cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 4; } -def TSV110Wr_4cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 4; } -def TSV110Wr_4cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 4; } -def TSV110Wr_4cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 4; } - -def TSV110Wr_5cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 5; } -def TSV110Wr_5cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 5; } -def TSV110Wr_5cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 5; } -def TSV110Wr_5cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 5; } - -def TSV110Wr_6cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 6; } - -def TSV110Wr_7cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 7; } - -def TSV110Wr_8cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 8; } - -def TSV110Wr_11cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 11; } - -def TSV110Wr_12cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 12; } - -def TSV110Wr_17cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 17; } - -def TSV110Wr_18cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 18; } - -def TSV110Wr_20cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 20; } - -def TSV110Wr_24cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 24; } - -def 
TSV110Wr_31cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 31; } - -def TSV110Wr_36cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 36; } - -def TSV110Wr_38cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 38; } - -def TSV110Wr_64cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 64; } - -//===----------------------------------------------------------------------===// -// Define Generic 2 micro-op types - -def TSV110Wr_1cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt, - TSV110UnitALUAB]> { - let Latency = 1; - let NumMicroOps = 2; -} - -def TSV110Wr_2cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt, - TSV110UnitALUAB]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def TSV110Wr_2cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt, - TSV110UnitLdSt]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def TSV110Wr_2cyc_2F : SchedWriteRes<[TSV110UnitF, - TSV110UnitF]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def TSV110Wr_2cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1, - TSV110UnitFSU2]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def TSV110Wr_4cyc_2F : SchedWriteRes<[TSV110UnitF, - TSV110UnitF]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def TSV110Wr_4cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1, - TSV110UnitFSU2]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def TSV110Wr_4cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt, - TSV110UnitALUAB]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def TSV110Wr_5cyc_1ALU_1F : SchedWriteRes<[TSV110UnitALU, - TSV110UnitF]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def TSV110Wr_6cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt, - TSV110UnitLdSt]> { - let Latency = 6; - let NumMicroOps = 2; -} - -def TSV110Wr_6cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt, - TSV110UnitALUAB]> { - let Latency = 6; - let NumMicroOps = 2; -} - -def TSV110Wr_7cyc_1F_1LdSt : SchedWriteRes<[TSV110UnitF, - TSV110UnitLdSt]> { - let Latency = 7; - let NumMicroOps = 2; -} - -def TSV110Wr_8cyc_2FSU1 : SchedWriteRes<[TSV110UnitFSU1, - TSV110UnitFSU1]> { - let Latency = 8; - let NumMicroOps = 2; -} - - -def TSV110Wr_8cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1, - TSV110UnitFSU2]> { - let Latency = 8; - let NumMicroOps = 2; -} - -//===----------------------------------------------------------------------===// -// Define Generic 3 micro-op types - -def TSV110Wr_6cyc_3F : SchedWriteRes<[TSV110UnitF, TSV110UnitF, - TSV110UnitF]> { - let Latency = 6; - let NumMicroOps = 3; -} - -def TSV110Wr_6cyc_3LdSt : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitLdSt, - TSV110UnitLdSt]> { - let Latency = 6; - let NumMicroOps = 3; -} - -def TSV110Wr_7cyc_2F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, - TSV110UnitLdSt]> { - let Latency = 7; - let NumMicroOps = 3; -} - -//===----------------------------------------------------------------------===// -// Define Generic 4 micro-op types - -def TSV110Wr_8cyc_4F : SchedWriteRes<[TSV110UnitF, TSV110UnitF, - TSV110UnitF, TSV110UnitF]> { - let Latency = 8; - let NumMicroOps = 4; -} - -def TSV110Wr_8cyc_3F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, - TSV110UnitF, TSV110UnitLdSt]> { - let Latency = 8; - let NumMicroOps = 4; -} - -//===----------------------------------------------------------------------===// -// Define Generic 5 micro-op types - -def TSV110Wr_8cyc_3F_2LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF, - TSV110UnitLdSt, TSV110UnitLdSt]> { - let Latency = 8; - let NumMicroOps = 5; -} - 
-//===----------------------------------------------------------------------===// -// Define Generic 8 micro-op types - -def TSV110Wr_10cyc_4F_4LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, - TSV110UnitF, TSV110UnitF, - TSV110UnitLdSt, TSV110UnitLdSt, - TSV110UnitLdSt, TSV110UnitLdSt]> { - let Latency = 10; - let NumMicroOps = 8; -} - - -// Branch Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_1cyc_1AB], (instrs B)>; -def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BL)>; -def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BLR)>; -def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ))$")>; - - -// Cryptography Extensions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AES[DE]")>; -def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AESI?MC")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA1SU1")>; -def : InstRW<[TSV110Wr_2cyc_2F], (instregex "^SHA1(H|SU0)")>; -def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA1[CMP]")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA256SU0")>; -def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^SHA256SU1")>; -def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA256(H|H2)")>; -def TSV110ReadCRC: SchedReadAdvance<1, [TSV110Wr_2cyc_1MDU]>; -def : InstRW<[TSV110Wr_2cyc_1MDU, TSV110ReadCRC], (instregex "^CRC32.*$")>; - - -// Arithmetic and Logical Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(BIC|EON|ORN)[WX]rr")>; -def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(BIC)S[WX]rr")>; - -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(ADD|AND|EOR|ORR|SUB)[WX]r(r|i)")>; -def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(ADD|AND|EOR|ORR|SUB)S[WX]r(r|i)")>; - -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(ADC|SBC|BIC)[WX]r$")>; -def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(ADC|SBC)S[WX]r$")>; - -def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>; -def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)S[WX]rs$")>; -def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(ADD|SUB)[WX]r(s|x|x64)$")>; -def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(ADD|SUB)S[WX]r(s|x|x64)$")>; - -def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>; -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>; - - -// Move and Shift Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instrs ADR, ADRP)>; -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^MOV[NZK][WX]i")>; -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(LSLV|LSRV|ASRV|RORV)(W|X)r")>; - - -// Divide and Multiply Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_12cyc_1MDU], (instregex "^(S|U)DIVWr$")>; -def : InstRW<[TSV110Wr_20cyc_1MDU], (instregex "^(S|U)DIVXr$")>; - -def TSV110ReadMAW : SchedReadAdvance<2, [TSV110Wr_3cyc_1MDU]>; -def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instrs MADDWrrr, MSUBWrrr)>; -def TSV110ReadMAQ : SchedReadAdvance<3, [TSV110Wr_4cyc_1MDU]>; -def : InstRW<[TSV110Wr_4cyc_1MDU, TSV110ReadMAQ], (instrs MADDXrrr, MSUBXrrr)>; -def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instregex "(S|U)(MADDL|MSUBL)rrr")>; -def : InstRW<[TSV110Wr_4cyc_1MDU], (instregex "^(S|U)MULHrr$")>; 
- - -// Miscellaneous Data-Processing Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^EXTR(W|X)rri$")>; -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(S|U)?BFM(W|X)ri$")>; -def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CLS|CLZ|RBIT|REV(16|32)?)(W|X)r$")>; - - -// Load Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(W|X)l$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs LDRSWl)>; - -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(BB|HH|W|X)ui$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; - -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; - -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTR(B|H|W|X)i$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDUR(BB|HH|W|X)i$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>; - -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDNP(W|X)i$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDP(W|X)i$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>; - -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWi)>; -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpost)>; -def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpre)>; - -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFMl)>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFUMi)>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMui$")>; -def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMro(W|X)$")>; - - -// Store Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STN?P(W|X)i$")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR(BB|HH|W|X)i$")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STTR(B|H|W|X)i$")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)ui$")>; - -def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)(post|pre)$")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>; - - -// FP Data Processing Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "F(ABS|NEG)(D|S)r")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCCMP(E)?(S|D)rr$")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCMP(E)?(S|D)r(r|i)$")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCSEL(S|D)rrr$")>; - -def : InstRW<[TSV110Wr_11cyc_1FSU1], (instrs FDIVSrr)>; -def : InstRW<[TSV110Wr_18cyc_1FSU1], (instrs FDIVDrr)>; -def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTSr)>; -def : InstRW<[TSV110Wr_31cyc_1FSU2], (instrs FSQRTDr)>; - -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN).+rr")>; - -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^FN?M(ADD|SUB)Hrrr")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FN?M(ADD|SUB)Srrr")>; -def : InstRW<[TSV110Wr_7cyc_1F], (instregex "^FN?M(ADD|SUB)Drrr")>; - -def : InstRW<[TSV110Wr_4cyc_1F], 
(instregex "^F(ADD|SUB)Hrr")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|SUB)Srr")>; -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Drr")>; - -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(N)?MULHrr$")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULSrr$")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULDrr$")>; - -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT.+r")>; - - -// FP Miscellaneous Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_5cyc_1ALU_1F], (instregex "^[SU]CVTF[SU][WX][SD]ri")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT[HSD][HSD]r")>; - -def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^FMOV(DX|WS|XD|SW|DXHigh|XDHigh)r$")>; -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOV[SD][ir]$")>; - - -// FP Load Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[DSQ]l")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDUR[BDHSQ]i")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[BDHSQ]ui")>; -def : InstRW<[TSV110Wr_6cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDN?P[DQS]i")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi, WriteAdr], (instregex "^LDP[DQS](post|pre)")>; - - -// FP Store Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR[BHSDQ]i")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ](post|pre)")>; -def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR[BHSDQ]ui")>; -def : InstRW<[TSV110Wr_2cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]")>; -def : InstRW<[TSV110Wr_2cyc_2LdSt], (instregex "^STN?P[SDQ]i")>; -def : InstRW<[TSV110Wr_2cyc_2LdSt, WriteAdr], (instregex "^STP[SDQ](post|pre)")>; - - -// ASIMD Integer Instructions -// ----------------------------------------------------------------------------- - -// Reference for forms in this group -// D form - v8i8, v4i16, v2i32 -// Q form - v16i8, v8i16, v4i32 -// D form - v1i8, v1i16, v1i32, v1i64 -// Q form - v16i8, v8i16, v4i32, v2i64 -// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64 -// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64 - -// ASIMD simple arithmetic -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(ABS|ADD(P)?|NEG|SUB)v")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](ADD(L|LP|W)|SUB(L|W))v")>; - -// ASIMD complex arithmetic -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]H(ADD|SUB)v")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^R?(ADD|SUB)HN2?v")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]Q(ADD|SUB)v")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^(SU|US)QADDv")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]RHADDv")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABAL?v")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABDL?v")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ADALPv")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^((SQ)(ABS|NEG))v")>; - -// ASIMD compare -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex 
"^CM(EQ|GE|GT|HI|HS|LE|LT|TST)v")>; - -// ASIMD max/min -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)P?v")>; - -// ASIMD logical -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(AND|BIC|BIF|BIT|BSL|EOR|MVN|NOT|ORN|ORR)v")>; - -// ASIMD multiply accumulate, D-form -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)")>; -// ASIMD multiply accumulate, Q-form -def : InstRW<[TSV110Wr_8cyc_2FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v16i8|v8i16|v4i32)")>; - -// ASIMD multiply accumulate long -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v8i8|v16i8)")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v1i64|v2i64)")>; - -// ASIMD shift -// ASIMD shift accumulate -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(S|SR|U|UR)SRA")>; -// ASIMD shift by immed, basic -def : InstRW<[TSV110Wr_4cyc_1FSU1], - (instregex "SHLv","SLIv","SRIv","SHRNv","SQXTNv","SQXTUNv","UQXTNv")>; -// ASIMD shift by immed, complex -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]?(Q|R){1,2}SHR")>; -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^SQSHLU")>; -// ASIMD shift by register, basic, Q-form -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; -// ASIMD shift by register, complex, D-form -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; -// ASIMD shift by register, complex, Q-form -def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; - -// ASIMD reduction -// ASIMD arith, reduce, 4H/4S -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; -// ASIMD arith, reduce, 8B/8H -def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; -// ASIMD arith, reduce, 16B -def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?Vv16i8v$")>; - -// ASIMD max/min, reduce, 4H/4S -def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; -// ASIMD max/min, reduce, 8B/8H -def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; -// ASIMD max/min, reduce, 16B -def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)Vv16i8v$")>; - - -// Vector - Floating Point -// ----------------------------------------------------------------------------- - -// Reference for forms in this group -// D form - v2f32 -// Q form - v4f32, v2f64 -// D form - 32, 64 -// D form - v1i32, v1i64 -// D form - v2i32 -// Q form - v4i32, v2i64 - -// ASIMD FP sign manipulation -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FABSv")>; -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FNEGv")>; - -// ASIMD FP compare -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v")>; - -// ASIMD FP convert -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FCVT[AMNPZ][SU]v")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT(L)v")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FCVT(N|XN)v")>; - -// ASIMD FP divide, D-form, F32 -def : InstRW<[TSV110Wr_11cyc_1FSU1], (instregex "FDIVv2f32")>; -// ASIMD FP divide, Q-form, F32 -def : InstRW<[TSV110Wr_24cyc_1FSU1], (instregex "FDIVv4f32")>; -// ASIMD FP divide, Q-form, F64 -def : InstRW<[TSV110Wr_38cyc_1FSU1], (instregex "FDIVv2f64")>; - -// ASIMD FP SQRT -def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTv2f32)>; -def : 
InstRW<[TSV110Wr_36cyc_1FSU2], (instrs FSQRTv4f32)>; -def : InstRW<[TSV110Wr_64cyc_1FSU2], (instrs FSQRTv2f64)>; - -// ASIMD FP max,min -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?v")>; -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?Pv")>; -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(MAX|MIN)(NM)?Vv")>; - -// ASIMD FP add -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|ADDP|SUB)v")>; - -// ASIMD FP multiply -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FMULX?v")>; - - -// ASIMD Miscellaneous Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(CLS|CLZ|CNT)v")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(DUP|INS)v.+lane")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^REV(16|32|64)v")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(UZP|ZIP)[12]v")>; - -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^EXTv")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^XTNv")>; -def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^RBITv")>; - -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^(INS|DUP)v.+gpr")>; - -def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^[SU]MOVv")>; - -// ASIMD table lookup, D-form -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v8i8One")>; -def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v8i8Two")>; -def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v8i8Three")>; -def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v8i8Four")>; -// ASIMD table lookup, Q-form -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v16i8One")>; -def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v16i8Two")>; -def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v16i8Three")>; -def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v16i8Four")>; - -def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOVv")>; - -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT[AIMNPXZ]v")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[SU]CVTFv")>; -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[FU](RECP|RSQRT)(E|X)v")>; - - -// ASIMD Load Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; -def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; -def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; -def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], 
(instregex "LD4i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; - -def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_6cyc_3LdSt], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_6cyc_3LdSt, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_6cyc_2LdSt], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_6cyc_2LdSt, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_10cyc_4F_4LdSt, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - - -// ASIMD Store Instructions -// ----------------------------------------------------------------------------- - -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "ST2i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "ST3i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; -def : InstRW<[TSV110Wr_6cyc_1F], (instregex "ST4i(8|16|32|64)$")>; -def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; - -def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[TSV110Wr_6cyc_1F], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[TSV110Wr_8cyc_1F], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[TSV110Wr_8cyc_1F, WriteAdr], (instregex 
"^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -} // SchedModel = TSV110Model +//==- AArch64SchedTSV110.td - Huawei TSV110 Scheduling Definitions -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Huawei TSV110 to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details. + +// Huawei TSV110 scheduling machine model. +def TSV110Model : SchedMachineModel { + let IssueWidth = 4; // 4 micro-ops dispatched per cycle. + let MicroOpBufferSize = 128; // 128 micro-op re-order buffer + let LoopMicroOpBufferSize = 16; + let LoadLatency = 4; // Optimistic load latency. + let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); +} + +// Define each kind of processor resource and number available on the TSV110, +// which has 8 pipelines, each with its own queue where micro-ops wait for +// their operands and issue out-of-order to one of eight execution pipelines. +let SchedModel = TSV110Model in { + def TSV110UnitALU : ProcResource<1>; // Int ALU + def TSV110UnitAB : ProcResource<2>; // Int ALU/BRU + def TSV110UnitMDU : ProcResource<1>; // Multi-Cycle + def TSV110UnitFSU1 : ProcResource<1>; // FP/ASIMD + def TSV110UnitFSU2 : ProcResource<1>; // FP/ASIMD + def TSV110UnitLdSt : ProcResource<2>; // Load/Store + + def TSV110UnitF : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2]>; + def TSV110UnitALUAB : ProcResGroup<[TSV110UnitALU, TSV110UnitAB]>; + def TSV110UnitFLdSt : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2, TSV110UnitLdSt]>; +} + +let SchedModel = TSV110Model in { + +//===----------------------------------------------------------------------===// +// Map the target-defined scheduler read/write resources and latency for +// TSV110 + +// Integer ALU +def : WriteRes<WriteImm, [TSV110UnitALUAB]> { let Latency = 1; } +def : WriteRes<WriteI, [TSV110UnitALUAB]> { let Latency = 1; } +def : WriteRes<WriteISReg, [TSV110UnitMDU]> { let Latency = 2; } +def : WriteRes<WriteIEReg, [TSV110UnitMDU]> { let Latency = 2; } +def : WriteRes<WriteExtr, [TSV110UnitALUAB]> { let Latency = 1; } +def : WriteRes<WriteIS, [TSV110UnitALUAB]> { let Latency = 1; } + +// Integer Mul/MAC/Div +def : WriteRes<WriteID32, [TSV110UnitMDU]> { let Latency = 12; + let ResourceCycles = [12]; } +def : WriteRes<WriteID64, [TSV110UnitMDU]> { let Latency = 20; + let ResourceCycles = [20]; } +def : WriteRes<WriteIM32, [TSV110UnitMDU]> { let Latency = 3; } +def : WriteRes<WriteIM64, [TSV110UnitMDU]> { let Latency = 4; } + +// Load +def : WriteRes<WriteLD, [TSV110UnitLdSt]> { let Latency = 4; } +def : WriteRes<WriteLDIdx, [TSV110UnitLdSt]> { let Latency = 4; } +def : WriteRes<WriteLDHi, []> { let Latency = 4; } + +// Pre/Post Indexing +def : WriteRes<WriteAdr, [TSV110UnitALUAB]> { let Latency = 1; } + +// Store +def : WriteRes<WriteST, [TSV110UnitLdSt]> { let Latency = 1; } +def 
: WriteRes<WriteSTP, [TSV110UnitLdSt]> { let Latency = 1; } +def : WriteRes<WriteSTIdx, [TSV110UnitLdSt]> { let Latency = 1; } + +// FP +def : WriteRes<WriteF, [TSV110UnitF]> { let Latency = 2; } +def : WriteRes<WriteFCmp, [TSV110UnitF]> { let Latency = 3; } +def : WriteRes<WriteFCvt, [TSV110UnitF]> { let Latency = 3; } +def : WriteRes<WriteFCopy, [TSV110UnitF]> { let Latency = 2; } +def : WriteRes<WriteFImm, [TSV110UnitF]> { let Latency = 2; } +def : WriteRes<WriteFMul, [TSV110UnitF]> { let Latency = 5; } + +// FP Div, Sqrt +def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; } + +def : WriteRes<WriteV, [TSV110UnitF]> { let Latency = 4; } +def : WriteRes<WriteVLD, [TSV110UnitFLdSt]> { let Latency = 5; } +def : WriteRes<WriteVST, [TSV110UnitF]> { let Latency = 1; } + +// Branch +def : WriteRes<WriteBr, [TSV110UnitAB]> { let Latency = 1; } +def : WriteRes<WriteBrReg, [TSV110UnitAB]> { let Latency = 1; } +def : WriteRes<WriteSys, []> { let Latency = 1; } +def : WriteRes<WriteBarrier, []> { let Latency = 1; } +def : WriteRes<WriteHint, []> { let Latency = 1; } + +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + +// Forwarding logic is modeled only for multiply and accumulate. +def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + +def : InstRW<[WriteI], (instrs COPY)>; + +// Detailed Refinements +//===----------------------------------------------------------------------===// + +// Contains all of the TSV110 specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming conventions is to use a prefix, one field +// for latency, and one or more microOp count/type designators. +// Prefix: TSV110Wr +// Latency: #cyc +// MicroOp Count/Types: #(ALU|AB|MDU|FSU1|FSU2|LdSt|ALUAB|F|FLdSt) +// +// e.g. TSV110Wr_6cyc_1ALU_6MDU_4LdSt means the total latency is 6 and there are +// 1 micro-ops to be issued down one ALU pipe, six MDU pipes and four LdSt pipes. 
+// + +//===----------------------------------------------------------------------===// +// Define Generic 1 micro-op types + +def TSV110Wr_1cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 1; } +def TSV110Wr_1cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 1; } +def TSV110Wr_1cyc_1ALUAB : SchedWriteRes<[TSV110UnitALUAB]> { let Latency = 1; } +def TSV110Wr_1cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 1; } + +def TSV110Wr_2cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 2; } +def TSV110Wr_2cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 2; } +def TSV110Wr_2cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 2; } +def TSV110Wr_2cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 2; } +def TSV110Wr_2cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 2; } +def TSV110Wr_2cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 2; } + +def TSV110Wr_3cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 3; } +def TSV110Wr_3cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 3; } +def TSV110Wr_3cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 3; } + +def TSV110Wr_4cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 4; } +def TSV110Wr_4cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 4; } +def TSV110Wr_4cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 4; } +def TSV110Wr_4cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 4; } + +def TSV110Wr_5cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 5; } +def TSV110Wr_5cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 5; } +def TSV110Wr_5cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 5; } +def TSV110Wr_5cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 5; } + +def TSV110Wr_6cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 6; } + +def TSV110Wr_7cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 7; } + +def TSV110Wr_8cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 8; } + +def TSV110Wr_11cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 11; } + +def TSV110Wr_12cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 12; } + +def TSV110Wr_17cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 17; } + +def TSV110Wr_18cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 18; } + +def TSV110Wr_20cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 20; } + +def TSV110Wr_24cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 24; } + +def TSV110Wr_31cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 31; } + +def TSV110Wr_36cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 36; } + +def TSV110Wr_38cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 38; } + +def TSV110Wr_64cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 64; } + +//===----------------------------------------------------------------------===// +// Define Generic 2 micro-op types + +def TSV110Wr_1cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt, + TSV110UnitALUAB]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def TSV110Wr_2cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt, + TSV110UnitALUAB]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def TSV110Wr_2cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt, + TSV110UnitLdSt]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def TSV110Wr_2cyc_2F : SchedWriteRes<[TSV110UnitF, + TSV110UnitF]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def TSV110Wr_2cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1, + TSV110UnitFSU2]> 
{ + let Latency = 2; + let NumMicroOps = 2; +} + +def TSV110Wr_4cyc_2F : SchedWriteRes<[TSV110UnitF, + TSV110UnitF]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def TSV110Wr_4cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1, + TSV110UnitFSU2]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def TSV110Wr_4cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt, + TSV110UnitALUAB]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def TSV110Wr_5cyc_1ALU_1F : SchedWriteRes<[TSV110UnitALU, + TSV110UnitF]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def TSV110Wr_6cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt, + TSV110UnitLdSt]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def TSV110Wr_6cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt, + TSV110UnitALUAB]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def TSV110Wr_7cyc_1F_1LdSt : SchedWriteRes<[TSV110UnitF, + TSV110UnitLdSt]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def TSV110Wr_8cyc_2FSU1 : SchedWriteRes<[TSV110UnitFSU1, + TSV110UnitFSU1]> { + let Latency = 8; + let NumMicroOps = 2; +} + + +def TSV110Wr_8cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1, + TSV110UnitFSU2]> { + let Latency = 8; + let NumMicroOps = 2; +} + +//===----------------------------------------------------------------------===// +// Define Generic 3 micro-op types + +def TSV110Wr_6cyc_3F : SchedWriteRes<[TSV110UnitF, TSV110UnitF, + TSV110UnitF]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def TSV110Wr_6cyc_3LdSt : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitLdSt, + TSV110UnitLdSt]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def TSV110Wr_7cyc_2F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, + TSV110UnitLdSt]> { + let Latency = 7; + let NumMicroOps = 3; +} + +//===----------------------------------------------------------------------===// +// Define Generic 4 micro-op types + +def TSV110Wr_8cyc_4F : SchedWriteRes<[TSV110UnitF, TSV110UnitF, + TSV110UnitF, TSV110UnitF]> { + let Latency = 8; + let NumMicroOps = 4; +} + +def TSV110Wr_8cyc_3F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, + TSV110UnitF, TSV110UnitLdSt]> { + let Latency = 8; + let NumMicroOps = 4; +} + +//===----------------------------------------------------------------------===// +// Define Generic 5 micro-op types + +def TSV110Wr_8cyc_3F_2LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF, + TSV110UnitLdSt, TSV110UnitLdSt]> { + let Latency = 8; + let NumMicroOps = 5; +} + +//===----------------------------------------------------------------------===// +// Define Generic 8 micro-op types + +def TSV110Wr_10cyc_4F_4LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, + TSV110UnitF, TSV110UnitF, + TSV110UnitLdSt, TSV110UnitLdSt, + TSV110UnitLdSt, TSV110UnitLdSt]> { + let Latency = 10; + let NumMicroOps = 8; +} + + +// Branch Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_1cyc_1AB], (instrs B)>; +def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BL)>; +def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BLR)>; +def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ))$")>; + + +// Cryptography Extensions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AES[DE]")>; +def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AESI?MC")>; +def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA1SU1")>; +def : InstRW<[TSV110Wr_2cyc_2F], (instregex "^SHA1(H|SU0)")>; +def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA1[CMP]")>; +def : 
InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA256SU0")>; +def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^SHA256SU1")>; +def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA256(H|H2)")>; +def TSV110ReadCRC: SchedReadAdvance<1, [TSV110Wr_2cyc_1MDU]>; +def : InstRW<[TSV110Wr_2cyc_1MDU, TSV110ReadCRC], (instregex "^CRC32.*$")>; + + +// Arithmetic and Logical Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(BIC|EON|ORN)[WX]rr")>; +def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(BIC)S[WX]rr")>; + +def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(ADD|AND|EOR|ORR|SUB)[WX]r(r|i)")>; +def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(ADD|AND|EOR|ORR|SUB)S[WX]r(r|i)")>; + +def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(ADC|SBC|BIC)[WX]r$")>; +def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(ADC|SBC)S[WX]r$")>; + +def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>; +def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)S[WX]rs$")>; +def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(ADD|SUB)[WX]r(s|x|x64)$")>; +def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(ADD|SUB)S[WX]r(s|x|x64)$")>; + +def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>; +def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>; + + +// Move and Shift Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instrs ADR, ADRP)>; +def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^MOV[NZK][WX]i")>; +def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(LSLV|LSRV|ASRV|RORV)(W|X)r")>; + + +// Divide and Multiply Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_12cyc_1MDU], (instregex "^(S|U)DIVWr$")>; +def : InstRW<[TSV110Wr_20cyc_1MDU], (instregex "^(S|U)DIVXr$")>; + +def TSV110ReadMAW : SchedReadAdvance<2, [TSV110Wr_3cyc_1MDU]>; +def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instrs MADDWrrr, MSUBWrrr)>; +def TSV110ReadMAQ : SchedReadAdvance<3, [TSV110Wr_4cyc_1MDU]>; +def : InstRW<[TSV110Wr_4cyc_1MDU, TSV110ReadMAQ], (instrs MADDXrrr, MSUBXrrr)>; +def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instregex "(S|U)(MADDL|MSUBL)rrr")>; +def : InstRW<[TSV110Wr_4cyc_1MDU], (instregex "^(S|U)MULHrr$")>; + + +// Miscellaneous Data-Processing Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^EXTR(W|X)rri$")>; +def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(S|U)?BFM(W|X)ri$")>; +def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CLS|CLZ|RBIT|REV(16|32)?)(W|X)r$")>; + + +// Load Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(W|X)l$")>; +def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs LDRSWl)>; + +def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(BB|HH|W|X)ui$")>; +def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; + +def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>; +def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; + +def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTR(B|H|W|X)i$")>; +def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDUR(BB|HH|W|X)i$")>; +def : InstRW<[TSV110Wr_4cyc_1LdSt], 
(instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; +def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>; + +def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDNP(W|X)i$")>; +def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDP(W|X)i$")>; +def : InstRW<[TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>; + +def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWi)>; +def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpost)>; +def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpre)>; + +def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFMl)>; +def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFUMi)>; +def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMui$")>; +def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMro(W|X)$")>; + + +// Store Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STN?P(W|X)i$")>; +def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>; +def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR(BB|HH|W|X)i$")>; +def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STTR(B|H|W|X)i$")>; +def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)ui$")>; + +def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)(post|pre)$")>; +def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>; + + +// FP Data Processing Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_2cyc_1F], (instregex "F(ABS|NEG)(D|S)r")>; +def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCCMP(E)?(S|D)rr$")>; +def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCMP(E)?(S|D)r(r|i)$")>; +def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCSEL(S|D)rrr$")>; + +def : InstRW<[TSV110Wr_11cyc_1FSU1], (instrs FDIVSrr)>; +def : InstRW<[TSV110Wr_18cyc_1FSU1], (instrs FDIVDrr)>; +def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTSr)>; +def : InstRW<[TSV110Wr_31cyc_1FSU2], (instrs FSQRTDr)>; + +def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN).+rr")>; + +def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^FN?M(ADD|SUB)Hrrr")>; +def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FN?M(ADD|SUB)Srrr")>; +def : InstRW<[TSV110Wr_7cyc_1F], (instregex "^FN?M(ADD|SUB)Drrr")>; + +def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Hrr")>; +def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|SUB)Srr")>; +def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Drr")>; + +def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(N)?MULHrr$")>; +def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULSrr$")>; +def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULDrr$")>; + +def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT.+r")>; + + +// FP Miscellaneous Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_5cyc_1ALU_1F], (instregex "^[SU]CVTF[SU][WX][SD]ri")>; +def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>; +def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT[HSD][HSD]r")>; + +def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^FMOV(DX|WS|XD|SW|DXHigh|XDHigh)r$")>; +def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOV[SD][ir]$")>; + + +// FP Load Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[DSQ]l")>; +def : 
InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDUR[BDHSQ]i")>; +def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>; +def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[BDHSQ]ui")>; +def : InstRW<[TSV110Wr_6cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; +def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDN?P[DQS]i")>; +def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi, WriteAdr], (instregex "^LDP[DQS](post|pre)")>; + + +// FP Store Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR[BHSDQ]i")>; +def : InstRW<[TSV110Wr_1cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ](post|pre)")>; +def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR[BHSDQ]ui")>; +def : InstRW<[TSV110Wr_2cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]")>; +def : InstRW<[TSV110Wr_2cyc_2LdSt], (instregex "^STN?P[SDQ]i")>; +def : InstRW<[TSV110Wr_2cyc_2LdSt, WriteAdr], (instregex "^STP[SDQ](post|pre)")>; + + +// ASIMD Integer Instructions +// ----------------------------------------------------------------------------- + +// Reference for forms in this group +// D form - v8i8, v4i16, v2i32 +// Q form - v16i8, v8i16, v4i32 +// D form - v1i8, v1i16, v1i32, v1i64 +// Q form - v16i8, v8i16, v4i32, v2i64 +// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64 +// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64 + +// ASIMD simple arithmetic +def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(ABS|ADD(P)?|NEG|SUB)v")>; +def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](ADD(L|LP|W)|SUB(L|W))v")>; + +// ASIMD complex arithmetic +def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]H(ADD|SUB)v")>; +def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^R?(ADD|SUB)HN2?v")>; +def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]Q(ADD|SUB)v")>; +def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^(SU|US)QADDv")>; +def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]RHADDv")>; +def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABAL?v")>; +def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABDL?v")>; +def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ADALPv")>; +def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^((SQ)(ABS|NEG))v")>; + +// ASIMD compare +def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT|TST)v")>; + +// ASIMD max/min +def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)P?v")>; + +// ASIMD logical +def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(AND|BIC|BIF|BIT|BSL|EOR|MVN|NOT|ORN|ORR)v")>; + +// ASIMD multiply accumulate, D-form +def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)")>; +// ASIMD multiply accumulate, Q-form +def : InstRW<[TSV110Wr_8cyc_2FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v16i8|v8i16|v4i32)")>; + +// ASIMD multiply accumulate long +def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; +def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v8i8|v16i8)")>; +def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v1i64|v2i64)")>; + +// ASIMD shift +// ASIMD shift accumulate +def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(S|SR|U|UR)SRA")>; +// ASIMD shift by immed, basic +def : InstRW<[TSV110Wr_4cyc_1FSU1], + (instregex "SHLv","SLIv","SRIv","SHRNv","SQXTNv","SQXTUNv","UQXTNv")>; +// ASIMD shift by immed, complex +def : 
InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]?(Q|R){1,2}SHR")>; +def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^SQSHLU")>; +// ASIMD shift by register, basic, Q-form +def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; +// ASIMD shift by register, complex, D-form +def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; +// ASIMD shift by register, complex, Q-form +def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD reduction +// ASIMD arith, reduce, 4H/4S +def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; +// ASIMD arith, reduce, 8B/8H +def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; +// ASIMD arith, reduce, 16B +def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?Vv16i8v$")>; + +// ASIMD max/min, reduce, 4H/4S +def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; +// ASIMD max/min, reduce, 8B/8H +def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; +// ASIMD max/min, reduce, 16B +def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)Vv16i8v$")>; + + +// Vector - Floating Point +// ----------------------------------------------------------------------------- + +// Reference for forms in this group +// D form - v2f32 +// Q form - v4f32, v2f64 +// D form - 32, 64 +// D form - v1i32, v1i64 +// D form - v2i32 +// Q form - v4i32, v2i64 + +// ASIMD FP sign manipulation +def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FABSv")>; +def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FNEGv")>; + +// ASIMD FP compare +def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v")>; + +// ASIMD FP convert +def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FCVT[AMNPZ][SU]v")>; +def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT(L)v")>; +def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FCVT(N|XN)v")>; + +// ASIMD FP divide, D-form, F32 +def : InstRW<[TSV110Wr_11cyc_1FSU1], (instregex "FDIVv2f32")>; +// ASIMD FP divide, Q-form, F32 +def : InstRW<[TSV110Wr_24cyc_1FSU1], (instregex "FDIVv4f32")>; +// ASIMD FP divide, Q-form, F64 +def : InstRW<[TSV110Wr_38cyc_1FSU1], (instregex "FDIVv2f64")>; + +// ASIMD FP SQRT +def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTv2f32)>; +def : InstRW<[TSV110Wr_36cyc_1FSU2], (instrs FSQRTv4f32)>; +def : InstRW<[TSV110Wr_64cyc_1FSU2], (instrs FSQRTv2f64)>; + +// ASIMD FP max,min +def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?v")>; +def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?Pv")>; +def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(MAX|MIN)(NM)?Vv")>; + +// ASIMD FP add +def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|ADDP|SUB)v")>; + +// ASIMD FP multiply +def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FMULX?v")>; + + +// ASIMD Miscellaneous Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(CLS|CLZ|CNT)v")>; +def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(DUP|INS)v.+lane")>; +def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^REV(16|32|64)v")>; +def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(UZP|ZIP)[12]v")>; + +def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^EXTv")>; +def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^XTNv")>; +def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex 
"^RBITv")>; + +def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^(INS|DUP)v.+gpr")>; + +def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^[SU]MOVv")>; + +// ASIMD table lookup, D-form +def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v8i8One")>; +def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v8i8Two")>; +def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v8i8Three")>; +def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v8i8Four")>; +// ASIMD table lookup, Q-form +def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v16i8One")>; +def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v16i8Two")>; +def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v16i8Three")>; +def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v16i8Four")>; + +def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOVv")>; + +def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT[AIMNPXZ]v")>; +def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[SU]CVTFv")>; +def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[FU](RECP|RSQRT)(E|X)v")>; + + +// ASIMD Load Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)$")>; +def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)$")>; +def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; +def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)$")>; +def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "LD4i(8|16|32|64)$")>; +def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; + +def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[TSV110Wr_6cyc_3LdSt], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_6cyc_3LdSt, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[TSV110Wr_6cyc_2LdSt], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_6cyc_2LdSt, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : 
InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_10cyc_4F_4LdSt, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + + +// ASIMD Store Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[TSV110Wr_3cyc_1F], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[TSV110Wr_4cyc_1F], (instregex "ST2i(8|16|32|64)$")>; +def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[TSV110Wr_5cyc_1F], (instregex "ST3i(8|16|32|64)$")>; +def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[TSV110Wr_6cyc_1F], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; + +def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[TSV110Wr_6cyc_1F], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[TSV110Wr_8cyc_1F], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[TSV110Wr_8cyc_1F, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +} // SchedModel = TSV110Model diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index a5bc3668ed..38ab512c56 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -82,8 +82,8 @@ static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl, unsigned OffsetScaled = 0; while (OffsetScaled < ObjSizeScaled) { if (ObjSizeScaled - OffsetScaled >= 2) { - SDValue AddrNode = - DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl); + SDValue AddrNode = + DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl); SDValue St = DAG.getMemIntrinsicNode( OpCode2, dl, DAG.getVTList(MVT::Other), {Chain, TagSrc, AddrNode}, @@ -95,8 +95,8 @@ static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl, } if (ObjSizeScaled - OffsetScaled > 0) { - SDValue AddrNode = - DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl); + SDValue AddrNode = + 
DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl); SDValue St = DAG.getMemIntrinsicNode( OpCode1, dl, DAG.getVTList(MVT::Other), {Chain, TagSrc, AddrNode}, diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTagging.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTagging.cpp index ab49e0c3f9..93dfda439d 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTagging.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTagging.cpp @@ -59,7 +59,7 @@ using namespace llvm; -#define DEBUG_TYPE "aarch64-stack-tagging" +#define DEBUG_TYPE "aarch64-stack-tagging" static cl::opt<bool> ClMergeInit( "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore, @@ -73,10 +73,10 @@ static cl::opt<bool> static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit", cl::init(40), cl::Hidden); -static cl::opt<unsigned> - ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272), - cl::Hidden); - +static cl::opt<unsigned> + ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272), + cl::Hidden); + static const Align kTagGranuleSize = Align(16); namespace { @@ -107,10 +107,10 @@ public: SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {} bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) { - auto I = - llvm::lower_bound(Ranges, Start, [](const Range &LHS, uint64_t RHS) { - return LHS.End <= RHS; - }); + auto I = + llvm::lower_bound(Ranges, Start, [](const Range &LHS, uint64_t RHS) { + return LHS.End <= RHS; + }); if (I != Ranges.end() && End > I->Start) { // Overlap - bail. return false; @@ -439,8 +439,8 @@ void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore, bool LittleEndian = Triple(AI->getModule()->getTargetTriple()).isLittleEndian(); // Current implementation of initializer merging assumes little endianness. 
- if (MergeInit && !F->hasOptNone() && LittleEndian && - Size < ClMergeInitSizeLimit) { + if (MergeInit && !F->hasOptNone() && LittleEndian && + Size < ClMergeInitSizeLimit) { LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI << ", size = " << Size << "\n"); InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB); @@ -571,7 +571,7 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { auto *II = dyn_cast<IntrinsicInst>(I); if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start || II->getIntrinsicID() == Intrinsic::lifetime_end)) { - AllocaInst *AI = findAllocaForValue(II->getArgOperand(1)); + AllocaInst *AI = findAllocaForValue(II->getArgOperand(1)); if (!AI) { UnrecognizedLifetimes.push_back(I); continue; @@ -659,7 +659,7 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) { IntrinsicInst *Start = Info.LifetimeStart[0]; IntrinsicInst *End = Info.LifetimeEnd[0]; uint64_t Size = - cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue(); + cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue(); Size = alignTo(Size, kTagGranuleSize); tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size); // We need to ensure that if we tag some object, we certainly untag it diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp index 41096a9613..4e64b6116e 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp @@ -49,12 +49,12 @@ cl::opt<UncheckedLdStMode> ClUncheckedLdSt( "apply unchecked-ld-st when the target is definitely within range"), clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st"))); -static cl::opt<bool> - ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true), - cl::ZeroOrMore, - cl::desc("Apply first slot optimization for stack tagging " - "(eliminate ADDG Rt, Rn, 0, 0).")); - +static cl::opt<bool> + ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true), + cl::ZeroOrMore, + cl::desc("Apply first slot optimization for stack tagging " + "(eliminate ADDG Rt, Rn, 0, 0).")); + namespace { class AArch64StackTaggingPreRA : public MachineFunctionPass { @@ -76,7 +76,7 @@ public: bool mayUseUncheckedLoadStore(); void uncheckUsesOf(unsigned TaggedReg, int FI); void uncheckLoadsAndStores(); - Optional<int> findFirstSlotCandidate(); + Optional<int> findFirstSlotCandidate(); bool runOnMachineFunction(MachineFunction &Func) override; StringRef getPassName() const override { @@ -203,141 +203,141 @@ void AArch64StackTaggingPreRA::uncheckLoadsAndStores() { } } -struct SlotWithTag { - int FI; - int Tag; - SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {} - explicit SlotWithTag(const MachineInstr &MI) - : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {} - bool operator==(const SlotWithTag &Other) const { - return FI == Other.FI && Tag == Other.Tag; - } -}; - -namespace llvm { -template <> struct DenseMapInfo<SlotWithTag> { - static inline SlotWithTag getEmptyKey() { return {-2, -2}; } - static inline SlotWithTag getTombstoneKey() { return {-3, -3}; } - static unsigned getHashValue(const SlotWithTag &V) { - return hash_combine(DenseMapInfo<int>::getHashValue(V.FI), - DenseMapInfo<int>::getHashValue(V.Tag)); - } - static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) { - return A == B; - } -}; -} // namespace llvm - -static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) { - return 
MFI->getUseLocalStackAllocationBlock() && - MFI->isObjectPreAllocated(FI); -} - -// Pin one of the tagged slots to offset 0 from the tagged base pointer. -// This would make its address available in a virtual register (IRG's def), as -// opposed to requiring an ADDG instruction to materialize. This effectively -// eliminates a vreg (by replacing it with direct uses of IRG, which is usually -// live almost everywhere anyway), and therefore needs to happen before -// regalloc. -Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() { - // Find the best (FI, Tag) pair to pin to offset 0. - // Looking at the possible uses of a tagged address, the advantage of pinning - // is: - // - COPY to physical register. - // Does not matter, this would trade a MOV instruction for an ADDG. - // - ST*G matter, but those mostly appear near the function prologue where all - // the tagged addresses need to be materialized anyway; also, counting ST*G - // uses would overweight large allocas that require more than one ST*G - // instruction. - // - Load/Store instructions in the address operand do not require a tagged - // pointer, so they also do not benefit. These operands have already been - // eliminated (see uncheckLoadsAndStores) so all remaining load/store - // instructions count. - // - Any other instruction may benefit from being pinned to offset 0. - LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n"); - if (!ClFirstSlot) - return None; - - DenseMap<SlotWithTag, int> RetagScore; - SlotWithTag MaxScoreST{-1, -1}; - int MaxScore = -1; - for (auto *I : ReTags) { - SlotWithTag ST{*I}; - if (isSlotPreAllocated(MFI, ST.FI)) - continue; - - Register RetagReg = I->getOperand(0).getReg(); - if (!Register::isVirtualRegister(RetagReg)) - continue; - - int Score = 0; - SmallVector<Register, 8> WorkList; - WorkList.push_back(RetagReg); - - while (!WorkList.empty()) { - Register UseReg = WorkList.back(); - WorkList.pop_back(); - for (auto &UseI : MRI->use_instructions(UseReg)) { - unsigned Opcode = UseI.getOpcode(); - if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset || - Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset || - Opcode == AArch64::STGPi || Opcode == AArch64::STGloop || - Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback || - Opcode == AArch64::STZGloop_wback) - continue; - if (UseI.isCopy()) { - Register DstReg = UseI.getOperand(0).getReg(); - if (Register::isVirtualRegister(DstReg)) - WorkList.push_back(DstReg); - continue; - } - LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %" - << Register::virtReg2Index(UseReg) << " in " << UseI - << "\n"); - Score++; - } - } - - int TotalScore = RetagScore[ST] += Score; - if (TotalScore > MaxScore || - (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) { - MaxScore = TotalScore; - MaxScoreST = ST; - } - } - - if (MaxScoreST.FI < 0) - return None; - - // If FI's tag is already 0, we are done. - if (MaxScoreST.Tag == 0) - return MaxScoreST.FI; - - // Otherwise, find a random victim pair (FI, Tag) where Tag == 0. - SlotWithTag SwapST{-1, -1}; - for (auto *I : ReTags) { - SlotWithTag ST{*I}; - if (ST.Tag == 0) { - SwapST = ST; - break; - } - } - - // Swap tags between the victim and the highest scoring pair. - // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for - // the highest score slot without changing anything else. 
- for (auto *&I : ReTags) { - SlotWithTag ST{*I}; - MachineOperand &TagOp = I->getOperand(4); - if (ST == MaxScoreST) { - TagOp.setImm(0); - } else if (ST == SwapST) { - TagOp.setImm(MaxScoreST.Tag); - } - } - return MaxScoreST.FI; -} - +struct SlotWithTag { + int FI; + int Tag; + SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {} + explicit SlotWithTag(const MachineInstr &MI) + : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {} + bool operator==(const SlotWithTag &Other) const { + return FI == Other.FI && Tag == Other.Tag; + } +}; + +namespace llvm { +template <> struct DenseMapInfo<SlotWithTag> { + static inline SlotWithTag getEmptyKey() { return {-2, -2}; } + static inline SlotWithTag getTombstoneKey() { return {-3, -3}; } + static unsigned getHashValue(const SlotWithTag &V) { + return hash_combine(DenseMapInfo<int>::getHashValue(V.FI), + DenseMapInfo<int>::getHashValue(V.Tag)); + } + static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) { + return A == B; + } +}; +} // namespace llvm + +static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) { + return MFI->getUseLocalStackAllocationBlock() && + MFI->isObjectPreAllocated(FI); +} + +// Pin one of the tagged slots to offset 0 from the tagged base pointer. +// This would make its address available in a virtual register (IRG's def), as +// opposed to requiring an ADDG instruction to materialize. This effectively +// eliminates a vreg (by replacing it with direct uses of IRG, which is usually +// live almost everywhere anyway), and therefore needs to happen before +// regalloc. +Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() { + // Find the best (FI, Tag) pair to pin to offset 0. + // Looking at the possible uses of a tagged address, the advantage of pinning + // is: + // - COPY to physical register. + // Does not matter, this would trade a MOV instruction for an ADDG. + // - ST*G matter, but those mostly appear near the function prologue where all + // the tagged addresses need to be materialized anyway; also, counting ST*G + // uses would overweight large allocas that require more than one ST*G + // instruction. + // - Load/Store instructions in the address operand do not require a tagged + // pointer, so they also do not benefit. These operands have already been + // eliminated (see uncheckLoadsAndStores) so all remaining load/store + // instructions count. + // - Any other instruction may benefit from being pinned to offset 0. 
+ LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n"); + if (!ClFirstSlot) + return None; + + DenseMap<SlotWithTag, int> RetagScore; + SlotWithTag MaxScoreST{-1, -1}; + int MaxScore = -1; + for (auto *I : ReTags) { + SlotWithTag ST{*I}; + if (isSlotPreAllocated(MFI, ST.FI)) + continue; + + Register RetagReg = I->getOperand(0).getReg(); + if (!Register::isVirtualRegister(RetagReg)) + continue; + + int Score = 0; + SmallVector<Register, 8> WorkList; + WorkList.push_back(RetagReg); + + while (!WorkList.empty()) { + Register UseReg = WorkList.back(); + WorkList.pop_back(); + for (auto &UseI : MRI->use_instructions(UseReg)) { + unsigned Opcode = UseI.getOpcode(); + if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset || + Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset || + Opcode == AArch64::STGPi || Opcode == AArch64::STGloop || + Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback || + Opcode == AArch64::STZGloop_wback) + continue; + if (UseI.isCopy()) { + Register DstReg = UseI.getOperand(0).getReg(); + if (Register::isVirtualRegister(DstReg)) + WorkList.push_back(DstReg); + continue; + } + LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %" + << Register::virtReg2Index(UseReg) << " in " << UseI + << "\n"); + Score++; + } + } + + int TotalScore = RetagScore[ST] += Score; + if (TotalScore > MaxScore || + (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) { + MaxScore = TotalScore; + MaxScoreST = ST; + } + } + + if (MaxScoreST.FI < 0) + return None; + + // If FI's tag is already 0, we are done. + if (MaxScoreST.Tag == 0) + return MaxScoreST.FI; + + // Otherwise, find a random victim pair (FI, Tag) where Tag == 0. + SlotWithTag SwapST{-1, -1}; + for (auto *I : ReTags) { + SlotWithTag ST{*I}; + if (ST.Tag == 0) { + SwapST = ST; + break; + } + } + + // Swap tags between the victim and the highest scoring pair. + // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for + // the highest score slot without changing anything else. + for (auto *&I : ReTags) { + SlotWithTag ST{*I}; + MachineOperand &TagOp = I->getOperand(4); + if (ST == MaxScoreST) { + TagOp.setImm(0); + } else if (ST == SwapST) { + TagOp.setImm(MaxScoreST.Tag); + } + } + return MaxScoreST.FI; +} + bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) { MF = &Func; MRI = &MF->getRegInfo(); @@ -366,35 +366,35 @@ bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) { } } - // Take over from SSP. It does nothing for tagged slots, and should not really - // have been enabled in the first place. - for (int FI : TaggedSlots) - MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None); - + // Take over from SSP. It does nothing for tagged slots, and should not really + // have been enabled in the first place. + for (int FI : TaggedSlots) + MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None); + if (ReTags.empty()) return false; if (mayUseUncheckedLoadStore()) uncheckLoadsAndStores(); - // Find a slot that is used with zero tag offset, like ADDG #fi, 0. - // If the base tagged pointer is set up to the address of this slot, - // the ADDG instruction can be eliminated. 
- Optional<int> BaseSlot = findFirstSlotCandidate(); - if (BaseSlot) - AFI->setTaggedBasePointerIndex(*BaseSlot); - - for (auto *I : ReTags) { - int FI = I->getOperand(1).getIndex(); - int Tag = I->getOperand(4).getImm(); - Register Base = I->getOperand(3).getReg(); - if (Tag == 0 && FI == BaseSlot) { - BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY), - I->getOperand(0).getReg()) - .addReg(Base); - I->eraseFromParent(); - } - } - + // Find a slot that is used with zero tag offset, like ADDG #fi, 0. + // If the base tagged pointer is set up to the address of this slot, + // the ADDG instruction can be eliminated. + Optional<int> BaseSlot = findFirstSlotCandidate(); + if (BaseSlot) + AFI->setTaggedBasePointerIndex(*BaseSlot); + + for (auto *I : ReTags) { + int FI = I->getOperand(1).getIndex(); + int Tag = I->getOperand(4).getImm(); + Register Base = I->getOperand(3).getReg(); + if (Tag == 0 && FI == BaseSlot) { + BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY), + I->getOperand(0).getReg()) + .addReg(Base); + I->eraseFromParent(); + } + } + return true; } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.cpp index 71b2bb1964..f78643d8e7 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.cpp @@ -67,7 +67,7 @@ AArch64Subtarget::initializeSubtargetDependencies(StringRef FS, if (CPUString.empty()) CPUString = "generic"; - ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS); + ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS); initializeProperties(); return *this; @@ -103,26 +103,26 @@ void AArch64Subtarget::initializeProperties() { case CortexA76: case CortexA77: case CortexA78: - case CortexA78C: - case CortexR82: + case CortexA78C: + case CortexR82: case CortexX1: PrefFunctionLogAlignment = 4; break; case A64FX: CacheLineSize = 256; - PrefFunctionLogAlignment = 3; - PrefLoopLogAlignment = 2; - MaxInterleaveFactor = 4; - PrefetchDistance = 128; - MinPrefetchStride = 1024; - MaxPrefetchIterationsAhead = 4; + PrefFunctionLogAlignment = 3; + PrefLoopLogAlignment = 2; + MaxInterleaveFactor = 4; + PrefetchDistance = 128; + MinPrefetchStride = 1024; + MaxPrefetchIterationsAhead = 4; break; case AppleA7: case AppleA10: case AppleA11: case AppleA12: case AppleA13: - case AppleA14: + case AppleA14: CacheLineSize = 64; PrefetchDistance = 280; MinPrefetchStride = 2048; @@ -157,8 +157,8 @@ void AArch64Subtarget::initializeProperties() { PrefFunctionLogAlignment = 3; break; case NeoverseN1: - case NeoverseN2: - case NeoverseV1: + case NeoverseN2: + case NeoverseV1: PrefFunctionLogAlignment = 4; break; case Saphira: @@ -209,7 +209,7 @@ void AArch64Subtarget::initializeProperties() { AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM, bool LittleEndian) - : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), + : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()), CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()), IsLittle(LittleEndian), @@ -375,8 +375,8 @@ unsigned AArch64Subtarget::getMinSVEVectorSizeInBits() const { return (SVEVectorBitsMin / 128) * 128; return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128; } - -bool AArch64Subtarget::useSVEForFixedLengthVectors() const { - // Prefer NEON unless larger SVE registers are available. 
- return hasSVE() && getMinSVEVectorSizeInBits() >= 256; -} + +bool AArch64Subtarget::useSVEForFixedLengthVectors() const { + // Prefer NEON unless larger SVE registers are available. + return hasSVE() && getMinSVEVectorSizeInBits() >= 256; +} diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.h index 8fe2f12598..ce401f4986 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.h @@ -45,7 +45,7 @@ public: AppleA11, AppleA12, AppleA13, - AppleA14, + AppleA14, Carmel, CortexA35, CortexA53, @@ -58,24 +58,24 @@ public: CortexA76, CortexA77, CortexA78, - CortexA78C, - CortexR82, + CortexA78C, + CortexR82, CortexX1, ExynosM3, Falkor, Kryo, NeoverseE1, NeoverseN1, - NeoverseN2, - NeoverseV1, + NeoverseN2, + NeoverseV1, Saphira, ThunderX2T99, ThunderX, ThunderXT81, ThunderXT83, ThunderXT88, - ThunderX3T110, - TSV110 + ThunderX3T110, + TSV110 }; protected: @@ -88,11 +88,11 @@ protected: bool HasV8_4aOps = false; bool HasV8_5aOps = false; bool HasV8_6aOps = false; - bool HasV8_7aOps = false; - - bool HasV8_0rOps = false; - bool HasCONTEXTIDREL2 = false; + bool HasV8_7aOps = false; + bool HasV8_0rOps = false; + bool HasCONTEXTIDREL2 = false; + bool HasFPARMv8 = false; bool HasNEON = false; bool HasCrypto = false; @@ -127,7 +127,7 @@ protected: bool HasAES = false; // ARMv8.3 extensions - bool HasPAuth = false; + bool HasPAuth = false; bool HasJS = false; bool HasCCIDX = false; bool HasComplxNum = false; @@ -141,7 +141,7 @@ protected: bool HasSEL2 = false; bool HasPMU = false; bool HasTLB_RMI = false; - bool HasFlagM = false; + bool HasFlagM = false; bool HasRCPC_IMMO = false; bool HasLSLFast = false; @@ -170,12 +170,12 @@ protected: bool HasFineGrainedTraps = false; bool HasEnhancedCounterVirtualization = false; - // Armv8.7-A Extensions - bool HasXS = false; - bool HasWFxT = false; - bool HasHCX = false; - bool HasLS64 = false; - + // Armv8.7-A Extensions + bool HasXS = false; + bool HasWFxT = false; + bool HasHCX = false; + bool HasLS64 = false; + // Arm SVE2 extensions bool HasSVE2 = false; bool HasSVE2AES = false; @@ -186,9 +186,9 @@ protected: // Future architecture extensions. bool HasETE = false; bool HasTRBE = false; - bool HasBRBE = false; - bool HasPAUTH = false; - bool HasSPE_EEF = false; + bool HasBRBE = false; + bool HasPAUTH = false; + bool HasSPE_EEF = false; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. bool HasZeroCycleRegMove = false; @@ -208,7 +208,7 @@ protected: // Enable 64-bit vectorization in SLP. 
unsigned MinVectorRegisterBitWidth = 64; - bool OutlineAtomics = false; + bool OutlineAtomics = false; bool UseAA = false; bool PredictableSelectIsExpensive = false; bool BalanceFPOps = false; @@ -221,7 +221,7 @@ protected: bool UseAlternateSExtLoadCVTF32Pattern = false; bool HasArithmeticBccFusion = false; bool HasArithmeticCbzFusion = false; - bool HasCmpBccFusion = false; + bool HasCmpBccFusion = false; bool HasFuseAddress = false; bool HasFuseAES = false; bool HasFuseArithmeticLogic = false; @@ -325,7 +325,7 @@ public: bool hasV8_3aOps() const { return HasV8_3aOps; } bool hasV8_4aOps() const { return HasV8_4aOps; } bool hasV8_5aOps() const { return HasV8_5aOps; } - bool hasV8_0rOps() const { return HasV8_0rOps; } + bool hasV8_0rOps() const { return HasV8_0rOps; } bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } @@ -363,7 +363,7 @@ public: bool hasSHA3() const { return HasSHA3; } bool hasSHA2() const { return HasSHA2; } bool hasAES() const { return HasAES; } - bool hasCONTEXTIDREL2() const { return HasCONTEXTIDREL2; } + bool hasCONTEXTIDREL2() const { return HasCONTEXTIDREL2; } bool balanceFPOps() const { return BalanceFPOps; } bool predictableSelectIsExpensive() const { return PredictableSelectIsExpensive; @@ -378,7 +378,7 @@ public: } bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; } bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } - bool hasCmpBccFusion() const { return HasCmpBccFusion; } + bool hasCmpBccFusion() const { return HasCmpBccFusion; } bool hasFuseAddress() const { return HasFuseAddress; } bool hasFuseAES() const { return HasFuseAES; } bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; } @@ -454,7 +454,7 @@ public: bool hasRandGen() const { return HasRandGen; } bool hasMTE() const { return HasMTE; } bool hasTME() const { return HasTME; } - bool hasPAUTH() const { return HasPAUTH; } + bool hasPAUTH() const { return HasPAUTH; } // Arm SVE2 extensions bool hasSVE2AES() const { return HasSVE2AES; } bool hasSVE2SM4() const { return HasSVE2SM4; } @@ -484,15 +484,15 @@ public: bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } - bool isTargetILP32() const { - return TargetTriple.isArch32Bit() || - TargetTriple.getEnvironment() == Triple::GNUILP32; - } + bool isTargetILP32() const { + return TargetTriple.isArch32Bit() || + TargetTriple.getEnvironment() == Triple::GNUILP32; + } bool useAA() const override { return UseAA; } - bool outlineAtomics() const { return OutlineAtomics; } - + bool outlineAtomics() const { return OutlineAtomics; } + bool hasVH() const { return HasVH; } bool hasPAN() const { return HasPAN; } bool hasLOR() const { return HasLOR; } @@ -501,7 +501,7 @@ public: bool hasPAN_RWV() const { return HasPAN_RWV; } bool hasCCPP() const { return HasCCPP; } - bool hasPAuth() const { return HasPAuth; } + bool hasPAuth() const { return HasPAuth; } bool hasJS() const { return HasJS; } bool hasCCIDX() const { return HasCCIDX; } bool hasComplxNum() const { return HasComplxNum; } @@ -512,14 +512,14 @@ public: bool hasTRACEV8_4() const { return HasTRACEV8_4; } bool hasAM() const { return HasAM; } bool hasAMVS() const { return HasAMVS; } - bool hasXS() const { return HasXS; } - bool hasWFxT() const { return HasWFxT; } - bool hasHCX() const { return HasHCX; } - bool hasLS64() const { return HasLS64; } + bool hasXS() const { return HasXS; } + bool hasWFxT() const { return HasWFxT; } + bool hasHCX() const { return 
HasHCX; } + bool hasLS64() const { return HasLS64; } bool hasSEL2() const { return HasSEL2; } bool hasPMU() const { return HasPMU; } bool hasTLB_RMI() const { return HasTLB_RMI; } - bool hasFlagM() const { return HasFlagM; } + bool hasFlagM() const { return HasFlagM; } bool hasRCPC_IMMO() const { return HasRCPC_IMMO; } bool addrSinkUsingGEPs() const override { @@ -542,7 +542,7 @@ public: /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); /// ClassifyGlobalReference - Find the target operand flags that describe /// how a global value should be referenced for the current subtarget. @@ -581,7 +581,7 @@ public: // implied by the architecture. unsigned getMaxSVEVectorSizeInBits() const; unsigned getMinSVEVectorSizeInBits() const; - bool useSVEForFixedLengthVectors() const; + bool useSVEForFixedLengthVectors() const; }; } // End llvm namespace diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SystemOperands.td index 01ac52bd87..0b9c53a72f 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SystemOperands.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SystemOperands.td @@ -32,11 +32,11 @@ def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">, AssemblerPredicate<(all_of FeaturePAN_RWV), "ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">; -def HasCONTEXTIDREL2 - : Predicate<"Subtarget->hasCONTEXTIDREL2()">, - AssemblerPredicate<(all_of FeatureCONTEXTIDREL2), - "Target contains CONTEXTIDR_EL2 RW operand">; - +def HasCONTEXTIDREL2 + : Predicate<"Subtarget->hasCONTEXTIDREL2()">, + AssemblerPredicate<(all_of FeatureCONTEXTIDREL2), + "Target contains CONTEXTIDR_EL2 RW operand">; + //===----------------------------------------------------------------------===// // AT (address translate) instruction options. //===----------------------------------------------------------------------===// @@ -98,21 +98,21 @@ def : DB<"ld", 0xd>; def : DB<"st", 0xe>; def : DB<"sy", 0xf>; -class DBnXS<string name, bits<4> encoding, bits<5> immValue> : SearchableTable { - let SearchableFields = ["Name", "Encoding", "ImmValue"]; - let EnumValueField = "Encoding"; - - string Name = name; - bits<4> Encoding = encoding; - bits<5> ImmValue = immValue; - code Requires = [{ {AArch64::FeatureXS} }]; -} - -def : DBnXS<"oshnxs", 0x3, 0x10>; -def : DBnXS<"nshnxs", 0x7, 0x14>; -def : DBnXS<"ishnxs", 0xb, 0x18>; -def : DBnXS<"synxs", 0xf, 0x1c>; - +class DBnXS<string name, bits<4> encoding, bits<5> immValue> : SearchableTable { + let SearchableFields = ["Name", "Encoding", "ImmValue"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<4> Encoding = encoding; + bits<5> ImmValue = immValue; + code Requires = [{ {AArch64::FeatureXS} }]; +} + +def : DBnXS<"oshnxs", 0x3, 0x10>; +def : DBnXS<"nshnxs", 0x7, 0x14>; +def : DBnXS<"ishnxs", 0xb, 0x18>; +def : DBnXS<"synxs", 0xf, 0x1c>; + //===----------------------------------------------------------------------===// // DC (data cache maintenance) instruction options. //===----------------------------------------------------------------------===// @@ -404,8 +404,8 @@ def : BTI<"jc", 0b11>; // TLBI (translation lookaside buffer invalidate) instruction options. 
//===----------------------------------------------------------------------===// -class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2, bit needsreg> { +class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2, bit needsreg> { string Name = name; bits<14> Encoding; let Encoding{13-11} = op1; @@ -413,122 +413,122 @@ class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, let Encoding{6-3} = crm; let Encoding{2-0} = op2; bit NeedsReg = needsreg; - list<string> Requires = []; - list<string> ExtraRequires = []; - code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }]; -} - -def TLBITable : GenericTable { - let FilterClass = "TLBIEntry"; - let CppTypeName = "TLBI"; - let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; -} - -def lookupTLBIByName : SearchIndex { - let Table = TLBITable; - let Key = ["Name"]; + list<string> Requires = []; + list<string> ExtraRequires = []; + code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }]; } -def lookupTLBIByEncoding : SearchIndex { - let Table = TLBITable; - let Key = ["Encoding"]; -} - -multiclass TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2, bit needsreg = 1> { - def : TLBIEntry<name, op1, crn, crm, op2, needsreg>; - def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> { - let Encoding{7} = 1; - let ExtraRequires = ["AArch64::FeatureXS"]; - } -} - -defm : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>; -defm : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>; -defm : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>; -defm : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>; -defm : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>; -defm : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>; -defm : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>; -defm : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>; -defm : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>; -defm : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>; -defm : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>; -defm : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>; -defm : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>; -defm : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>; -defm : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>; -defm : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>; -defm : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>; -defm : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>; -defm : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>; -defm : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>; - +def TLBITable : GenericTable { + let FilterClass = "TLBIEntry"; + let CppTypeName = "TLBI"; + let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; +} + +def lookupTLBIByName : SearchIndex { + let Table = TLBITable; + let Key = ["Name"]; +} + +def lookupTLBIByEncoding : 
SearchIndex { + let Table = TLBITable; + let Key = ["Encoding"]; +} + +multiclass TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2, bit needsreg = 1> { + def : TLBIEntry<name, op1, crn, crm, op2, needsreg>; + def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> { + let Encoding{7} = 1; + let ExtraRequires = ["AArch64::FeatureXS"]; + } +} + +defm : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>; +defm : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>; +defm : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>; +defm : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>; +defm : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>; +defm : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>; +defm : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>; +defm : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>; +defm : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>; +defm : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>; +defm : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>; +defm : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>; +defm : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>; +defm : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>; +defm : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>; +defm : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>; +defm : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>; +defm : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>; +defm : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>; +defm : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>; +defm : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>; +defm : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>; +defm : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>; +defm : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>; +defm : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>; +defm : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>; + // Armv8.4-A Translation Lookaside Buffer Instructions (TLBI) -let Requires = ["AArch64::FeatureTLB_RMI"] in { +let Requires = ["AArch64::FeatureTLB_RMI"] in { // Armv8.4-A Outer Sharable TLB Maintenance instructions: // op1 CRn CRm op2 -defm : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>; -defm : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>; -defm : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>; -defm : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>; -defm : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>; -defm : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>; -defm : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>; -defm : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>; -defm : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>; -defm : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>; -defm : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>; -defm : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>; -defm : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>; -defm : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>; -defm : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>; -defm : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>; +defm : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>; +defm : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>; +defm : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>; +defm : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>; +defm : 
TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>; +defm : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>; +defm : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>; +defm : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>; +defm : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>; +defm : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>; +defm : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>; +defm : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>; +defm : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>; +defm : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>; +defm : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>; +defm : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>; // Armv8.4-A TLB Range Maintenance instructions: // op1 CRn CRm op2 -defm : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>; -defm : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>; -defm : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>; -defm : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>; -defm : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>; -defm : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>; -defm : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>; -defm : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>; -defm : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>; -defm : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>; -defm : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>; -defm : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>; -defm : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>; -defm : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>; -defm : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>; +defm : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>; +defm : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>; +defm : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>; +defm : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>; +defm : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>; +defm : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>; +defm : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>; +defm : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>; +defm : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>; +defm : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>; +defm : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>; +defm : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>; +defm : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>; +defm : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>; +defm : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>; +defm : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>; +defm : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>; +defm : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>; +defm : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>; +defm : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>; +defm : TLBI<"RVAE2IS", 
0b100, 0b1000, 0b0010, 0b001>; +defm : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>; +defm : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>; +defm : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>; +defm : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>; +defm : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>; +defm : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>; +defm : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>; +defm : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>; +defm : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>; } //FeatureTLB_RMI // Armv8.5-A Prediction Restriction by Context instruction options: @@ -643,7 +643,7 @@ def : ROSysReg<"ID_AA64AFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b100>; def : ROSysReg<"ID_AA64AFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b101>; def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>; def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>; -def : ROSysReg<"ID_AA64ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b010>; +def : ROSysReg<"ID_AA64ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b010>; def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>; def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>; def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010>; @@ -859,9 +859,9 @@ def : RWSysReg<"ACTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b001>; def : RWSysReg<"ACTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b001>; def : RWSysReg<"ACTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b001>; def : RWSysReg<"HCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b000>; -def : RWSysReg<"HCRX_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b010> { - let Requires = [{ {AArch64::FeatureHCX} }]; -} +def : RWSysReg<"HCRX_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b010> { + let Requires = [{ {AArch64::FeatureHCX} }]; +} def : RWSysReg<"SCR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b000>; def : RWSysReg<"MDCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b001>; def : RWSysReg<"SDER32_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b001>; @@ -1293,10 +1293,10 @@ def : RWSysReg<"CNTV_CTL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b001>; def : RWSysReg<"CNTV_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b010>; def : RWSysReg<"SPSR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b000>; def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>; -let Requires = [{ {AArch64::FeatureCONTEXTIDREL2} }] in { - def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>; -} +let Requires = [{ {AArch64::FeatureCONTEXTIDREL2} }] in { + def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>; } +} // v8.2a registers // Op0 Op1 CRn CRm Op2 let Requires = [{ {AArch64::FeaturePsUAO} }] in @@ -1336,7 +1336,7 @@ def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>; // v8.3a "Pointer authentication extension" registers // Op0 Op1 CRn CRm Op2 -let Requires = [{ {AArch64::FeaturePAuth} }] in { +let Requires = [{ {AArch64::FeaturePAuth} }] in { def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>; def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>; def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>; @@ -1570,33 +1570,33 @@ def : RWSysReg<"CNTPCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b101>; def : RWSysReg<"CNTVCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b110>; } -// v8.7a LD64B/ST64B Accelerator Extension system register -let Requires = [{ {AArch64::FeatureLS64} }] in -def : RWSysReg<"ACCDATA_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b101>; - -// Branch Record Buffer system registers -let Requires = [{ {AArch64::FeatureBRBE} }] in { -def : 
RWSysReg<"BRBCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b000>; -def : RWSysReg<"BRBCR_EL12", 0b10, 0b101, 0b1001, 0b0000, 0b000>; -def : RWSysReg<"BRBCR_EL2", 0b10, 0b100, 0b1001, 0b0000, 0b000>; -def : RWSysReg<"BRBFCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b001>; -def : ROSysReg<"BRBIDR0_EL1", 0b10, 0b001, 0b1001, 0b0010, 0b000>; -def : RWSysReg<"BRBINFINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b000>; -def : RWSysReg<"BRBSRCINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b001>; -def : RWSysReg<"BRBTGTINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b010>; -def : RWSysReg<"BRBTS_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b010>; -foreach n = 0-31 in { - defvar nb = !cast<bits<5>>(n); - def : ROSysReg<"BRBINF"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b00}>; - def : ROSysReg<"BRBSRC"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b01}>; - def : ROSysReg<"BRBTGT"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b10}>; -} -} - -// Statistical Profiling Extension system register -let Requires = [{ {AArch64::FeatureSPE_EEF} }] in -def : RWSysReg<"PMSNEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b001>; - +// v8.7a LD64B/ST64B Accelerator Extension system register +let Requires = [{ {AArch64::FeatureLS64} }] in +def : RWSysReg<"ACCDATA_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b101>; + +// Branch Record Buffer system registers +let Requires = [{ {AArch64::FeatureBRBE} }] in { +def : RWSysReg<"BRBCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b000>; +def : RWSysReg<"BRBCR_EL12", 0b10, 0b101, 0b1001, 0b0000, 0b000>; +def : RWSysReg<"BRBCR_EL2", 0b10, 0b100, 0b1001, 0b0000, 0b000>; +def : RWSysReg<"BRBFCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b001>; +def : ROSysReg<"BRBIDR0_EL1", 0b10, 0b001, 0b1001, 0b0010, 0b000>; +def : RWSysReg<"BRBINFINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b000>; +def : RWSysReg<"BRBSRCINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b001>; +def : RWSysReg<"BRBTGTINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b010>; +def : RWSysReg<"BRBTS_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b010>; +foreach n = 0-31 in { + defvar nb = !cast<bits<5>>(n); + def : ROSysReg<"BRBINF"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b00}>; + def : ROSysReg<"BRBSRC"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b01}>; + def : ROSysReg<"BRBTGT"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b10}>; +} +} + +// Statistical Profiling Extension system register +let Requires = [{ {AArch64::FeatureSPE_EEF} }] in +def : RWSysReg<"PMSNEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b001>; + // Cyclone specific system registers // Op0 Op1 CRn CRm Op2 let Requires = [{ {AArch64::ProcAppleA7} }] in diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.cpp index bec1758a93..5635b07fd6 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -148,10 +148,10 @@ static cl::opt<int> EnableGlobalISelAtO( cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"), cl::init(0)); -static cl::opt<bool> - EnableSVEIntrinsicOpts("aarch64-enable-sve-intrinsic-opts", cl::Hidden, - cl::desc("Enable SVE intrinsic opts"), - cl::init(true)); +static cl::opt<bool> + EnableSVEIntrinsicOpts("aarch64-enable-sve-intrinsic-opts", cl::Hidden, + cl::desc("Enable SVE intrinsic opts"), + cl::init(true)); static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix", cl::init(true), cl::Hidden); @@ -184,8 +184,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() { 
initializeAArch64SIMDInstrOptPass(*PR); initializeAArch64PreLegalizerCombinerPass(*PR); initializeAArch64PostLegalizerCombinerPass(*PR); - initializeAArch64PostLegalizerLoweringPass(*PR); - initializeAArch64PostSelectOptimizePass(*PR); + initializeAArch64PostLegalizerLoweringPass(*PR); + initializeAArch64PostSelectOptimizePass(*PR); initializeAArch64PromoteConstantPass(*PR); initializeAArch64RedundantCopyEliminationPass(*PR); initializeAArch64StorePairSuppressPass(*PR); @@ -222,18 +222,18 @@ static std::string computeDataLayout(const Triple &TT, } if (TT.isOSBinFormatCOFF()) return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"; - std::string Endian = LittleEndian ? "e" : "E"; - std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : ""; - return Endian + "-m:e" + Ptr32 + - "-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; -} - -static StringRef computeDefaultCPU(const Triple &TT, StringRef CPU) { - if (CPU.empty() && TT.isArm64e()) - return "apple-a12"; - return CPU; + std::string Endian = LittleEndian ? "e" : "E"; + std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : ""; + return Endian + "-m:e" + Ptr32 + + "-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; } +static StringRef computeDefaultCPU(const Triple &TT, StringRef CPU) { + if (CPU.empty() && TT.isArm64e()) + return "apple-a12"; + return CPU; +} + static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional<Reloc::Model> RM) { // AArch64 Darwin and Windows are always PIC. @@ -281,8 +281,8 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, bool LittleEndian) : LLVMTargetMachine(T, computeDataLayout(TT, Options.MCOptions, LittleEndian), - TT, computeDefaultCPU(TT, CPU), FS, Options, - getEffectiveRelocModel(TT, RM), + TT, computeDefaultCPU(TT, CPU), FS, Options, + getEffectiveRelocModel(TT, RM), getEffectiveAArch64CodeModel(TT, CM, JIT), OL), TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) { initAsmInfo(); @@ -317,7 +317,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, // MachO/CodeModel::Large, which GlobalISel does not support. if (getOptLevel() <= EnableGlobalISelAtO && TT.getArch() != Triple::aarch64_32 && - TT.getEnvironment() != Triple::GNUILP32 && + TT.getEnvironment() != Triple::GNUILP32 && !(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) { setGlobalISel(true); setGlobalISelAbort(GlobalISelAbortMode::Disable); @@ -340,10 +340,10 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); - std::string CPU = - CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; - std::string FS = - FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; + std::string CPU = + CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; + std::string FS = + FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; auto &I = SubtargetMap[CPU + FS]; if (!I) { @@ -460,12 +460,12 @@ void AArch64PassConfig::addIRPasses() { // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. 
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass(SimplifyCFGOptions() - .forwardSwitchCondToPhi(true) - .convertSwitchToLookupTable(true) - .needCanonicalLoops(false) - .hoistCommonInsts(true) - .sinkCommonInsts(true))); + addPass(createCFGSimplificationPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .hoistCommonInsts(true) + .sinkCommonInsts(true))); // Run LoopDataPrefetch // @@ -553,13 +553,13 @@ bool AArch64PassConfig::addInstSelector() { } bool AArch64PassConfig::addIRTranslator() { - addPass(new IRTranslator(getOptLevel())); + addPass(new IRTranslator(getOptLevel())); return false; } void AArch64PassConfig::addPreLegalizeMachineIR() { bool IsOptNone = getOptLevel() == CodeGenOpt::None; - addPass(createAArch64PreLegalizerCombiner(IsOptNone)); + addPass(createAArch64PreLegalizerCombiner(IsOptNone)); } bool AArch64PassConfig::addLegalizeMachineIR() { @@ -570,8 +570,8 @@ bool AArch64PassConfig::addLegalizeMachineIR() { void AArch64PassConfig::addPreRegBankSelect() { bool IsOptNone = getOptLevel() == CodeGenOpt::None; if (!IsOptNone) - addPass(createAArch64PostLegalizerCombiner(IsOptNone)); - addPass(createAArch64PostLegalizerLowering()); + addPass(createAArch64PostLegalizerCombiner(IsOptNone)); + addPass(createAArch64PostLegalizerLowering()); } bool AArch64PassConfig::addRegBankSelect() { @@ -585,8 +585,8 @@ void AArch64PassConfig::addPreGlobalInstructionSelect() { bool AArch64PassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect()); - if (getOptLevel() != CodeGenOpt::None) - addPass(createAArch64PostSelectOptimize()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createAArch64PostSelectOptimize()); return false; } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.h index 25e6261343..2420658743 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.h @@ -57,12 +57,12 @@ public: SMDiagnostic &Error, SMRange &SourceRange) const override; - /// Returns true if a cast between SrcAS and DestAS is a noop. - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. - return true; - } - + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Addrspacecasts are always noops. 
+ return true; + } + private: bool isLittle; }; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 7fda6b8fb6..d9f700a966 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "AArch64TargetTransformInfo.h" +#include "AArch64TargetTransformInfo.h" #include "AArch64ExpandImm.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/Analysis/LoopInfo.h" @@ -16,11 +16,11 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAArch64.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include <algorithm> using namespace llvm; -using namespace llvm::PatternMatch; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "aarch64tti" @@ -86,8 +86,8 @@ int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind, - Instruction *Inst) { + TTI::TargetCostKind CostKind, + Instruction *Inst) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -195,10 +195,10 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) return TTI::TCC_Free; break; - case Intrinsic::experimental_gc_statepoint: - if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) - return TTI::TCC_Free; - break; + case Intrinsic::experimental_gc_statepoint: + if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; } return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); } @@ -212,43 +212,43 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) { return TTI::PSK_Software; } -unsigned -AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) { - auto *RetTy = ICA.getReturnType(); - switch (ICA.getID()) { - case Intrinsic::umin: - case Intrinsic::umax: { - auto LT = TLI->getTypeLegalizationCost(DL, RetTy); - // umin(x,y) -> sub(x,usubsat(x,y)) - // umax(x,y) -> add(x,usubsat(y,x)) - if (LT.second == MVT::v2i64) - return LT.first * 2; - LLVM_FALLTHROUGH; - } - case Intrinsic::smin: - case Intrinsic::smax: { - static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16, - MVT::v8i16, MVT::v2i32, MVT::v4i32}; - auto LT = TLI->getTypeLegalizationCost(DL, RetTy); - if (any_of(ValidMinMaxTys, [<](MVT M) { return M == LT.second; })) - return LT.first; - break; - } - default: - break; - } - return BaseT::getIntrinsicInstrCost(ICA, CostKind); -} - +unsigned +AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) { + auto *RetTy = ICA.getReturnType(); + switch (ICA.getID()) { + case Intrinsic::umin: + case Intrinsic::umax: { + auto LT = TLI->getTypeLegalizationCost(DL, RetTy); + // umin(x,y) -> sub(x,usubsat(x,y)) + // umax(x,y) -> add(x,usubsat(y,x)) + if (LT.second == MVT::v2i64) + return LT.first * 2; + LLVM_FALLTHROUGH; + } + case Intrinsic::smin: + case Intrinsic::smax: { + static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16, + MVT::v8i16, MVT::v2i32, MVT::v4i32}; 
+ auto LT = TLI->getTypeLegalizationCost(DL, RetTy); + if (any_of(ValidMinMaxTys, [<](MVT M) { return M == LT.second; })) + return LT.first; + break; + } + default: + break; + } + return BaseT::getIntrinsicInstrCost(ICA, CostKind); +} + bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode, ArrayRef<const Value *> Args) { // A helper that returns a vector type from the given type. The number of // elements in type Ty determine the vector width. auto toVectorTy = [&](Type *ArgTy) { - return VectorType::get(ArgTy->getScalarType(), - cast<VectorType>(DstTy)->getElementCount()); + return VectorType::get(ArgTy->getScalarType(), + cast<VectorType>(DstTy)->getElementCount()); }; // Exit early if DstTy is not a vector type whose elements are at least @@ -297,8 +297,8 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode, return false; // Get the total number of vector elements in the legalized types. - unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorMinNumElements(); - unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorMinNumElements(); + unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorMinNumElements(); + unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorMinNumElements(); // Return true if the legalized types have the same number of vector elements // and the destination element type size is twice that of the source type. @@ -306,7 +306,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode, } int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - TTI::CastContextHint CCH, + TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -343,8 +343,8 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) - return AdjustCost( - BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); + return AdjustCost( + BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); static const TypeConversionCostTblEntry ConversionTbl[] = { @@ -448,8 +448,8 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, SrcTy.getSimpleVT())) return AdjustCost(Entry->Cost); - return AdjustCost( - BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); + return AdjustCost( + BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); } int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst, @@ -481,14 +481,14 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst, // we may get the extension for free. If not, get the default cost for the // extend. if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT)) - return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None, - CostKind); + return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None, + CostKind); // The destination type should be larger than the element type. If not, get // the default cost for the extend. 
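The getIntrinsicInstrCost hook re-added above prices a v2i64 umin/umax as two legalized operations, because a 64-bit-element UMIN/UMAX is not available as a single NEON instruction and the comment's rewrite through a saturating unsigned subtract needs exactly two instructions. A minimal standalone sketch, assuming nothing beyond standard C++ (the helper names are invented here; this is not LLVM code), that checks the identity the comment relies on:

#include <cassert>
#include <cstdint>

// Saturating unsigned subtract: returns a - b, clamped at 0.
static uint64_t usubsat(uint64_t a, uint64_t b) { return a > b ? a - b : 0; }

// umin(x, y) -> sub(x, usubsat(x, y))
static uint64_t umin_via_usubsat(uint64_t x, uint64_t y) { return x - usubsat(x, y); }

// umax(x, y) -> add(x, usubsat(y, x))
static uint64_t umax_via_usubsat(uint64_t x, uint64_t y) { return x + usubsat(y, x); }

int main() {
  assert(umin_via_usubsat(7, 3) == 3 && umin_via_usubsat(3, 7) == 3);
  assert(umax_via_usubsat(7, 3) == 7 && umax_via_usubsat(3, 7) == 7);
  return 0;
}

Either way the min/max costs two data-processing operations per legalized part, which is why the MVT::v2i64 case above returns LT.first * 2.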
- if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits()) - return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None, - CostKind); + if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits()) + return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None, + CostKind); switch (Opcode) { default: @@ -507,8 +507,8 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst, } // If we are unable to perform the extend for free, get the default cost. - return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None, - CostKind); + return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None, + CostKind); } unsigned AArch64TTIImpl::getCFInstrCost(unsigned Opcode, @@ -644,19 +644,19 @@ int AArch64TTIImpl::getArithmeticInstrCost( } return Cost; - case ISD::MUL: - if (LT.second != MVT::v2i64) - return (Cost + 1) * LT.first; - // Since we do not have a MUL.2d instruction, a mul <2 x i64> is expensive - // as elements are extracted from the vectors and the muls scalarized. - // As getScalarizationOverhead is a bit too pessimistic, we estimate the - // cost for a i64 vector directly here, which is: - // - four i64 extracts, - // - two i64 inserts, and - // - two muls. - // So, for a v2i64 with LT.First = 1 the cost is 8, and for a v4i64 with - // LT.first = 2 the cost is 16. - return LT.first * 8; + case ISD::MUL: + if (LT.second != MVT::v2i64) + return (Cost + 1) * LT.first; + // Since we do not have a MUL.2d instruction, a mul <2 x i64> is expensive + // as elements are extracted from the vectors and the muls scalarized. + // As getScalarizationOverhead is a bit too pessimistic, we estimate the + // cost for a i64 vector directly here, which is: + // - four i64 extracts, + // - two i64 inserts, and + // - two muls. + // So, for a v2i64 with LT.First = 1 the cost is 8, and for a v4i64 with + // LT.first = 2 the cost is 16. + return LT.first * 8; case ISD::ADD: case ISD::XOR: case ISD::OR: @@ -696,40 +696,40 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, } int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, CmpInst::Predicate VecPred, + Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) { // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + I); int ISD = TLI->InstructionOpcodeToISD(Opcode); // We don't lower some vector selects well that are wider than the register // width. - if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) { + if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) { // We would need this many instructions to hide the scalarization happening. const int AmortizationCost = 20; - - // If VecPred is not set, check if we can get a predicate from the context - // instruction, if its type matches the requested ValTy. - if (VecPred == CmpInst::BAD_ICMP_PREDICATE && I && I->getType() == ValTy) { - CmpInst::Predicate CurrentPred; - if (match(I, m_Select(m_Cmp(CurrentPred, m_Value(), m_Value()), m_Value(), - m_Value()))) - VecPred = CurrentPred; - } - // Check if we have a compare/select chain that can be lowered using CMxx & - // BFI pair. 
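The ISD::MUL case restored above justifies the constant 8 as a per-part estimate: without a MUL.2d instruction, each legalized v2i64 multiply is modeled as four i64 lane extracts, two lane inserts and two scalar multiplies. A hedged sketch that merely replays that arithmetic (plain C++, helper name invented for illustration, not LLVM API):

#include <cassert>

// Reproduces the cost comment: per legalized part, 4 extracts + 2 inserts + 2 scalar muls.
static int mulOfI64VectorCost(int NumLegalizedParts /* LT.first */) {
  const int Extracts = 4, Inserts = 2, ScalarMuls = 2;
  return NumLegalizedParts * (Extracts + Inserts + ScalarMuls);
}

int main() {
  assert(mulOfI64VectorCost(1) == 8);  // v2i64: LT.first == 1
  assert(mulOfI64VectorCost(2) == 16); // v4i64: LT.first == 2
  return 0;
}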
- if (CmpInst::isIntPredicate(VecPred)) { - static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16, - MVT::v8i16, MVT::v2i32, MVT::v4i32, - MVT::v2i64}; - auto LT = TLI->getTypeLegalizationCost(DL, ValTy); - if (any_of(ValidMinMaxTys, [<](MVT M) { return M == LT.second; })) - return LT.first; - } - + + // If VecPred is not set, check if we can get a predicate from the context + // instruction, if its type matches the requested ValTy. + if (VecPred == CmpInst::BAD_ICMP_PREDICATE && I && I->getType() == ValTy) { + CmpInst::Predicate CurrentPred; + if (match(I, m_Select(m_Cmp(CurrentPred, m_Value(), m_Value()), m_Value(), + m_Value()))) + VecPred = CurrentPred; + } + // Check if we have a compare/select chain that can be lowered using CMxx & + // BFI pair. + if (CmpInst::isIntPredicate(VecPred)) { + static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16, + MVT::v8i16, MVT::v2i32, MVT::v4i32, + MVT::v2i64}; + auto LT = TLI->getTypeLegalizationCost(DL, ValTy); + if (any_of(ValidMinMaxTys, [<](MVT M) { return M == LT.second; })) + return LT.first; + } + static const TypeConversionCostTblEntry VectorSelectTbl[] = { { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 }, @@ -749,9 +749,9 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return Entry->Cost; } } - // The base case handles scalable vectors fine for now, since it treats the - // cost as 1 * legalization cost. - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + // The base case handles scalable vectors fine for now, since it treats the + // cost as 1 * legalization cost. + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } AArch64TTIImpl::TTI::MemCmpExpansionOptions @@ -772,30 +772,30 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { return Options; } -unsigned AArch64TTIImpl::getGatherScatterOpCost( - unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, - Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) { - - if (!isa<ScalableVectorType>(DataTy)) - return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, - Alignment, CostKind, I); - auto *VT = cast<VectorType>(DataTy); - auto LT = TLI->getTypeLegalizationCost(DL, DataTy); - ElementCount LegalVF = LT.second.getVectorElementCount(); - Optional<unsigned> MaxNumVScale = getMaxVScale(); - assert(MaxNumVScale && "Expected valid max vscale value"); - - unsigned MemOpCost = - getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I); - unsigned MaxNumElementsPerGather = - MaxNumVScale.getValue() * LegalVF.getKnownMinValue(); - return LT.first * MaxNumElementsPerGather * MemOpCost; -} - -bool AArch64TTIImpl::useNeonVector(const Type *Ty) const { - return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors(); -} - +unsigned AArch64TTIImpl::getGatherScatterOpCost( + unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, + Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) { + + if (!isa<ScalableVectorType>(DataTy)) + return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, + Alignment, CostKind, I); + auto *VT = cast<VectorType>(DataTy); + auto LT = TLI->getTypeLegalizationCost(DL, DataTy); + ElementCount LegalVF = LT.second.getVectorElementCount(); + Optional<unsigned> MaxNumVScale = getMaxVScale(); + assert(MaxNumVScale && "Expected valid max vscale value"); + + unsigned MemOpCost = + getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, 
CostKind, I); + unsigned MaxNumElementsPerGather = + MaxNumVScale.getValue() * LegalVF.getKnownMinValue(); + return LT.first * MaxNumElementsPerGather * MemOpCost; +} + +bool AArch64TTIImpl::useNeonVector(const Type *Ty) const { + return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors(); +} + int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, @@ -823,7 +823,7 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, return LT.first * 2 * AmortizationCost; } - if (useNeonVector(Ty) && + if (useNeonVector(Ty) && cast<VectorType>(Ty)->getElementType()->isIntegerTy(8)) { unsigned ProfitableNumElements; if (Opcode == Instruction::Store) @@ -1098,70 +1098,70 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, return false; } -int AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwise, bool IsUnsigned, - TTI::TargetCostKind CostKind) { - if (!isa<ScalableVectorType>(Ty)) - return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned, - CostKind); - assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) && - "Both vector needs to be scalable"); - - std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); - int LegalizationCost = 0; - if (LT.first > 1) { - Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext()); - unsigned CmpOpcode = - Ty->isFPOrFPVectorTy() ? Instruction::FCmp : Instruction::ICmp; - LegalizationCost = - getCmpSelInstrCost(CmpOpcode, LegalVTy, LegalVTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind) + - getCmpSelInstrCost(Instruction::Select, LegalVTy, LegalVTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - LegalizationCost *= LT.first - 1; - } - - return LegalizationCost + /*Cost of horizontal reduction*/ 2; -} - -int AArch64TTIImpl::getArithmeticReductionCostSVE( - unsigned Opcode, VectorType *ValTy, bool IsPairwise, - TTI::TargetCostKind CostKind) { - assert(!IsPairwise && "Cannot be pair wise to continue"); - - std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); - int LegalizationCost = 0; - if (LT.first > 1) { - Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext()); - LegalizationCost = getArithmeticInstrCost(Opcode, LegalVTy, CostKind); - LegalizationCost *= LT.first - 1; - } - - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - // Add the final reduction cost for the legal horizontal reduction - switch (ISD) { - case ISD::ADD: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::FADD: - return LegalizationCost + 2; - default: - // TODO: Replace for invalid when InstructionCost is used - // cases not supported by SVE - return 16; - } -} - +int AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, + bool IsPairwise, bool IsUnsigned, + TTI::TargetCostKind CostKind) { + if (!isa<ScalableVectorType>(Ty)) + return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned, + CostKind); + assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) && + "Both vector needs to be scalable"); + + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); + int LegalizationCost = 0; + if (LT.first > 1) { + Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext()); + unsigned CmpOpcode = + Ty->isFPOrFPVectorTy() ? 
Instruction::FCmp : Instruction::ICmp; + LegalizationCost = + getCmpSelInstrCost(CmpOpcode, LegalVTy, LegalVTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind) + + getCmpSelInstrCost(Instruction::Select, LegalVTy, LegalVTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + LegalizationCost *= LT.first - 1; + } + + return LegalizationCost + /*Cost of horizontal reduction*/ 2; +} + +int AArch64TTIImpl::getArithmeticReductionCostSVE( + unsigned Opcode, VectorType *ValTy, bool IsPairwise, + TTI::TargetCostKind CostKind) { + assert(!IsPairwise && "Cannot be pair wise to continue"); + + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); + int LegalizationCost = 0; + if (LT.first > 1) { + Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext()); + LegalizationCost = getArithmeticInstrCost(Opcode, LegalVTy, CostKind); + LegalizationCost *= LT.first - 1; + } + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + // Add the final reduction cost for the legal horizontal reduction + switch (ISD) { + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::FADD: + return LegalizationCost + 2; + default: + // TODO: Replace for invalid when InstructionCost is used + // cases not supported by SVE + return 16; + } +} + int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, bool IsPairwiseForm, TTI::TargetCostKind CostKind) { - if (isa<ScalableVectorType>(ValTy)) - return getArithmeticReductionCostSVE(Opcode, ValTy, IsPairwiseForm, - CostKind); + if (isa<ScalableVectorType>(ValTy)) + return getArithmeticReductionCostSVE(Opcode, ValTy, IsPairwiseForm, + CostKind); if (IsPairwiseForm) return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, CostKind); diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.h index 7c9360ada9..f669e3f595 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -74,8 +74,8 @@ public: int getIntImmCost(int64_t Val); int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind, - Instruction *Inst = nullptr); + Type *Ty, TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); @@ -97,9 +97,9 @@ public: return 31; } - unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind); - + unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind); + unsigned getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasSVE()) @@ -115,21 +115,21 @@ public: return ST->getMinVectorRegisterBitWidth(); } - Optional<unsigned> getMaxVScale() const { - if (ST->hasSVE()) - return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock; - return BaseT::getMaxVScale(); - } - + Optional<unsigned> getMaxVScale() const { + if (ST->hasSVE()) + return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock; + return BaseT::getMaxVScale(); + } + unsigned getMaxInterleaveFactor(unsigned VF); - unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, - const Value *Ptr, bool VariableMask, - Align Alignment, 
TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); - + unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + const Value *Ptr, bool VariableMask, + Align Alignment, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, + TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, @@ -139,14 +139,14 @@ public: int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwise, bool IsUnsigned, - TTI::TargetCostKind CostKind); - - int getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind); - + int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, + bool IsPairwise, bool IsUnsigned, + TTI::TargetCostKind CostKind); + + int getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, + bool IsPairwiseForm, + TTI::TargetCostKind CostKind); + int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, @@ -160,13 +160,13 @@ public: int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; - bool useNeonVector(const Type *Ty) const; + bool useNeonVector(const Type *Ty) const; int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, @@ -191,9 +191,9 @@ public: return false; Type *Ty = cast<ScalableVectorType>(DataType)->getElementType(); - if (Ty->isPointerTy()) - return true; - + if (Ty->isPointerTy()) + return true; + if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) return true; @@ -241,14 +241,14 @@ public: shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader); - bool shouldExpandReduction(const IntrinsicInst *II) const { return false; } + bool shouldExpandReduction(const IntrinsicInst *II) const { return false; } unsigned getGISelRematGlobalCost() const { return 2; } - bool supportsScalableVectors() const { return ST->hasSVE(); } - + bool supportsScalableVectors() const { return ST->hasSVE(); } + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 96c50ff3f8..d69e2b127c 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/AArch64AddressingModes.h" -#include "MCTargetDesc/AArch64InstPrinter.h" +#include "MCTargetDesc/AArch64InstPrinter.h" #include "MCTargetDesc/AArch64MCExpr.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "MCTargetDesc/AArch64TargetStreamer.h" @@ -159,13 +159,13 @@ private: bool parseSymbolicImmVal(const MCExpr *&ImmVal); bool parseNeonVectorList(OperandVector &Operands); bool 
parseOptionalMulOperand(OperandVector &Operands); - bool parseKeywordOperand(OperandVector &Operands); + bool parseKeywordOperand(OperandVector &Operands); bool parseOperand(OperandVector &Operands, bool isCondCode, bool invertCondCode); - bool parseImmExpr(int64_t &Out); - bool parseComma(); - bool parseRegisterInRange(unsigned &Out, unsigned Base, unsigned First, - unsigned Last); + bool parseImmExpr(int64_t &Out); + bool parseComma(); + bool parseRegisterInRange(unsigned &Out, unsigned Base, unsigned First, + unsigned Last); bool showMatchError(SMLoc Loc, unsigned ErrCode, uint64_t ErrorInfo, OperandVector &Operands); @@ -187,31 +187,31 @@ private: bool parseDirectiveVariantPCS(SMLoc L); - bool parseDirectiveSEHAllocStack(SMLoc L); - bool parseDirectiveSEHPrologEnd(SMLoc L); - bool parseDirectiveSEHSaveR19R20X(SMLoc L); - bool parseDirectiveSEHSaveFPLR(SMLoc L); - bool parseDirectiveSEHSaveFPLRX(SMLoc L); - bool parseDirectiveSEHSaveReg(SMLoc L); - bool parseDirectiveSEHSaveRegX(SMLoc L); - bool parseDirectiveSEHSaveRegP(SMLoc L); - bool parseDirectiveSEHSaveRegPX(SMLoc L); - bool parseDirectiveSEHSaveLRPair(SMLoc L); - bool parseDirectiveSEHSaveFReg(SMLoc L); - bool parseDirectiveSEHSaveFRegX(SMLoc L); - bool parseDirectiveSEHSaveFRegP(SMLoc L); - bool parseDirectiveSEHSaveFRegPX(SMLoc L); - bool parseDirectiveSEHSetFP(SMLoc L); - bool parseDirectiveSEHAddFP(SMLoc L); - bool parseDirectiveSEHNop(SMLoc L); - bool parseDirectiveSEHSaveNext(SMLoc L); - bool parseDirectiveSEHEpilogStart(SMLoc L); - bool parseDirectiveSEHEpilogEnd(SMLoc L); - bool parseDirectiveSEHTrapFrame(SMLoc L); - bool parseDirectiveSEHMachineFrame(SMLoc L); - bool parseDirectiveSEHContext(SMLoc L); - bool parseDirectiveSEHClearUnwoundToCall(SMLoc L); - + bool parseDirectiveSEHAllocStack(SMLoc L); + bool parseDirectiveSEHPrologEnd(SMLoc L); + bool parseDirectiveSEHSaveR19R20X(SMLoc L); + bool parseDirectiveSEHSaveFPLR(SMLoc L); + bool parseDirectiveSEHSaveFPLRX(SMLoc L); + bool parseDirectiveSEHSaveReg(SMLoc L); + bool parseDirectiveSEHSaveRegX(SMLoc L); + bool parseDirectiveSEHSaveRegP(SMLoc L); + bool parseDirectiveSEHSaveRegPX(SMLoc L); + bool parseDirectiveSEHSaveLRPair(SMLoc L); + bool parseDirectiveSEHSaveFReg(SMLoc L); + bool parseDirectiveSEHSaveFRegX(SMLoc L); + bool parseDirectiveSEHSaveFRegP(SMLoc L); + bool parseDirectiveSEHSaveFRegPX(SMLoc L); + bool parseDirectiveSEHSetFP(SMLoc L); + bool parseDirectiveSEHAddFP(SMLoc L); + bool parseDirectiveSEHNop(SMLoc L); + bool parseDirectiveSEHSaveNext(SMLoc L); + bool parseDirectiveSEHEpilogStart(SMLoc L); + bool parseDirectiveSEHEpilogEnd(SMLoc L); + bool parseDirectiveSEHTrapFrame(SMLoc L); + bool parseDirectiveSEHMachineFrame(SMLoc L); + bool parseDirectiveSEHContext(SMLoc L); + bool parseDirectiveSEHClearUnwoundToCall(SMLoc L); + bool validateInstruction(MCInst &Inst, SMLoc &IDLoc, SmallVectorImpl<SMLoc> &Loc); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, @@ -231,7 +231,7 @@ private: RegKind MatchKind); OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands); OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands); - OperandMatchResultTy tryParseBarriernXSOperand(OperandVector &Operands); + OperandMatchResultTy tryParseBarriernXSOperand(OperandVector &Operands); OperandMatchResultTy tryParseMRSSystemRegister(OperandVector &Operands); OperandMatchResultTy tryParseSysReg(OperandVector &Operands); OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands); @@ -258,7 +258,7 @@ private: OperandMatchResultTy 
tryParseVectorList(OperandVector &Operands, bool ExpectMatch = false); OperandMatchResultTy tryParseSVEPattern(OperandVector &Operands); - OperandMatchResultTy tryParseGPR64x8(OperandVector &Operands); + OperandMatchResultTy tryParseGPR64x8(OperandVector &Operands); public: enum AArch64MatchResultTy { @@ -271,7 +271,7 @@ public: AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(Options, STI, MII) { - IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32; + IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32; MCAsmParserExtension::Initialize(Parser); MCStreamer &S = getParser().getStreamer(); if (S.getTargetStreamer() == nullptr) @@ -404,7 +404,7 @@ private: const char *Data; unsigned Length; unsigned Val; // Not the enum since not all values have names. - bool HasnXSModifier; + bool HasnXSModifier; }; struct SysRegOp { @@ -574,11 +574,11 @@ public: return StringRef(Barrier.Data, Barrier.Length); } - bool getBarriernXSModifier() const { - assert(Kind == k_Barrier && "Invalid access!"); - return Barrier.HasnXSModifier; - } - + bool getBarriernXSModifier() const { + assert(Kind == k_Barrier && "Invalid access!"); + return Barrier.HasnXSModifier; + } + unsigned getReg() const override { assert(Kind == k_Register && "Invalid access!"); return Reg.RegNum; @@ -750,8 +750,8 @@ public: ELFRefKind == AArch64MCExpr::VK_GOTTPREL_LO12_NC || ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12 || ELFRefKind == AArch64MCExpr::VK_SECREL_LO12 || - ELFRefKind == AArch64MCExpr::VK_SECREL_HI12 || - ELFRefKind == AArch64MCExpr::VK_GOT_PAGE_LO15) { + ELFRefKind == AArch64MCExpr::VK_SECREL_HI12 || + ELFRefKind == AArch64MCExpr::VK_GOT_PAGE_LO15) { // Note that we don't range-check the addend. It's adjusted modulo page // size when converted, so there is no "out of range" condition when using // @pageoff. 
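The operand-class plumbing above (the HasnXSModifier field, its getBarriernXSModifier accessor and the tryParseBarriernXSOperand declaration) supports the new v8.7-A `dsb ... nXS` barrier form whose parser is added further down in this diff. As a hedged aside, a standalone sketch of the immediate constraint that parser enforces (plain C++; the helper name is invented here and this is not the assembler's own validation):

#include <cassert>

// Per the comment in tryParseBarriernXSOperand below, the DSB nXS variant
// only accepts the immediate values 16, 20, 24 and 28.
static bool isValidDsbNXSImmediate(long Value) {
  return Value == 16 || Value == 20 || Value == 24 || Value == 28;
}

int main() {
  assert(isValidDsbNXSImmediate(16) && isValidDsbNXSImmediate(28));
  assert(!isValidDsbNXSImmediate(15) && !isValidDsbNXSImmediate(21));
  return 0;
}

Anything outside that set is rejected with "barrier operand out of range", while values 0-15 continue to go through the plain tryParseBarrierOperand path, which is also updated below to return a no-match for dsb immediates above 15 so the nXS variant can take over.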
@@ -897,8 +897,8 @@ public: if (!isShiftedImm() && (!isImm() || !isa<MCConstantExpr>(getImm()))) return DiagnosticPredicateTy::NoMatch; - bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value || - std::is_same<int8_t, T>::value; + bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value || + std::is_same<int8_t, T>::value; if (auto ShiftedImm = getShiftedVal<8>()) if (!(IsByte && ShiftedImm->second) && AArch64_AM::isSVECpyImm<T>(uint64_t(ShiftedImm->first) @@ -915,8 +915,8 @@ public: if (!isShiftedImm() && (!isImm() || !isa<MCConstantExpr>(getImm()))) return DiagnosticPredicateTy::NoMatch; - bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value || - std::is_same<int8_t, T>::value; + bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value || + std::is_same<int8_t, T>::value; if (auto ShiftedImm = getShiftedVal<8>()) if (!(IsByte && ShiftedImm->second) && AArch64_AM::isSVEAddSubImm<T>(ShiftedImm->first @@ -1041,12 +1041,12 @@ public: AArch64_AM::getFP64Imm(getFPImm().bitcastToAPInt()) != -1; } - bool isBarrier() const { - return Kind == k_Barrier && !getBarriernXSModifier(); - } - bool isBarriernXS() const { - return Kind == k_Barrier && getBarriernXSModifier(); - } + bool isBarrier() const { + return Kind == k_Barrier && !getBarriernXSModifier(); + } + bool isBarriernXS() const { + return Kind == k_Barrier && getBarriernXSModifier(); + } bool isSysReg() const { return Kind == k_SysReg; } bool isMRSSystemRegister() const { @@ -1173,12 +1173,12 @@ public: AArch64MCRegisterClasses[AArch64::GPR32RegClassID].contains(Reg.RegNum); } - bool isGPR64x8() const { - return Kind == k_Register && Reg.Kind == RegKind::Scalar && - AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID].contains( - Reg.RegNum); - } - + bool isGPR64x8() const { + return Kind == k_Register && Reg.Kind == RegKind::Scalar && + AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID].contains( + Reg.RegNum); + } + bool isWSeqPair() const { return Kind == k_Register && Reg.Kind == RegKind::Scalar && AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains( @@ -1742,11 +1742,11 @@ public: Inst.addOperand(MCOperand::createImm(getBarrier())); } - void addBarriernXSOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::createImm(getBarrier())); - } - + void addBarriernXSOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(getBarrier())); + } + void addMRSSystemRegisterOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); @@ -1982,13 +1982,13 @@ public: static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, StringRef Str, SMLoc S, - MCContext &Ctx, - bool HasnXSModifier) { + MCContext &Ctx, + bool HasnXSModifier) { auto Op = std::make_unique<AArch64Operand>(k_Barrier, Ctx); Op->Barrier.Val = Val; Op->Barrier.Data = Str.data(); Op->Barrier.Length = Str.size(); - Op->Barrier.HasnXSModifier = HasnXSModifier; + Op->Barrier.HasnXSModifier = HasnXSModifier; Op->StartLoc = S; Op->EndLoc = S; return Op; @@ -2133,9 +2133,9 @@ void AArch64Operand::print(raw_ostream &OS) const { case k_PSBHint: OS << getPSBHintName(); break; - case k_BTIHint: - OS << getBTIHintName(); - break; + case k_BTIHint: + OS << getBTIHintName(); + break; case k_Register: OS << "<register " << getReg() << ">"; if (!getShiftExtendAmount() && !hasShiftExtendAmount()) @@ -2570,7 +2570,7 @@ 
AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE && ELFRefKind != AArch64MCExpr::VK_ABS_PAGE_NC && ELFRefKind != AArch64MCExpr::VK_GOT_PAGE && - ELFRefKind != AArch64MCExpr::VK_GOT_PAGE_LO15 && + ELFRefKind != AArch64MCExpr::VK_GOT_PAGE_LO15 && ELFRefKind != AArch64MCExpr::VK_GOTTPREL_PAGE && ELFRefKind != AArch64MCExpr::VK_TLSDESC_PAGE) { // The operand must be an @page or @gotpage qualified symbolref. @@ -2904,7 +2904,7 @@ static const struct Extension { {"predres", {AArch64::FeaturePredRes}}, {"ccdp", {AArch64::FeatureCacheDeepPersist}}, {"mte", {AArch64::FeatureMTE}}, - {"memtag", {AArch64::FeatureMTE}}, + {"memtag", {AArch64::FeatureMTE}}, {"tlb-rmi", {AArch64::FeatureTLB_RMI}}, {"pan-rwv", {AArch64::FeaturePAN_RWV}}, {"ccpp", {AArch64::FeatureCCPP}}, @@ -2915,10 +2915,10 @@ static const struct Extension { {"sve2-sm4", {AArch64::FeatureSVE2SM4}}, {"sve2-sha3", {AArch64::FeatureSVE2SHA3}}, {"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}}, - {"ls64", {AArch64::FeatureLS64}}, - {"xs", {AArch64::FeatureXS}}, - {"pauth", {AArch64::FeaturePAuth}}, - {"flagm", {AArch64::FeatureFlagM}}, + {"ls64", {AArch64::FeatureLS64}}, + {"xs", {AArch64::FeatureXS}}, + {"pauth", {AArch64::FeaturePAuth}}, + {"flagm", {AArch64::FeatureFlagM}}, // FIXME: Unsupported extensions {"pan", {}}, {"lor", {}}, @@ -2939,16 +2939,16 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { Str += "ARMv8.5a"; else if (FBS[AArch64::HasV8_6aOps]) Str += "ARMv8.6a"; - else if (FBS[AArch64::HasV8_7aOps]) - Str += "ARMv8.7a"; + else if (FBS[AArch64::HasV8_7aOps]) + Str += "ARMv8.7a"; else { - SmallVector<std::string, 2> ExtMatches; - for (const auto& Ext : ExtensionMap) { + SmallVector<std::string, 2> ExtMatches; + for (const auto& Ext : ExtensionMap) { // Use & in case multiple features are enabled - if ((FBS & Ext.Features) != FeatureBitset()) - ExtMatches.push_back(Ext.Name); - } - Str += !ExtMatches.empty() ? llvm::join(ExtMatches, ", ") : "(unknown)"; + if ((FBS & Ext.Features) != FeatureBitset()) + ExtMatches.push_back(Ext.Name); + } + Str += !ExtMatches.empty() ? 
llvm::join(ExtMatches, ", ") : "(unknown)"; } } @@ -2993,7 +2993,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, if (!IC) return TokError("invalid operand for IC instruction"); else if (!IC->haveFeatures(getSTI().getFeatureBits())) { - std::string Str("IC " + std::string(IC->Name) + " requires: "); + std::string Str("IC " + std::string(IC->Name) + " requires: "); setRequiredFeatureString(IC->getRequiredFeatures(), Str); return TokError(Str.c_str()); } @@ -3003,7 +3003,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, if (!DC) return TokError("invalid operand for DC instruction"); else if (!DC->haveFeatures(getSTI().getFeatureBits())) { - std::string Str("DC " + std::string(DC->Name) + " requires: "); + std::string Str("DC " + std::string(DC->Name) + " requires: "); setRequiredFeatureString(DC->getRequiredFeatures(), Str); return TokError(Str.c_str()); } @@ -3013,7 +3013,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, if (!AT) return TokError("invalid operand for AT instruction"); else if (!AT->haveFeatures(getSTI().getFeatureBits())) { - std::string Str("AT " + std::string(AT->Name) + " requires: "); + std::string Str("AT " + std::string(AT->Name) + " requires: "); setRequiredFeatureString(AT->getRequiredFeatures(), Str); return TokError(Str.c_str()); } @@ -3023,7 +3023,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, if (!TLBI) return TokError("invalid operand for TLBI instruction"); else if (!TLBI->haveFeatures(getSTI().getFeatureBits())) { - std::string Str("TLBI " + std::string(TLBI->Name) + " requires: "); + std::string Str("TLBI " + std::string(TLBI->Name) + " requires: "); setRequiredFeatureString(TLBI->getRequiredFeatures(), Str); return TokError(Str.c_str()); } @@ -3034,7 +3034,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, return TokError("invalid operand for prediction restriction instruction"); else if (!PRCTX->haveFeatures(getSTI().getFeatureBits())) { std::string Str( - Mnemonic.upper() + std::string(PRCTX->Name) + " requires: "); + Mnemonic.upper() + std::string(PRCTX->Name) + " requires: "); setRequiredFeatureString(PRCTX->getRequiredFeatures(), Str); return TokError(Str.c_str()); } @@ -3082,7 +3082,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { // Immediate operand. const MCExpr *ImmVal; SMLoc ExprLoc = getLoc(); - AsmToken IntTok = Tok; + AsmToken IntTok = Tok; if (getParser().parseExpression(ImmVal)) return MatchOperand_ParseFail; const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); @@ -3090,22 +3090,22 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { Error(ExprLoc, "immediate value expected for barrier operand"); return MatchOperand_ParseFail; } - int64_t Value = MCE->getValue(); - if (Mnemonic == "dsb" && Value > 15) { - // This case is a no match here, but it might be matched by the nXS - // variant. Deliberately not unlex the optional '#' as it is not necessary - // to characterize an integer immediate. - Parser.getLexer().UnLex(IntTok); - return MatchOperand_NoMatch; - } - if (Value < 0 || Value > 15) { + int64_t Value = MCE->getValue(); + if (Mnemonic == "dsb" && Value > 15) { + // This case is a no match here, but it might be matched by the nXS + // variant. Deliberately not unlex the optional '#' as it is not necessary + // to characterize an integer immediate. 
+ Parser.getLexer().UnLex(IntTok); + return MatchOperand_NoMatch; + } + if (Value < 0 || Value > 15) { Error(ExprLoc, "barrier operand out of range"); return MatchOperand_ParseFail; } - auto DB = AArch64DB::lookupDBByEncoding(Value); - Operands.push_back(AArch64Operand::CreateBarrier(Value, DB ? DB->Name : "", - ExprLoc, getContext(), - false /*hasnXSModifier*/)); + auto DB = AArch64DB::lookupDBByEncoding(Value); + Operands.push_back(AArch64Operand::CreateBarrier(Value, DB ? DB->Name : "", + ExprLoc, getContext(), + false /*hasnXSModifier*/)); return MatchOperand_Success; } @@ -3114,9 +3114,9 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { return MatchOperand_ParseFail; } - StringRef Operand = Tok.getString(); - auto TSB = AArch64TSB::lookupTSBByName(Operand); - auto DB = AArch64DB::lookupDBByName(Operand); + StringRef Operand = Tok.getString(); + auto TSB = AArch64TSB::lookupTSBByName(Operand); + auto DB = AArch64DB::lookupDBByName(Operand); // The only valid named option for ISB is 'sy' if (Mnemonic == "isb" && (!DB || DB->Encoding != AArch64DB::sy)) { TokError("'sy' or #imm operand expected"); @@ -3126,79 +3126,79 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { TokError("'csync' operand expected"); return MatchOperand_ParseFail; } else if (!DB && !TSB) { - if (Mnemonic == "dsb") { - // This case is a no match here, but it might be matched by the nXS - // variant. - return MatchOperand_NoMatch; - } + if (Mnemonic == "dsb") { + // This case is a no match here, but it might be matched by the nXS + // variant. + return MatchOperand_NoMatch; + } TokError("invalid barrier option name"); return MatchOperand_ParseFail; } Operands.push_back(AArch64Operand::CreateBarrier( - DB ? DB->Encoding : TSB->Encoding, Tok.getString(), getLoc(), - getContext(), false /*hasnXSModifier*/)); - Parser.Lex(); // Consume the option - - return MatchOperand_Success; -} - -OperandMatchResultTy -AArch64AsmParser::tryParseBarriernXSOperand(OperandVector &Operands) { - MCAsmParser &Parser = getParser(); - const AsmToken &Tok = Parser.getTok(); - - assert(Mnemonic == "dsb" && "Instruction does not accept nXS operands"); - if (Mnemonic != "dsb") - return MatchOperand_ParseFail; - - if (parseOptionalToken(AsmToken::Hash) || Tok.is(AsmToken::Integer)) { - // Immediate operand. - const MCExpr *ImmVal; - SMLoc ExprLoc = getLoc(); - if (getParser().parseExpression(ImmVal)) - return MatchOperand_ParseFail; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - Error(ExprLoc, "immediate value expected for barrier operand"); - return MatchOperand_ParseFail; - } - int64_t Value = MCE->getValue(); - // v8.7-A DSB in the nXS variant accepts only the following immediate - // values: 16, 20, 24, 28. 
- if (Value != 16 && Value != 20 && Value != 24 && Value != 28) { - Error(ExprLoc, "barrier operand out of range"); - return MatchOperand_ParseFail; - } - auto DB = AArch64DBnXS::lookupDBnXSByImmValue(Value); - Operands.push_back(AArch64Operand::CreateBarrier(DB->Encoding, DB->Name, - ExprLoc, getContext(), - true /*hasnXSModifier*/)); - return MatchOperand_Success; - } - - if (Tok.isNot(AsmToken::Identifier)) { - TokError("invalid operand for instruction"); - return MatchOperand_ParseFail; - } - - StringRef Operand = Tok.getString(); - auto DB = AArch64DBnXS::lookupDBnXSByName(Operand); - - if (!DB) { - TokError("invalid barrier option name"); - return MatchOperand_ParseFail; - } - - Operands.push_back( - AArch64Operand::CreateBarrier(DB->Encoding, Tok.getString(), getLoc(), - getContext(), true /*hasnXSModifier*/)); + DB ? DB->Encoding : TSB->Encoding, Tok.getString(), getLoc(), + getContext(), false /*hasnXSModifier*/)); Parser.Lex(); // Consume the option return MatchOperand_Success; } OperandMatchResultTy +AArch64AsmParser::tryParseBarriernXSOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + const AsmToken &Tok = Parser.getTok(); + + assert(Mnemonic == "dsb" && "Instruction does not accept nXS operands"); + if (Mnemonic != "dsb") + return MatchOperand_ParseFail; + + if (parseOptionalToken(AsmToken::Hash) || Tok.is(AsmToken::Integer)) { + // Immediate operand. + const MCExpr *ImmVal; + SMLoc ExprLoc = getLoc(); + if (getParser().parseExpression(ImmVal)) + return MatchOperand_ParseFail; + const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); + if (!MCE) { + Error(ExprLoc, "immediate value expected for barrier operand"); + return MatchOperand_ParseFail; + } + int64_t Value = MCE->getValue(); + // v8.7-A DSB in the nXS variant accepts only the following immediate + // values: 16, 20, 24, 28. 
+ if (Value != 16 && Value != 20 && Value != 24 && Value != 28) { + Error(ExprLoc, "barrier operand out of range"); + return MatchOperand_ParseFail; + } + auto DB = AArch64DBnXS::lookupDBnXSByImmValue(Value); + Operands.push_back(AArch64Operand::CreateBarrier(DB->Encoding, DB->Name, + ExprLoc, getContext(), + true /*hasnXSModifier*/)); + return MatchOperand_Success; + } + + if (Tok.isNot(AsmToken::Identifier)) { + TokError("invalid operand for instruction"); + return MatchOperand_ParseFail; + } + + StringRef Operand = Tok.getString(); + auto DB = AArch64DBnXS::lookupDBnXSByName(Operand); + + if (!DB) { + TokError("invalid barrier option name"); + return MatchOperand_ParseFail; + } + + Operands.push_back( + AArch64Operand::CreateBarrier(DB->Encoding, Tok.getString(), getLoc(), + getContext(), true /*hasnXSModifier*/)); + Parser.Lex(); // Consume the option + + return MatchOperand_Success; +} + +OperandMatchResultTy AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); @@ -3438,7 +3438,7 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { .Case("tprel_lo12_nc", AArch64MCExpr::VK_TPREL_LO12_NC) .Case("tlsdesc_lo12", AArch64MCExpr::VK_TLSDESC_LO12) .Case("got", AArch64MCExpr::VK_GOT_PAGE) - .Case("gotpage_lo15", AArch64MCExpr::VK_GOT_PAGE_LO15) + .Case("gotpage_lo15", AArch64MCExpr::VK_GOT_PAGE_LO15) .Case("got_lo12", AArch64MCExpr::VK_GOT_LO12) .Case("gottprel", AArch64MCExpr::VK_GOTTPREL_PAGE) .Case("gottprel_lo12", AArch64MCExpr::VK_GOTTPREL_LO12_NC) @@ -3707,17 +3707,17 @@ bool AArch64AsmParser::parseOptionalMulOperand(OperandVector &Operands) { return Error(getLoc(), "expected 'vl' or '#<imm>'"); } -bool AArch64AsmParser::parseKeywordOperand(OperandVector &Operands) { - MCAsmParser &Parser = getParser(); - auto Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Identifier)) - return true; - Operands.push_back(AArch64Operand::CreateToken(Tok.getString(), false, - Tok.getLoc(), getContext())); - Parser.Lex(); - return false; -} - +bool AArch64AsmParser::parseKeywordOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + auto Tok = Parser.getTok(); + if (Tok.isNot(AsmToken::Identifier)) + return true; + Operands.push_back(AArch64Operand::CreateToken(Tok.getString(), false, + Tok.getLoc(), getContext())); + Parser.Lex(); + return false; +} + /// parseOperand - Parse a arm instruction operand. For now this parses the /// operand regardless of the mnemonic. bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, @@ -3782,11 +3782,11 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, if (GotShift != MatchOperand_NoMatch) return GotShift; - // If this is a two-word mnemonic, parse its special keyword - // operand as an identifier. - if (Mnemonic == "brb") - return parseKeywordOperand(Operands); - + // If this is a two-word mnemonic, parse its special keyword + // operand as an identifier. + if (Mnemonic == "brb") + return parseKeywordOperand(Operands); + // This was not a register so parse other operands that start with an // identifier (like labels) as expressions and create them as immediates. 
const MCExpr *IdVal; @@ -3895,66 +3895,66 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, } } -bool AArch64AsmParser::parseImmExpr(int64_t &Out) { - const MCExpr *Expr = nullptr; - SMLoc L = getLoc(); - if (check(getParser().parseExpression(Expr), L, "expected expression")) - return true; - const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr); - if (check(!Value, L, "expected constant expression")) - return true; - Out = Value->getValue(); - return false; -} - -bool AArch64AsmParser::parseComma() { - if (check(getParser().getTok().isNot(AsmToken::Comma), getLoc(), - "expected comma")) - return true; - // Eat the comma - getParser().Lex(); - return false; -} - -bool AArch64AsmParser::parseRegisterInRange(unsigned &Out, unsigned Base, - unsigned First, unsigned Last) { - unsigned Reg; - SMLoc Start, End; - if (check(ParseRegister(Reg, Start, End), getLoc(), "expected register")) - return true; - - // Special handling for FP and LR; they aren't linearly after x28 in - // the registers enum. - unsigned RangeEnd = Last; - if (Base == AArch64::X0) { - if (Last == AArch64::FP) { - RangeEnd = AArch64::X28; - if (Reg == AArch64::FP) { - Out = 29; - return false; - } - } - if (Last == AArch64::LR) { - RangeEnd = AArch64::X28; - if (Reg == AArch64::FP) { - Out = 29; - return false; - } else if (Reg == AArch64::LR) { - Out = 30; - return false; - } - } - } - - if (check(Reg < First || Reg > RangeEnd, Start, - Twine("expected register in range ") + - AArch64InstPrinter::getRegisterName(First) + " to " + - AArch64InstPrinter::getRegisterName(Last))) - return true; - Out = Reg - Base; - return false; -} - +bool AArch64AsmParser::parseImmExpr(int64_t &Out) { + const MCExpr *Expr = nullptr; + SMLoc L = getLoc(); + if (check(getParser().parseExpression(Expr), L, "expected expression")) + return true; + const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr); + if (check(!Value, L, "expected constant expression")) + return true; + Out = Value->getValue(); + return false; +} + +bool AArch64AsmParser::parseComma() { + if (check(getParser().getTok().isNot(AsmToken::Comma), getLoc(), + "expected comma")) + return true; + // Eat the comma + getParser().Lex(); + return false; +} + +bool AArch64AsmParser::parseRegisterInRange(unsigned &Out, unsigned Base, + unsigned First, unsigned Last) { + unsigned Reg; + SMLoc Start, End; + if (check(ParseRegister(Reg, Start, End), getLoc(), "expected register")) + return true; + + // Special handling for FP and LR; they aren't linearly after x28 in + // the registers enum. 
+ unsigned RangeEnd = Last; + if (Base == AArch64::X0) { + if (Last == AArch64::FP) { + RangeEnd = AArch64::X28; + if (Reg == AArch64::FP) { + Out = 29; + return false; + } + } + if (Last == AArch64::LR) { + RangeEnd = AArch64::X28; + if (Reg == AArch64::FP) { + Out = 29; + return false; + } else if (Reg == AArch64::LR) { + Out = 30; + return false; + } + } + } + + if (check(Reg < First || Reg > RangeEnd, Start, + Twine("expected register in range ") + + AArch64InstPrinter::getRegisterName(First) + " to " + + AArch64InstPrinter::getRegisterName(Last))) + return true; + Out = Reg - Base; + return false; +} + bool AArch64AsmParser::regsEqual(const MCParsedAsmOperand &Op1, const MCParsedAsmOperand &Op2) const { auto &AOp1 = static_cast<const AArch64Operand&>(Op1); @@ -5273,7 +5273,7 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { const MCObjectFileInfo::Environment Format = getContext().getObjectFileInfo()->getObjectFileType(); bool IsMachO = Format == MCObjectFileInfo::IsMachO; - bool IsCOFF = Format == MCObjectFileInfo::IsCOFF; + bool IsCOFF = Format == MCObjectFileInfo::IsCOFF; auto IDVal = DirectiveID.getIdentifier().lower(); SMLoc Loc = DirectiveID.getLoc(); @@ -5302,57 +5302,57 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveLOH(IDVal, Loc); else return true; - } else if (IsCOFF) { - if (IDVal == ".seh_stackalloc") - parseDirectiveSEHAllocStack(Loc); - else if (IDVal == ".seh_endprologue") - parseDirectiveSEHPrologEnd(Loc); - else if (IDVal == ".seh_save_r19r20_x") - parseDirectiveSEHSaveR19R20X(Loc); - else if (IDVal == ".seh_save_fplr") - parseDirectiveSEHSaveFPLR(Loc); - else if (IDVal == ".seh_save_fplr_x") - parseDirectiveSEHSaveFPLRX(Loc); - else if (IDVal == ".seh_save_reg") - parseDirectiveSEHSaveReg(Loc); - else if (IDVal == ".seh_save_reg_x") - parseDirectiveSEHSaveRegX(Loc); - else if (IDVal == ".seh_save_regp") - parseDirectiveSEHSaveRegP(Loc); - else if (IDVal == ".seh_save_regp_x") - parseDirectiveSEHSaveRegPX(Loc); - else if (IDVal == ".seh_save_lrpair") - parseDirectiveSEHSaveLRPair(Loc); - else if (IDVal == ".seh_save_freg") - parseDirectiveSEHSaveFReg(Loc); - else if (IDVal == ".seh_save_freg_x") - parseDirectiveSEHSaveFRegX(Loc); - else if (IDVal == ".seh_save_fregp") - parseDirectiveSEHSaveFRegP(Loc); - else if (IDVal == ".seh_save_fregp_x") - parseDirectiveSEHSaveFRegPX(Loc); - else if (IDVal == ".seh_set_fp") - parseDirectiveSEHSetFP(Loc); - else if (IDVal == ".seh_add_fp") - parseDirectiveSEHAddFP(Loc); - else if (IDVal == ".seh_nop") - parseDirectiveSEHNop(Loc); - else if (IDVal == ".seh_save_next") - parseDirectiveSEHSaveNext(Loc); - else if (IDVal == ".seh_startepilogue") - parseDirectiveSEHEpilogStart(Loc); - else if (IDVal == ".seh_endepilogue") - parseDirectiveSEHEpilogEnd(Loc); - else if (IDVal == ".seh_trap_frame") - parseDirectiveSEHTrapFrame(Loc); - else if (IDVal == ".seh_pushframe") - parseDirectiveSEHMachineFrame(Loc); - else if (IDVal == ".seh_context") - parseDirectiveSEHContext(Loc); - else if (IDVal == ".seh_clear_unwound_to_call") - parseDirectiveSEHClearUnwoundToCall(Loc); - else - return true; + } else if (IsCOFF) { + if (IDVal == ".seh_stackalloc") + parseDirectiveSEHAllocStack(Loc); + else if (IDVal == ".seh_endprologue") + parseDirectiveSEHPrologEnd(Loc); + else if (IDVal == ".seh_save_r19r20_x") + parseDirectiveSEHSaveR19R20X(Loc); + else if (IDVal == ".seh_save_fplr") + parseDirectiveSEHSaveFPLR(Loc); + else if (IDVal == ".seh_save_fplr_x") + parseDirectiveSEHSaveFPLRX(Loc); + else if (IDVal 
== ".seh_save_reg") + parseDirectiveSEHSaveReg(Loc); + else if (IDVal == ".seh_save_reg_x") + parseDirectiveSEHSaveRegX(Loc); + else if (IDVal == ".seh_save_regp") + parseDirectiveSEHSaveRegP(Loc); + else if (IDVal == ".seh_save_regp_x") + parseDirectiveSEHSaveRegPX(Loc); + else if (IDVal == ".seh_save_lrpair") + parseDirectiveSEHSaveLRPair(Loc); + else if (IDVal == ".seh_save_freg") + parseDirectiveSEHSaveFReg(Loc); + else if (IDVal == ".seh_save_freg_x") + parseDirectiveSEHSaveFRegX(Loc); + else if (IDVal == ".seh_save_fregp") + parseDirectiveSEHSaveFRegP(Loc); + else if (IDVal == ".seh_save_fregp_x") + parseDirectiveSEHSaveFRegPX(Loc); + else if (IDVal == ".seh_set_fp") + parseDirectiveSEHSetFP(Loc); + else if (IDVal == ".seh_add_fp") + parseDirectiveSEHAddFP(Loc); + else if (IDVal == ".seh_nop") + parseDirectiveSEHNop(Loc); + else if (IDVal == ".seh_save_next") + parseDirectiveSEHSaveNext(Loc); + else if (IDVal == ".seh_startepilogue") + parseDirectiveSEHEpilogStart(Loc); + else if (IDVal == ".seh_endepilogue") + parseDirectiveSEHEpilogEnd(Loc); + else if (IDVal == ".seh_trap_frame") + parseDirectiveSEHTrapFrame(Loc); + else if (IDVal == ".seh_pushframe") + parseDirectiveSEHMachineFrame(Loc); + else if (IDVal == ".seh_context") + parseDirectiveSEHContext(Loc); + else if (IDVal == ".seh_clear_unwound_to_call") + parseDirectiveSEHClearUnwoundToCall(Loc); + else + return true; } else return true; return false; @@ -5360,8 +5360,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { static void ExpandCryptoAEK(AArch64::ArchKind ArchKind, SmallVector<StringRef, 4> &RequestedExtensions) { - const bool NoCrypto = llvm::is_contained(RequestedExtensions, "nocrypto"); - const bool Crypto = llvm::is_contained(RequestedExtensions, "crypto"); + const bool NoCrypto = llvm::is_contained(RequestedExtensions, "nocrypto"); + const bool Crypto = llvm::is_contained(RequestedExtensions, "crypto"); if (!NoCrypto && Crypto) { switch (ArchKind) { @@ -5377,8 +5377,8 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind, case AArch64::ArchKind::ARMV8_4A: case AArch64::ArchKind::ARMV8_5A: case AArch64::ArchKind::ARMV8_6A: - case AArch64::ArchKind::ARMV8_7A: - case AArch64::ArchKind::ARMV8R: + case AArch64::ArchKind::ARMV8_7A: + case AArch64::ArchKind::ARMV8R: RequestedExtensions.push_back("sm4"); RequestedExtensions.push_back("sha3"); RequestedExtensions.push_back("sha2"); @@ -5399,7 +5399,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind, case AArch64::ArchKind::ARMV8_4A: case AArch64::ArchKind::ARMV8_5A: case AArch64::ArchKind::ARMV8_6A: - case AArch64::ArchKind::ARMV8_7A: + case AArch64::ArchKind::ARMV8_7A: RequestedExtensions.push_back("nosm4"); RequestedExtensions.push_back("nosha3"); RequestedExtensions.push_back("nosha2"); @@ -5433,8 +5433,8 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) { MCSubtargetInfo &STI = copySTI(); std::vector<std::string> ArchFeatures(AArch64Features.begin(), AArch64Features.end()); - STI.setDefaultFeatures("generic", /*TuneCPU*/ "generic", - join(ArchFeatures.begin(), ArchFeatures.end(), ",")); + STI.setDefaultFeatures("generic", /*TuneCPU*/ "generic", + join(ArchFeatures.begin(), ArchFeatures.end(), ",")); SmallVector<StringRef, 4> RequestedExtensions; if (!ExtensionString.empty()) @@ -5536,7 +5536,7 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) { } MCSubtargetInfo &STI = copySTI(); - STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, ""); + STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, ""); CurLoc = incrementLoc(CurLoc, CPU.size()); 
ExpandCryptoAEK(llvm::AArch64::getCPUArchKind(CPU), RequestedExtensions); @@ -5804,238 +5804,238 @@ bool AArch64AsmParser::parseDirectiveVariantPCS(SMLoc L) { return false; } -/// parseDirectiveSEHAllocStack -/// ::= .seh_stackalloc -bool AArch64AsmParser::parseDirectiveSEHAllocStack(SMLoc L) { - int64_t Size; - if (parseImmExpr(Size)) - return true; - getTargetStreamer().EmitARM64WinCFIAllocStack(Size); - return false; -} - -/// parseDirectiveSEHPrologEnd -/// ::= .seh_endprologue -bool AArch64AsmParser::parseDirectiveSEHPrologEnd(SMLoc L) { - getTargetStreamer().EmitARM64WinCFIPrologEnd(); - return false; -} - -/// parseDirectiveSEHSaveR19R20X -/// ::= .seh_save_r19r20_x -bool AArch64AsmParser::parseDirectiveSEHSaveR19R20X(SMLoc L) { - int64_t Offset; - if (parseImmExpr(Offset)) - return true; - getTargetStreamer().EmitARM64WinCFISaveR19R20X(Offset); - return false; -} - -/// parseDirectiveSEHSaveFPLR -/// ::= .seh_save_fplr -bool AArch64AsmParser::parseDirectiveSEHSaveFPLR(SMLoc L) { - int64_t Offset; - if (parseImmExpr(Offset)) - return true; - getTargetStreamer().EmitARM64WinCFISaveFPLR(Offset); - return false; -} - -/// parseDirectiveSEHSaveFPLRX -/// ::= .seh_save_fplr_x -bool AArch64AsmParser::parseDirectiveSEHSaveFPLRX(SMLoc L) { - int64_t Offset; - if (parseImmExpr(Offset)) - return true; - getTargetStreamer().EmitARM64WinCFISaveFPLRX(Offset); - return false; -} - -/// parseDirectiveSEHSaveReg -/// ::= .seh_save_reg -bool AArch64AsmParser::parseDirectiveSEHSaveReg(SMLoc L) { - unsigned Reg; - int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) || - parseComma() || parseImmExpr(Offset)) - return true; - getTargetStreamer().EmitARM64WinCFISaveReg(Reg, Offset); - return false; -} - -/// parseDirectiveSEHSaveRegX -/// ::= .seh_save_reg_x -bool AArch64AsmParser::parseDirectiveSEHSaveRegX(SMLoc L) { - unsigned Reg; - int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) || - parseComma() || parseImmExpr(Offset)) - return true; - getTargetStreamer().EmitARM64WinCFISaveRegX(Reg, Offset); - return false; -} - -/// parseDirectiveSEHSaveRegP -/// ::= .seh_save_regp -bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) { - unsigned Reg; - int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) || - parseComma() || parseImmExpr(Offset)) - return true; - getTargetStreamer().EmitARM64WinCFISaveRegP(Reg, Offset); - return false; -} - -/// parseDirectiveSEHSaveRegPX -/// ::= .seh_save_regp_x -bool AArch64AsmParser::parseDirectiveSEHSaveRegPX(SMLoc L) { - unsigned Reg; - int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) || - parseComma() || parseImmExpr(Offset)) - return true; - getTargetStreamer().EmitARM64WinCFISaveRegPX(Reg, Offset); - return false; -} - -/// parseDirectiveSEHSaveLRPair -/// ::= .seh_save_lrpair -bool AArch64AsmParser::parseDirectiveSEHSaveLRPair(SMLoc L) { - unsigned Reg; - int64_t Offset; - L = getLoc(); - if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) || - parseComma() || parseImmExpr(Offset)) - return true; - if (check(((Reg - 19) % 2 != 0), L, - "expected register with even offset from x19")) - return true; - getTargetStreamer().EmitARM64WinCFISaveLRPair(Reg, Offset); - return false; -} - -/// parseDirectiveSEHSaveFReg -/// ::= .seh_save_freg -bool AArch64AsmParser::parseDirectiveSEHSaveFReg(SMLoc L) { - unsigned Reg; - int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) 
|| - parseComma() || parseImmExpr(Offset)) - return true; - getTargetStreamer().EmitARM64WinCFISaveFReg(Reg, Offset); - return false; -} - -/// parseDirectiveSEHSaveFRegX -/// ::= .seh_save_freg_x -bool AArch64AsmParser::parseDirectiveSEHSaveFRegX(SMLoc L) { - unsigned Reg; - int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) || - parseComma() || parseImmExpr(Offset)) - return true; - getTargetStreamer().EmitARM64WinCFISaveFRegX(Reg, Offset); - return false; -} - -/// parseDirectiveSEHSaveFRegP -/// ::= .seh_save_fregp -bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) { - unsigned Reg; - int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) || - parseComma() || parseImmExpr(Offset)) - return true; - getTargetStreamer().EmitARM64WinCFISaveFRegP(Reg, Offset); - return false; -} - -/// parseDirectiveSEHSaveFRegPX -/// ::= .seh_save_fregp_x -bool AArch64AsmParser::parseDirectiveSEHSaveFRegPX(SMLoc L) { - unsigned Reg; - int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) || - parseComma() || parseImmExpr(Offset)) - return true; - getTargetStreamer().EmitARM64WinCFISaveFRegPX(Reg, Offset); - return false; -} - -/// parseDirectiveSEHSetFP -/// ::= .seh_set_fp -bool AArch64AsmParser::parseDirectiveSEHSetFP(SMLoc L) { - getTargetStreamer().EmitARM64WinCFISetFP(); - return false; -} - -/// parseDirectiveSEHAddFP -/// ::= .seh_add_fp -bool AArch64AsmParser::parseDirectiveSEHAddFP(SMLoc L) { - int64_t Size; - if (parseImmExpr(Size)) - return true; - getTargetStreamer().EmitARM64WinCFIAddFP(Size); - return false; -} - -/// parseDirectiveSEHNop -/// ::= .seh_nop -bool AArch64AsmParser::parseDirectiveSEHNop(SMLoc L) { - getTargetStreamer().EmitARM64WinCFINop(); - return false; -} - -/// parseDirectiveSEHSaveNext -/// ::= .seh_save_next -bool AArch64AsmParser::parseDirectiveSEHSaveNext(SMLoc L) { - getTargetStreamer().EmitARM64WinCFISaveNext(); - return false; -} - -/// parseDirectiveSEHEpilogStart -/// ::= .seh_startepilogue -bool AArch64AsmParser::parseDirectiveSEHEpilogStart(SMLoc L) { - getTargetStreamer().EmitARM64WinCFIEpilogStart(); - return false; -} - -/// parseDirectiveSEHEpilogEnd -/// ::= .seh_endepilogue -bool AArch64AsmParser::parseDirectiveSEHEpilogEnd(SMLoc L) { - getTargetStreamer().EmitARM64WinCFIEpilogEnd(); - return false; -} - -/// parseDirectiveSEHTrapFrame -/// ::= .seh_trap_frame -bool AArch64AsmParser::parseDirectiveSEHTrapFrame(SMLoc L) { - getTargetStreamer().EmitARM64WinCFITrapFrame(); - return false; -} - -/// parseDirectiveSEHMachineFrame -/// ::= .seh_pushframe -bool AArch64AsmParser::parseDirectiveSEHMachineFrame(SMLoc L) { - getTargetStreamer().EmitARM64WinCFIMachineFrame(); - return false; -} - -/// parseDirectiveSEHContext -/// ::= .seh_context -bool AArch64AsmParser::parseDirectiveSEHContext(SMLoc L) { - getTargetStreamer().EmitARM64WinCFIContext(); - return false; -} - -/// parseDirectiveSEHClearUnwoundToCall -/// ::= .seh_clear_unwound_to_call -bool AArch64AsmParser::parseDirectiveSEHClearUnwoundToCall(SMLoc L) { - getTargetStreamer().EmitARM64WinCFIClearUnwoundToCall(); - return false; -} - +/// parseDirectiveSEHAllocStack +/// ::= .seh_stackalloc +bool AArch64AsmParser::parseDirectiveSEHAllocStack(SMLoc L) { + int64_t Size; + if (parseImmExpr(Size)) + return true; + getTargetStreamer().EmitARM64WinCFIAllocStack(Size); + return false; +} + +/// parseDirectiveSEHPrologEnd +/// ::= .seh_endprologue +bool 
AArch64AsmParser::parseDirectiveSEHPrologEnd(SMLoc L) { + getTargetStreamer().EmitARM64WinCFIPrologEnd(); + return false; +} + +/// parseDirectiveSEHSaveR19R20X +/// ::= .seh_save_r19r20_x +bool AArch64AsmParser::parseDirectiveSEHSaveR19R20X(SMLoc L) { + int64_t Offset; + if (parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveR19R20X(Offset); + return false; +} + +/// parseDirectiveSEHSaveFPLR +/// ::= .seh_save_fplr +bool AArch64AsmParser::parseDirectiveSEHSaveFPLR(SMLoc L) { + int64_t Offset; + if (parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveFPLR(Offset); + return false; +} + +/// parseDirectiveSEHSaveFPLRX +/// ::= .seh_save_fplr_x +bool AArch64AsmParser::parseDirectiveSEHSaveFPLRX(SMLoc L) { + int64_t Offset; + if (parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveFPLRX(Offset); + return false; +} + +/// parseDirectiveSEHSaveReg +/// ::= .seh_save_reg +bool AArch64AsmParser::parseDirectiveSEHSaveReg(SMLoc L) { + unsigned Reg; + int64_t Offset; + if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) || + parseComma() || parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveReg(Reg, Offset); + return false; +} + +/// parseDirectiveSEHSaveRegX +/// ::= .seh_save_reg_x +bool AArch64AsmParser::parseDirectiveSEHSaveRegX(SMLoc L) { + unsigned Reg; + int64_t Offset; + if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) || + parseComma() || parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveRegX(Reg, Offset); + return false; +} + +/// parseDirectiveSEHSaveRegP +/// ::= .seh_save_regp +bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) { + unsigned Reg; + int64_t Offset; + if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) || + parseComma() || parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveRegP(Reg, Offset); + return false; +} + +/// parseDirectiveSEHSaveRegPX +/// ::= .seh_save_regp_x +bool AArch64AsmParser::parseDirectiveSEHSaveRegPX(SMLoc L) { + unsigned Reg; + int64_t Offset; + if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) || + parseComma() || parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveRegPX(Reg, Offset); + return false; +} + +/// parseDirectiveSEHSaveLRPair +/// ::= .seh_save_lrpair +bool AArch64AsmParser::parseDirectiveSEHSaveLRPair(SMLoc L) { + unsigned Reg; + int64_t Offset; + L = getLoc(); + if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) || + parseComma() || parseImmExpr(Offset)) + return true; + if (check(((Reg - 19) % 2 != 0), L, + "expected register with even offset from x19")) + return true; + getTargetStreamer().EmitARM64WinCFISaveLRPair(Reg, Offset); + return false; +} + +/// parseDirectiveSEHSaveFReg +/// ::= .seh_save_freg +bool AArch64AsmParser::parseDirectiveSEHSaveFReg(SMLoc L) { + unsigned Reg; + int64_t Offset; + if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) || + parseComma() || parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveFReg(Reg, Offset); + return false; +} + +/// parseDirectiveSEHSaveFRegX +/// ::= .seh_save_freg_x +bool AArch64AsmParser::parseDirectiveSEHSaveFRegX(SMLoc L) { + unsigned Reg; + int64_t Offset; + if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) || + parseComma() || parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveFRegX(Reg, Offset); + return 
false; +} + +/// parseDirectiveSEHSaveFRegP +/// ::= .seh_save_fregp +bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) { + unsigned Reg; + int64_t Offset; + if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) || + parseComma() || parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveFRegP(Reg, Offset); + return false; +} + +/// parseDirectiveSEHSaveFRegPX +/// ::= .seh_save_fregp_x +bool AArch64AsmParser::parseDirectiveSEHSaveFRegPX(SMLoc L) { + unsigned Reg; + int64_t Offset; + if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) || + parseComma() || parseImmExpr(Offset)) + return true; + getTargetStreamer().EmitARM64WinCFISaveFRegPX(Reg, Offset); + return false; +} + +/// parseDirectiveSEHSetFP +/// ::= .seh_set_fp +bool AArch64AsmParser::parseDirectiveSEHSetFP(SMLoc L) { + getTargetStreamer().EmitARM64WinCFISetFP(); + return false; +} + +/// parseDirectiveSEHAddFP +/// ::= .seh_add_fp +bool AArch64AsmParser::parseDirectiveSEHAddFP(SMLoc L) { + int64_t Size; + if (parseImmExpr(Size)) + return true; + getTargetStreamer().EmitARM64WinCFIAddFP(Size); + return false; +} + +/// parseDirectiveSEHNop +/// ::= .seh_nop +bool AArch64AsmParser::parseDirectiveSEHNop(SMLoc L) { + getTargetStreamer().EmitARM64WinCFINop(); + return false; +} + +/// parseDirectiveSEHSaveNext +/// ::= .seh_save_next +bool AArch64AsmParser::parseDirectiveSEHSaveNext(SMLoc L) { + getTargetStreamer().EmitARM64WinCFISaveNext(); + return false; +} + +/// parseDirectiveSEHEpilogStart +/// ::= .seh_startepilogue +bool AArch64AsmParser::parseDirectiveSEHEpilogStart(SMLoc L) { + getTargetStreamer().EmitARM64WinCFIEpilogStart(); + return false; +} + +/// parseDirectiveSEHEpilogEnd +/// ::= .seh_endepilogue +bool AArch64AsmParser::parseDirectiveSEHEpilogEnd(SMLoc L) { + getTargetStreamer().EmitARM64WinCFIEpilogEnd(); + return false; +} + +/// parseDirectiveSEHTrapFrame +/// ::= .seh_trap_frame +bool AArch64AsmParser::parseDirectiveSEHTrapFrame(SMLoc L) { + getTargetStreamer().EmitARM64WinCFITrapFrame(); + return false; +} + +/// parseDirectiveSEHMachineFrame +/// ::= .seh_pushframe +bool AArch64AsmParser::parseDirectiveSEHMachineFrame(SMLoc L) { + getTargetStreamer().EmitARM64WinCFIMachineFrame(); + return false; +} + +/// parseDirectiveSEHContext +/// ::= .seh_context +bool AArch64AsmParser::parseDirectiveSEHContext(SMLoc L) { + getTargetStreamer().EmitARM64WinCFIContext(); + return false; +} + +/// parseDirectiveSEHClearUnwoundToCall +/// ::= .seh_clear_unwound_to_call +bool AArch64AsmParser::parseDirectiveSEHClearUnwoundToCall(SMLoc L) { + getTargetStreamer().EmitARM64WinCFIClearUnwoundToCall(); + return false; +} + bool AArch64AsmParser::classifySymbolRef(const MCExpr *Expr, AArch64MCExpr::VariantKind &ELFRefKind, @@ -6323,26 +6323,26 @@ AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) { return MatchOperand_Success; } - -OperandMatchResultTy -AArch64AsmParser::tryParseGPR64x8(OperandVector &Operands) { - SMLoc SS = getLoc(); - - unsigned XReg; - if (tryParseScalarRegister(XReg) != MatchOperand_Success) - return MatchOperand_NoMatch; - - MCContext &ctx = getContext(); - const MCRegisterInfo *RI = ctx.getRegisterInfo(); - int X8Reg = RI->getMatchingSuperReg( - XReg, AArch64::x8sub_0, - &AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID]); - if (!X8Reg) { - Error(SS, "expected an even-numbered x-register in the range [x0,x22]"); - return MatchOperand_ParseFail; - } - - Operands.push_back( - AArch64Operand::CreateReg(X8Reg, RegKind::Scalar, 
SS, getLoc(), ctx)); - return MatchOperand_Success; -} + +OperandMatchResultTy +AArch64AsmParser::tryParseGPR64x8(OperandVector &Operands) { + SMLoc SS = getLoc(); + + unsigned XReg; + if (tryParseScalarRegister(XReg) != MatchOperand_Success) + return MatchOperand_NoMatch; + + MCContext &ctx = getContext(); + const MCRegisterInfo *RI = ctx.getRegisterInfo(); + int X8Reg = RI->getMatchingSuperReg( + XReg, AArch64::x8sub_0, + &AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID]); + if (!X8Reg) { + Error(SS, "expected an even-numbered x-register in the range [x0,x22]"); + return MatchOperand_ParseFail; + } + + Operands.push_back( + AArch64Operand::CreateReg(X8Reg, RegKind::Scalar, SS, getLoc(), ctx)); + return MatchOperand_Success; +} diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make index 512f510d85..c9421c4c06 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make +++ b/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make @@ -12,20 +12,20 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/MC - contrib/libs/llvm12/lib/MC/MCParser - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc - contrib/libs/llvm12/lib/Target/AArch64/TargetInfo - contrib/libs/llvm12/lib/Target/AArch64/Utils + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/MC + contrib/libs/llvm12/lib/MC/MCParser + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc + contrib/libs/llvm12/lib/Target/AArch64/TargetInfo + contrib/libs/llvm12/lib/Target/AArch64/Utils ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64 - contrib/libs/llvm12/lib/Target/AArch64 - contrib/libs/llvm12/lib/Target/AArch64/AsmParser + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64 + contrib/libs/llvm12/lib/Target/AArch64 + contrib/libs/llvm12/lib/Target/AArch64/AsmParser ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index dca76f8457..72f9968681 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -62,10 +62,10 @@ static DecodeStatus DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); @@ -271,16 +271,16 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, uint32_t Insn = (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0); - const uint8_t *Tables[] = {DecoderTable32, DecoderTableFallback32}; - - for (auto Table : Tables) { - DecodeStatus Result = - decodeInstruction(Table, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) - return Result; - } - - return MCDisassembler::Fail; + const uint8_t 
*Tables[] = {DecoderTable32, DecoderTableFallback32}; + + for (auto Table : Tables) { + DecodeStatus Result = + decodeInstruction(Table, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) + return Result; + } + + return MCDisassembler::Fail; } static MCSymbolizer * @@ -461,35 +461,35 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, return Success; } -static const unsigned GPR64x8DecoderTable[] = { - AArch64::X0_X1_X2_X3_X4_X5_X6_X7, - AArch64::X2_X3_X4_X5_X6_X7_X8_X9, - AArch64::X4_X5_X6_X7_X8_X9_X10_X11, - AArch64::X6_X7_X8_X9_X10_X11_X12_X13, - AArch64::X8_X9_X10_X11_X12_X13_X14_X15, - AArch64::X10_X11_X12_X13_X14_X15_X16_X17, - AArch64::X12_X13_X14_X15_X16_X17_X18_X19, - AArch64::X14_X15_X16_X17_X18_X19_X20_X21, - AArch64::X16_X17_X18_X19_X20_X21_X22_X23, - AArch64::X18_X19_X20_X21_X22_X23_X24_X25, - AArch64::X20_X21_X22_X23_X24_X25_X26_X27, - AArch64::X22_X23_X24_X25_X26_X27_X28_FP, -}; - -static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { - if (RegNo > 22) - return Fail; - if (RegNo & 1) - return Fail; - - unsigned Register = GPR64x8DecoderTable[RegNo >> 1]; - Inst.addOperand(MCOperand::createReg(Register)); - return Success; -} - +static const unsigned GPR64x8DecoderTable[] = { + AArch64::X0_X1_X2_X3_X4_X5_X6_X7, + AArch64::X2_X3_X4_X5_X6_X7_X8_X9, + AArch64::X4_X5_X6_X7_X8_X9_X10_X11, + AArch64::X6_X7_X8_X9_X10_X11_X12_X13, + AArch64::X8_X9_X10_X11_X12_X13_X14_X15, + AArch64::X10_X11_X12_X13_X14_X15_X16_X17, + AArch64::X12_X13_X14_X15_X16_X17_X18_X19, + AArch64::X14_X15_X16_X17_X18_X19_X20_X21, + AArch64::X16_X17_X18_X19_X20_X21_X22_X23, + AArch64::X18_X19_X20_X21_X22_X23_X24_X25, + AArch64::X20_X21_X22_X23_X24_X25_X26_X27, + AArch64::X22_X23_X24_X25_X26_X27_X28_FP, +}; + +static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 22) + return Fail; + if (RegNo & 1) + return Fail; + + unsigned Register = GPR64x8DecoderTable[RegNo >> 1]; + Inst.addOperand(MCOperand::createReg(Register)); + return Success; +} + static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, const void *Decoder) { diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make index 096b55cd68..e4da353a77 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make +++ b/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make @@ -12,20 +12,20 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/MC - contrib/libs/llvm12/lib/MC/MCDisassembler - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc - contrib/libs/llvm12/lib/Target/AArch64/TargetInfo - contrib/libs/llvm12/lib/Target/AArch64/Utils + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/MC + contrib/libs/llvm12/lib/MC/MCDisassembler + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc + contrib/libs/llvm12/lib/Target/AArch64/TargetInfo + contrib/libs/llvm12/lib/Target/AArch64/Utils ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64 - contrib/libs/llvm12/lib/Target/AArch64 - contrib/libs/llvm12/lib/Target/AArch64/Disassembler + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64 + 
contrib/libs/llvm12/lib/Target/AArch64 + contrib/libs/llvm12/lib/Target/AArch64/Disassembler ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 0f8b1d6584..7b05f70a73 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -52,10 +52,10 @@ AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI) : CallLowering(&TLI) {} namespace { -struct IncomingArgHandler : public CallLowering::IncomingValueHandler { +struct IncomingArgHandler : public CallLowering::IncomingValueHandler { IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, CCAssignFn *AssignFn) - : IncomingValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {} + : IncomingValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {} Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { @@ -101,7 +101,7 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler { /// How the physical register gets marked varies between formal /// parameters (it's a basic-block live-in), and a call instruction /// (it's an implicit-def of the BL). - virtual void markPhysRegUsed(MCRegister PhysReg) = 0; + virtual void markPhysRegUsed(MCRegister PhysReg) = 0; uint64_t StackUsed; }; @@ -111,7 +111,7 @@ struct FormalArgHandler : public IncomingArgHandler { CCAssignFn *AssignFn) : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {} - void markPhysRegUsed(MCRegister PhysReg) override { + void markPhysRegUsed(MCRegister PhysReg) override { MIRBuilder.getMRI()->addLiveIn(PhysReg); MIRBuilder.getMBB().addLiveIn(PhysReg); } @@ -122,19 +122,19 @@ struct CallReturnHandler : public IncomingArgHandler { MachineInstrBuilder MIB, CCAssignFn *AssignFn) : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} - void markPhysRegUsed(MCRegister PhysReg) override { + void markPhysRegUsed(MCRegister PhysReg) override { MIB.addDef(PhysReg, RegState::Implicit); } MachineInstrBuilder MIB; }; -struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { +struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstrBuilder MIB, CCAssignFn *AssignFn, CCAssignFn *AssignFnVarArg, bool IsTailCall = false, int FPDiff = 0) - : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), + : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff), StackSize(0), SPReg(0) {} @@ -187,8 +187,8 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { if (!Arg.IsFixed) MaxSize = 0; - assert(Arg.Regs.size() == 1); - + assert(Arg.Regs.size() == 1); + Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt ? 
extendRegister(Arg.Regs[0], VA, MaxSize) : Arg.Regs[0]; @@ -274,7 +274,7 @@ void AArch64CallLowering::splitToValueTypes( bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef<Register> VRegs, - FunctionLoweringInfo &FLI, + FunctionLoweringInfo &FLI, Register SwiftErrorVReg) const { auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR); assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) && @@ -420,7 +420,7 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder, // Conservatively forward X8, since it might be used for an aggregate // return. if (!CCInfo.isAllocated(AArch64::X8)) { - Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass); + Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass); Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64)); } @@ -441,7 +441,7 @@ bool AArch64CallLowering::fallBackToDAGISel(const Function &F) const { bool AArch64CallLowering::lowerFormalArguments( MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { + ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { MachineFunction &MF = MIRBuilder.getMF(); MachineBasicBlock &MBB = MIRBuilder.getMBB(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -623,25 +623,25 @@ bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable( const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC); MachineRegisterInfo &MRI = MF.getRegInfo(); - if (Info.IsVarArg) { - // Be conservative and disallow variadic memory operands to match SDAG's - // behaviour. - // FIXME: If the caller's calling convention is C, then we can - // potentially use its argument area. However, for cases like fastcc, - // we can't do anything. - for (unsigned i = 0; i < OutLocs.size(); ++i) { - auto &ArgLoc = OutLocs[i]; - if (ArgLoc.isRegLoc()) - continue; + if (Info.IsVarArg) { + // Be conservative and disallow variadic memory operands to match SDAG's + // behaviour. + // FIXME: If the caller's calling convention is C, then we can + // potentially use its argument area. However, for cases like fastcc, + // we can't do anything. + for (unsigned i = 0; i < OutLocs.size(); ++i) { + auto &ArgLoc = OutLocs[i]; + if (ArgLoc.isRegLoc()) + continue; LLVM_DEBUG( dbgs() - << "... Cannot tail call vararg function with stack arguments\n"); + << "... Cannot tail call vararg function with stack arguments\n"); return false; } } - return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs); + return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs); } bool AArch64CallLowering::isEligibleForTailCallOptimization( @@ -756,7 +756,7 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use // x16 or x17. - if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) + if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) return AArch64::TCRETURNriBTI; return AArch64::TCRETURNri; @@ -776,7 +776,7 @@ bool AArch64CallLowering::lowerTailCall( // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64 // register class. Until we can do that, we should fall back here. 
- if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) { + if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) { LLVM_DEBUG( dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n"); return false; @@ -894,9 +894,9 @@ bool AArch64CallLowering::lowerTailCall( // If Callee is a reg, since it is used by a target specific instruction, // it must have a register class matching the constraint of that instruction. if (Info.Callee.isReg()) - constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), - *MF.getSubtarget().getRegBankInfo(), *MIB, - MIB->getDesc(), Info.Callee, 0); + constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), + *MF.getSubtarget().getRegBankInfo(), *MIB, + MIB->getDesc(), Info.Callee, 0); MF.getFrameInfo().setHasTailCall(); Info.LoweredTailCall = true; @@ -978,9 +978,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // instruction, it must have a register class matching the // constraint of that instruction. if (Info.Callee.isReg()) - constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), - *MF.getSubtarget().getRegBankInfo(), *MIB, - MIB->getDesc(), Info.Callee, 0); + constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), + *MF.getSubtarget().getRegBankInfo(), *MIB, + MIB->getDesc(), Info.Callee, 0); // Finally we can copy the returned value back into its virtual-register. In // symmetry with the arguments, the physical register must be an diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h index 1f45c9ebc0..8054cf6b99 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h @@ -34,14 +34,14 @@ public: AArch64CallLowering(const AArch64TargetLowering &TLI); bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, - ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI, + ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI, Register SwiftErrorVReg) const override; bool fallBackToDAGISel(const Function &F) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef<ArrayRef<Register>> VRegs, - FunctionLoweringInfo &FLI) const override; + ArrayRef<ArrayRef<Register>> VRegs, + FunctionLoweringInfo &FLI) const override; bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h index bed1136c7a..9536f0a596 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h @@ -1,29 +1,29 @@ -//===- AArch64GlobalISelUtils.h ----------------------------------*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file APIs for AArch64-specific helper functions used in the GlobalISel -/// pipeline. 
-//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H -#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H - -#include <cstdint> - -namespace llvm { -namespace AArch64GISelUtils { - -/// \returns true if \p C is a legal immediate operand for an arithmetic -/// instruction. -constexpr bool isLegalArithImmed(const uint64_t C) { - return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); -} - -} // namespace AArch64GISelUtils -} // namespace llvm - -#endif +//===- AArch64GlobalISelUtils.h ----------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file APIs for AArch64-specific helper functions used in the GlobalISel +/// pipeline. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H +#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H + +#include <cstdint> + +namespace llvm { +namespace AArch64GISelUtils { + +/// \returns true if \p C is a legal immediate operand for an arithmetic +/// instruction. +constexpr bool isLegalArithImmed(const uint64_t C) { + return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); +} + +} // namespace AArch64GISelUtils +} // namespace llvm + +#endif diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index fc5ef02e84..72f92065f3 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -18,7 +18,7 @@ #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" @@ -34,18 +34,18 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/IntrinsicsAArch64.h" -#include "llvm/Pass.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "aarch64-isel" using namespace llvm; -using namespace MIPatternMatch; +using namespace MIPatternMatch; namespace { @@ -103,23 +103,23 @@ private: bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - ///@{ - /// Helper functions for selectCompareBranch. 
- bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, - MachineIRBuilder &MIB) const; - bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, - MachineIRBuilder &MIB) const; - bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, - MachineIRBuilder &MIB) const; - bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, + ///@{ + /// Helper functions for selectCompareBranch. + bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, + MachineIRBuilder &MIB) const; + bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, + MachineIRBuilder &MIB) const; + bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, + MachineIRBuilder &MIB) const; + bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const; - ///@} - + ///@} + bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; // Helper to generate an equivalent of scalar_to_vector into a new register, @@ -160,7 +160,7 @@ private: bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const; - bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const; unsigned emitConstantPoolEntry(const Constant *CPVal, MachineFunction &MF) const; @@ -173,72 +173,72 @@ private: MachineIRBuilder &MIRBuilder) const; // Emit an integer compare between LHS and RHS, which checks for Predicate. - MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, - MachineOperand &Predicate, - MachineIRBuilder &MIRBuilder) const; - - /// Emit a floating point comparison between \p LHS and \p RHS. - /// \p Pred if given is the intended predicate to use. - MachineInstr *emitFPCompare(Register LHS, Register RHS, - MachineIRBuilder &MIRBuilder, - Optional<CmpInst::Predicate> = None) const; - - MachineInstr *emitInstr(unsigned Opcode, - std::initializer_list<llvm::DstOp> DstOps, - std::initializer_list<llvm::SrcOp> SrcOps, - MachineIRBuilder &MIRBuilder, - const ComplexRendererFns &RenderFns = None) const; - /// Helper function to emit an add or sub instruction. - /// - /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above - /// in a specific order. - /// - /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. - /// - /// \code - /// const std::array<std::array<unsigned, 2>, 4> Table { - /// {{AArch64::ADDXri, AArch64::ADDWri}, - /// {AArch64::ADDXrs, AArch64::ADDWrs}, - /// {AArch64::ADDXrr, AArch64::ADDWrr}, - /// {AArch64::SUBXri, AArch64::SUBWri}, - /// {AArch64::ADDXrx, AArch64::ADDWrx}}}; - /// \endcode - /// - /// Each row in the table corresponds to a different addressing mode. Each - /// column corresponds to a different register size. 
- /// - /// \attention Rows must be structured as follows: - /// - Row 0: The ri opcode variants - /// - Row 1: The rs opcode variants - /// - Row 2: The rr opcode variants - /// - Row 3: The ri opcode variants for negative immediates - /// - Row 4: The rx opcode variants - /// - /// \attention Columns must be structured as follows: - /// - Column 0: The 64-bit opcode variants - /// - Column 1: The 32-bit opcode variants - /// - /// \p Dst is the destination register of the binop to emit. - /// \p LHS is the left-hand operand of the binop to emit. - /// \p RHS is the right-hand operand of the binop to emit. - MachineInstr *emitAddSub( - const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, - Register Dst, MachineOperand &LHS, MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, - MachineOperand &RHS, + MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, + MachineOperand &Predicate, + MachineIRBuilder &MIRBuilder) const; + + /// Emit a floating point comparison between \p LHS and \p RHS. + /// \p Pred if given is the intended predicate to use. + MachineInstr *emitFPCompare(Register LHS, Register RHS, + MachineIRBuilder &MIRBuilder, + Optional<CmpInst::Predicate> = None) const; + + MachineInstr *emitInstr(unsigned Opcode, + std::initializer_list<llvm::DstOp> DstOps, + std::initializer_list<llvm::SrcOp> SrcOps, + MachineIRBuilder &MIRBuilder, + const ComplexRendererFns &RenderFns = None) const; + /// Helper function to emit an add or sub instruction. + /// + /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above + /// in a specific order. + /// + /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. + /// + /// \code + /// const std::array<std::array<unsigned, 2>, 4> Table { + /// {{AArch64::ADDXri, AArch64::ADDWri}, + /// {AArch64::ADDXrs, AArch64::ADDWrs}, + /// {AArch64::ADDXrr, AArch64::ADDWrr}, + /// {AArch64::SUBXri, AArch64::SUBWri}, + /// {AArch64::ADDXrx, AArch64::ADDWrx}}}; + /// \endcode + /// + /// Each row in the table corresponds to a different addressing mode. Each + /// column corresponds to a different register size. + /// + /// \attention Rows must be structured as follows: + /// - Row 0: The ri opcode variants + /// - Row 1: The rs opcode variants + /// - Row 2: The rr opcode variants + /// - Row 3: The ri opcode variants for negative immediates + /// - Row 4: The rx opcode variants + /// + /// \attention Columns must be structured as follows: + /// - Column 0: The 64-bit opcode variants + /// - Column 1: The 32-bit opcode variants + /// + /// \p Dst is the destination register of the binop to emit. + /// \p LHS is the left-hand operand of the binop to emit. + /// \p RHS is the right-hand operand of the binop to emit. 
+ MachineInstr *emitAddSub( + const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, + Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, + MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, + MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, - AArch64CC::CondCode CC, - MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, + AArch64CC::CondCode CC, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitExtractVectorElt(Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, Register VecReg, unsigned LaneIdx, @@ -250,25 +250,25 @@ private: MachineInstr *emitFMovForFConstant(MachineInstr &MI, MachineRegisterInfo &MRI) const; - /// Emit a CSet for an integer compare. - /// - /// \p DefReg is expected to be a 32-bit scalar register. + /// Emit a CSet for an integer compare. + /// + /// \p DefReg is expected to be a 32-bit scalar register. MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred, MachineIRBuilder &MIRBuilder) const; - /// Emit a CSet for a FP compare. - /// - /// \p Dst is expected to be a 32-bit scalar register. - MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, - MachineIRBuilder &MIRBuilder) const; - - /// Emit the overflow op for \p Opcode. - /// - /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, - /// G_USUBO, etc. - std::pair<MachineInstr *, AArch64CC::CondCode> - emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, - MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - + /// Emit a CSet for a FP compare. + /// + /// \p Dst is expected to be a 32-bit scalar register. + MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, + MachineIRBuilder &MIRBuilder) const; + + /// Emit the overflow op for \p Opcode. + /// + /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, + /// G_USUBO, etc. + std::pair<MachineInstr *, AArch64CC::CondCode> + emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, + MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; + /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. /// \p IsNegative is true if the test should be "not zero". /// This will also optimize the test bit instruction when possible. @@ -276,11 +276,11 @@ private: MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const; - /// Emit a CB(N)Z instruction which branches to \p DestMBB. - MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, - MachineBasicBlock *DestMBB, - MachineIRBuilder &MIB) const; - + /// Emit a CB(N)Z instruction which branches to \p DestMBB. 
+ MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, + MachineBasicBlock *DestMBB, + MachineIRBuilder &MIB) const; + // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. // We use these manually instead of using the importer since it doesn't // support SDNodeXForm. @@ -577,7 +577,7 @@ static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { getConstantVRegValWithLookThrough(Root.getReg(), MRI, true); if (!ValAndVReg) return None; - Immed = ValAndVReg->Value.getSExtValue(); + Immed = ValAndVReg->Value.getSExtValue(); } else return None; return Immed; @@ -865,7 +865,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, #ifndef NDEBUG ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI); assert(ValidCopy && "Invalid copy."); - (void)KnownValid; + (void)KnownValid; #endif return ValidCopy; }; @@ -1012,173 +1012,173 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { return GenericOpc; } -MachineInstr * -AArch64InstructionSelector::emitSelect(Register Dst, Register True, - Register False, AArch64CC::CondCode CC, - MachineIRBuilder &MIB) const { - MachineRegisterInfo &MRI = *MIB.getMRI(); - assert(RBI.getRegBank(False, MRI, TRI)->getID() == - RBI.getRegBank(True, MRI, TRI)->getID() && - "Expected both select operands to have the same regbank?"); - LLT Ty = MRI.getType(True); - if (Ty.isVector()) - return nullptr; - const unsigned Size = Ty.getSizeInBits(); - assert((Size == 32 || Size == 64) && - "Expected 32 bit or 64 bit select only?"); - const bool Is32Bit = Size == 32; - if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { - unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; - auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); - constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); - return &*FCSel; - } - - // By default, we'll try and emit a CSEL. - unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; - bool Optimized = false; - auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, - &Optimized](Register &Reg, Register &OtherReg, - bool Invert) { - if (Optimized) - return false; - - // Attempt to fold: - // - // %sub = G_SUB 0, %x - // %select = G_SELECT cc, %reg, %sub - // - // Into: - // %select = CSNEG %reg, %x, cc - Register MatchReg; - if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { - Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; - Reg = MatchReg; - if (Invert) { - CC = AArch64CC::getInvertedCondCode(CC); - std::swap(Reg, OtherReg); - } - return true; - } - - // Attempt to fold: - // - // %xor = G_XOR %x, -1 - // %select = G_SELECT cc, %reg, %xor - // - // Into: - // %select = CSINV %reg, %x, cc - if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { - Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; - Reg = MatchReg; - if (Invert) { - CC = AArch64CC::getInvertedCondCode(CC); - std::swap(Reg, OtherReg); - } - return true; - } - - // Attempt to fold: - // - // %add = G_ADD %x, 1 - // %select = G_SELECT cc, %reg, %add - // - // Into: - // %select = CSINC %reg, %x, cc - if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) { - Opc = Is32Bit ? 
AArch64::CSINCWr : AArch64::CSINCXr; - Reg = MatchReg; - if (Invert) { - CC = AArch64CC::getInvertedCondCode(CC); - std::swap(Reg, OtherReg); - } - return true; - } - +MachineInstr * +AArch64InstructionSelector::emitSelect(Register Dst, Register True, + Register False, AArch64CC::CondCode CC, + MachineIRBuilder &MIB) const { + MachineRegisterInfo &MRI = *MIB.getMRI(); + assert(RBI.getRegBank(False, MRI, TRI)->getID() == + RBI.getRegBank(True, MRI, TRI)->getID() && + "Expected both select operands to have the same regbank?"); + LLT Ty = MRI.getType(True); + if (Ty.isVector()) + return nullptr; + const unsigned Size = Ty.getSizeInBits(); + assert((Size == 32 || Size == 64) && + "Expected 32 bit or 64 bit select only?"); + const bool Is32Bit = Size == 32; + if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { + unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; + auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); + constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); + return &*FCSel; + } + + // By default, we'll try and emit a CSEL. + unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; + bool Optimized = false; + auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, + &Optimized](Register &Reg, Register &OtherReg, + bool Invert) { + if (Optimized) + return false; + + // Attempt to fold: + // + // %sub = G_SUB 0, %x + // %select = G_SELECT cc, %reg, %sub + // + // Into: + // %select = CSNEG %reg, %x, cc + Register MatchReg; + if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { + Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; + Reg = MatchReg; + if (Invert) { + CC = AArch64CC::getInvertedCondCode(CC); + std::swap(Reg, OtherReg); + } + return true; + } + + // Attempt to fold: + // + // %xor = G_XOR %x, -1 + // %select = G_SELECT cc, %reg, %xor + // + // Into: + // %select = CSINV %reg, %x, cc + if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { + Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; + Reg = MatchReg; + if (Invert) { + CC = AArch64CC::getInvertedCondCode(CC); + std::swap(Reg, OtherReg); + } + return true; + } + + // Attempt to fold: + // + // %add = G_ADD %x, 1 + // %select = G_SELECT cc, %reg, %add + // + // Into: + // %select = CSINC %reg, %x, cc + if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) { + Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; + Reg = MatchReg; + if (Invert) { + CC = AArch64CC::getInvertedCondCode(CC); + std::swap(Reg, OtherReg); + } + return true; + } + return false; - }; - - // Helper lambda which tries to use CSINC/CSINV for the instruction when its - // true/false values are constants. - // FIXME: All of these patterns already exist in tablegen. We should be - // able to import these. - auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, - &Optimized]() { - if (Optimized) - return false; - auto TrueCst = getConstantVRegValWithLookThrough(True, MRI); - auto FalseCst = getConstantVRegValWithLookThrough(False, MRI); - if (!TrueCst && !FalseCst) - return false; - - Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; - if (TrueCst && FalseCst) { - int64_t T = TrueCst->Value.getSExtValue(); - int64_t F = FalseCst->Value.getSExtValue(); - - if (T == 0 && F == 1) { - // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc - Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; - True = ZReg; - False = ZReg; - return true; - } - - if (T == 0 && F == -1) { - // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc - Opc = Is32Bit ? 
AArch64::CSINVWr : AArch64::CSINVXr; - True = ZReg; - False = ZReg; - return true; - } - } - - if (TrueCst) { - int64_t T = TrueCst->Value.getSExtValue(); - if (T == 1) { - // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc - Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; - True = False; - False = ZReg; - CC = AArch64CC::getInvertedCondCode(CC); - return true; - } - - if (T == -1) { - // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc - Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; - True = False; - False = ZReg; - CC = AArch64CC::getInvertedCondCode(CC); - return true; - } - } - - if (FalseCst) { - int64_t F = FalseCst->Value.getSExtValue(); - if (F == 1) { - // G_SELECT cc, t, 1 -> CSINC t, zreg, cc - Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; - False = ZReg; - return true; - } - - if (F == -1) { - // G_SELECT cc, t, -1 -> CSINC t, zreg, cc - Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; - False = ZReg; - return true; - } - } - return false; - }; - - Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false); - Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true); - Optimized |= TryOptSelectCst(); - auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); - constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI); - return &*SelectInst; + }; + + // Helper lambda which tries to use CSINC/CSINV for the instruction when its + // true/false values are constants. + // FIXME: All of these patterns already exist in tablegen. We should be + // able to import these. + auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, + &Optimized]() { + if (Optimized) + return false; + auto TrueCst = getConstantVRegValWithLookThrough(True, MRI); + auto FalseCst = getConstantVRegValWithLookThrough(False, MRI); + if (!TrueCst && !FalseCst) + return false; + + Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; + if (TrueCst && FalseCst) { + int64_t T = TrueCst->Value.getSExtValue(); + int64_t F = FalseCst->Value.getSExtValue(); + + if (T == 0 && F == 1) { + // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc + Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; + True = ZReg; + False = ZReg; + return true; + } + + if (T == 0 && F == -1) { + // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc + Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; + True = ZReg; + False = ZReg; + return true; + } + } + + if (TrueCst) { + int64_t T = TrueCst->Value.getSExtValue(); + if (T == 1) { + // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc + Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; + True = False; + False = ZReg; + CC = AArch64CC::getInvertedCondCode(CC); + return true; + } + + if (T == -1) { + // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc + Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; + True = False; + False = ZReg; + CC = AArch64CC::getInvertedCondCode(CC); + return true; + } + } + + if (FalseCst) { + int64_t F = FalseCst->Value.getSExtValue(); + if (F == 1) { + // G_SELECT cc, t, 1 -> CSINC t, zreg, cc + Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; + False = ZReg; + return true; + } + + if (F == -1) { + // G_SELECT cc, t, -1 -> CSINC t, zreg, cc + Opc = Is32Bit ? 
AArch64::CSINVWr : AArch64::CSINVXr; + False = ZReg; + return true; + } + } + return false; + }; + + Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false); + Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true); + Optimized |= TryOptSelectCst(); + auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); + constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI); + return &*SelectInst; } static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { @@ -1308,7 +1308,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI); } if (VRegAndVal) - C = VRegAndVal->Value.getSExtValue(); + C = VRegAndVal->Value.getSExtValue(); break; } case TargetOpcode::G_ASHR: @@ -1318,7 +1318,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, auto VRegAndVal = getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); if (VRegAndVal) - C = VRegAndVal->Value.getSExtValue(); + C = VRegAndVal->Value.getSExtValue(); break; } } @@ -1420,9 +1420,9 @@ MachineInstr *AArch64InstructionSelector::emitTestBit( } bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( - MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, - MachineIRBuilder &MIB) const { - assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?"); + MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, + MachineIRBuilder &MIB) const { + assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?"); // Given something like this: // // %x = ...Something... @@ -1444,92 +1444,92 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( // Check if the AND has a constant on its RHS which we can use as a mask. // If it's a power of 2, then it's the same as checking a specific bit. // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) - auto MaybeBit = getConstantVRegValWithLookThrough( - AndInst.getOperand(2).getReg(), *MIB.getMRI()); - if (!MaybeBit) + auto MaybeBit = getConstantVRegValWithLookThrough( + AndInst.getOperand(2).getReg(), *MIB.getMRI()); + if (!MaybeBit) return false; - int32_t Bit = MaybeBit->Value.exactLogBase2(); - if (Bit < 0) - return false; - - Register TestReg = AndInst.getOperand(1).getReg(); + int32_t Bit = MaybeBit->Value.exactLogBase2(); + if (Bit < 0) + return false; + Register TestReg = AndInst.getOperand(1).getReg(); + // Emit a TB(N)Z. 
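// [Editorial sketch, not part of the upstream diff] The fold above relies on the fact
// that a power-of-two AND mask tests exactly one bit, whose index is log2 of the mask
// (what exactLogBase2() returns, with -1 for non-power-of-two values). A minimal
// standalone C++ illustration of that mapping, assuming no LLVM APIs:
#include <cstdint>
// Returns the bit index tested by a single-bit mask, or -1 if Mask is not a power of two.
inline int maskToTestBit(uint64_t Mask) {
  if (Mask == 0 || (Mask & (Mask - 1)) != 0)
    return -1;                 // zero or more than one bit set
  int Bit = 0;
  while ((Mask & 1) == 0) {    // count trailing zeros
    Mask >>= 1;
    ++Bit;
  }
  return Bit;                  // e.g. maskToTestBit(8) == 3, matching "ANDing with 8 tests bit 3"
}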
emitTestBit(TestReg, Bit, Invert, DstMBB, MIB); return true; } -MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, - bool IsNegative, - MachineBasicBlock *DestMBB, - MachineIRBuilder &MIB) const { - assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!"); - MachineRegisterInfo &MRI = *MIB.getMRI(); - assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == - AArch64::GPRRegBankID && - "Expected GPRs only?"); - auto Ty = MRI.getType(CompareReg); - unsigned Width = Ty.getSizeInBits(); - assert(!Ty.isVector() && "Expected scalar only?"); - assert(Width <= 64 && "Expected width to be at most 64?"); - static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, - {AArch64::CBNZW, AArch64::CBNZX}}; - unsigned Opc = OpcTable[IsNegative][Width == 64]; - auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB); - constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); - return &*BranchMI; -} - -bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( - MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { - assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); - assert(I.getOpcode() == TargetOpcode::G_BRCOND); - // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't - // totally clean. Some of them require two branches to implement. - auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate(); - emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, - Pred); - AArch64CC::CondCode CC1, CC2; - changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); +MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, + bool IsNegative, + MachineBasicBlock *DestMBB, + MachineIRBuilder &MIB) const { + assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!"); + MachineRegisterInfo &MRI = *MIB.getMRI(); + assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == + AArch64::GPRRegBankID && + "Expected GPRs only?"); + auto Ty = MRI.getType(CompareReg); + unsigned Width = Ty.getSizeInBits(); + assert(!Ty.isVector() && "Expected scalar only?"); + assert(Width <= 64 && "Expected width to be at most 64?"); + static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, + {AArch64::CBNZW, AArch64::CBNZX}}; + unsigned Opc = OpcTable[IsNegative][Width == 64]; + auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB); + constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); + return &*BranchMI; +} + +bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( + MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { + assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); + assert(I.getOpcode() == TargetOpcode::G_BRCOND); + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't + // totally clean. Some of them require two branches to implement. 
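// [Editorial sketch, not part of the upstream diff] The comment above notes that some
// LLVM FP predicates need two AArch64 condition codes (the CC1/CC2 pair below), hence
// two Bcc branches. For example, "ordered and not equal" has no single condition but is
// equivalent to (a < b) || (a > b): both comparisons are false for NaN operands and for
// equal ordered operands. Hypothetical helper for illustration only, not an LLVM API:
inline bool fcmpOrderedNotEqual(double A, double B) {
  return (A < B) || (A > B);   // two conditions, mirroring the two conditional branches
}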
+ auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate(); + emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, + Pred); + AArch64CC::CondCode CC1, CC2; + changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); - if (CC2 != AArch64CC::AL) - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); - I.eraseFromParent(); - return true; -} - -bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( - MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { - assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); - assert(I.getOpcode() == TargetOpcode::G_BRCOND); - // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. - // - // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z - // instructions will not be produced, as they are conditional branch - // instructions that do not set flags. - if (!ProduceNonFlagSettingCondBr) + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); + if (CC2 != AArch64CC::AL) + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); + I.eraseFromParent(); + return true; +} + +bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( + MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { + assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); + assert(I.getOpcode() == TargetOpcode::G_BRCOND); + // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. + // + // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z + // instructions will not be produced, as they are conditional branch + // instructions that do not set flags. + if (!ProduceNonFlagSettingCondBr) return false; - MachineRegisterInfo &MRI = *MIB.getMRI(); - MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); - auto Pred = - static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate()); - Register LHS = ICmp.getOperand(2).getReg(); - Register RHS = ICmp.getOperand(3).getReg(); - - // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. + MachineRegisterInfo &MRI = *MIB.getMRI(); + MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); + auto Pred = + static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate()); + Register LHS = ICmp.getOperand(2).getReg(); + Register RHS = ICmp.getOperand(3).getReg(); + + // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); - MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); + MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); // When we can emit a TB(N)Z, prefer that. // // Handle non-commutative condition codes first. // Note that we don't want to do this when we have a G_AND because it can // become a tst. The tst will make the test bit in the TB(N)Z redundant. - if (VRegAndVal && !AndInst) { - int64_t C = VRegAndVal->Value.getSExtValue(); + if (VRegAndVal && !AndInst) { + int64_t C = VRegAndVal->Value.getSExtValue(); // When we have a greater-than comparison, we can just test if the msb is // zero. @@ -1550,97 +1550,97 @@ bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( } } - // Attempt to handle commutative condition codes. Right now, that's only - // eq/ne. 
- if (ICmpInst::isEquality(Pred)) { - if (!VRegAndVal) { - std::swap(RHS, LHS); - VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); - AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); - } - - if (VRegAndVal && VRegAndVal->Value == 0) { - // If there's a G_AND feeding into this branch, try to fold it away by - // emitting a TB(N)Z instead. - // - // Note: If we have LT, then it *is* possible to fold, but it wouldn't be - // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding - // would be redundant. - if (AndInst && - tryOptAndIntoCompareBranch( - *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) { - I.eraseFromParent(); - return true; - } - - // Otherwise, try to emit a CB(N)Z instead. - auto LHSTy = MRI.getType(LHS); - if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { - emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); - I.eraseFromParent(); - return true; - } - } - } - - return false; -} - -bool AArch64InstructionSelector::selectCompareBranchFedByICmp( - MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { - assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); - assert(I.getOpcode() == TargetOpcode::G_BRCOND); - if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) + // Attempt to handle commutative condition codes. Right now, that's only + // eq/ne. + if (ICmpInst::isEquality(Pred)) { + if (!VRegAndVal) { + std::swap(RHS, LHS); + VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); + AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); + } + + if (VRegAndVal && VRegAndVal->Value == 0) { + // If there's a G_AND feeding into this branch, try to fold it away by + // emitting a TB(N)Z instead. + // + // Note: If we have LT, then it *is* possible to fold, but it wouldn't be + // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding + // would be redundant. + if (AndInst && + tryOptAndIntoCompareBranch( + *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) { + I.eraseFromParent(); + return true; + } + + // Otherwise, try to emit a CB(N)Z instead. + auto LHSTy = MRI.getType(LHS); + if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { + emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); + I.eraseFromParent(); + return true; + } + } + } + + return false; +} + +bool AArch64InstructionSelector::selectCompareBranchFedByICmp( + MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { + assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); + assert(I.getOpcode() == TargetOpcode::G_BRCOND); + if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) return true; - - // Couldn't optimize. Emit a compare + a Bcc. - MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); - auto PredOp = ICmp.getOperand(1); - emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); - const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( - static_cast<CmpInst::Predicate>(PredOp.getPredicate())); - MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); - I.eraseFromParent(); - return true; -} - -bool AArch64InstructionSelector::selectCompareBranch( - MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { - Register CondReg = I.getOperand(0).getReg(); - MachineInstr *CCMI = MRI.getVRegDef(CondReg); - if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) { - CondReg = CCMI->getOperand(1).getReg(); - CCMI = MRI.getVRegDef(CondReg); - } - - // Try to select the G_BRCOND using whatever is feeding the condition if - // possible. 
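// [Editorial sketch, not part of the upstream diff] When neither a G_FCMP nor a G_ICMP
// feeds the condition, the code further below falls back to branching on bit 0 of the
// boolean condition register (a TB(N)Z on bit 0, or an ANDS with 1 followed by a
// conditional branch). A minimal standalone C++ illustration of that test, assuming no
// LLVM APIs:
#include <cstdint>
inline bool brcondTaken(uint64_t CondReg) {
  return (CondReg & 1u) != 0;  // the branch is taken iff bit 0 of the i1 condition is set
}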
- MachineIRBuilder MIB(I); - unsigned CCMIOpc = CCMI->getOpcode(); - if (CCMIOpc == TargetOpcode::G_FCMP) - return selectCompareBranchFedByFCmp(I, *CCMI, MIB); - if (CCMIOpc == TargetOpcode::G_ICMP) - return selectCompareBranchFedByICmp(I, *CCMI, MIB); - - // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z - // instructions will not be produced, as they are conditional branch - // instructions that do not set flags. - if (ProduceNonFlagSettingCondBr) { - emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, - I.getOperand(1).getMBB(), MIB); + + // Couldn't optimize. Emit a compare + a Bcc. + MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); + auto PredOp = ICmp.getOperand(1); + emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); + const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( + static_cast<CmpInst::Predicate>(PredOp.getPredicate())); + MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); + I.eraseFromParent(); + return true; +} + +bool AArch64InstructionSelector::selectCompareBranch( + MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { + Register CondReg = I.getOperand(0).getReg(); + MachineInstr *CCMI = MRI.getVRegDef(CondReg); + if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) { + CondReg = CCMI->getOperand(1).getReg(); + CCMI = MRI.getVRegDef(CondReg); + } + + // Try to select the G_BRCOND using whatever is feeding the condition if + // possible. + MachineIRBuilder MIB(I); + unsigned CCMIOpc = CCMI->getOpcode(); + if (CCMIOpc == TargetOpcode::G_FCMP) + return selectCompareBranchFedByFCmp(I, *CCMI, MIB); + if (CCMIOpc == TargetOpcode::G_ICMP) + return selectCompareBranchFedByICmp(I, *CCMI, MIB); + + // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z + // instructions will not be produced, as they are conditional branch + // instructions that do not set flags. + if (ProduceNonFlagSettingCondBr) { + emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, + I.getOperand(1).getMBB(), MIB); I.eraseFromParent(); return true; } - // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. - auto TstMI = - MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); - constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); - auto Bcc = MIB.buildInstr(AArch64::Bcc) - .addImm(AArch64CC::EQ) - .addMBB(I.getOperand(1).getMBB()); + // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. + auto TstMI = + MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); + constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); + auto Bcc = MIB.buildInstr(AArch64::Bcc) + .addImm(AArch64CC::EQ) + .addMBB(I.getOperand(1).getMBB()); I.eraseFromParent(); - return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); + return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); } /// Returns the element immediate value of a vector shift operand if found. @@ -1661,8 +1661,8 @@ static Optional<int64_t> getVectorShiftImm(Register Reg, return None; if (Idx == 1) - ImmVal = VRegAndVal->Value.getSExtValue(); - if (ImmVal != VRegAndVal->Value.getSExtValue()) + ImmVal = VRegAndVal->Value.getSExtValue(); + if (ImmVal != VRegAndVal->Value.getSExtValue()) return None; } @@ -1725,14 +1725,14 @@ bool AArch64InstructionSelector::selectVectorSHL( Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; } else if (Ty == LLT::vector(2, 32)) { Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; - } else if (Ty == LLT::vector(4, 16)) { - Opc = ImmVal ? 
AArch64::SHLv4i16_shift : AArch64::USHLv4i16; - } else if (Ty == LLT::vector(8, 16)) { - Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; - } else if (Ty == LLT::vector(16, 8)) { - Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; - } else if (Ty == LLT::vector(8, 8)) { - Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; + } else if (Ty == LLT::vector(4, 16)) { + Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16; + } else if (Ty == LLT::vector(8, 16)) { + Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; + } else if (Ty == LLT::vector(16, 8)) { + Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; + } else if (Ty == LLT::vector(8, 8)) { + Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; } else { LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); return false; @@ -1749,10 +1749,10 @@ bool AArch64InstructionSelector::selectVectorSHL( return true; } -bool AArch64InstructionSelector::selectVectorAshrLshr( +bool AArch64InstructionSelector::selectVectorAshrLshr( MachineInstr &I, MachineRegisterInfo &MRI) const { - assert(I.getOpcode() == TargetOpcode::G_ASHR || - I.getOpcode() == TargetOpcode::G_LSHR); + assert(I.getOpcode() == TargetOpcode::G_ASHR || + I.getOpcode() == TargetOpcode::G_LSHR); Register DstReg = I.getOperand(0).getReg(); const LLT Ty = MRI.getType(DstReg); Register Src1Reg = I.getOperand(1).getReg(); @@ -1761,40 +1761,40 @@ bool AArch64InstructionSelector::selectVectorAshrLshr( if (!Ty.isVector()) return false; - bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; - - // We expect the immediate case to be lowered in the PostLegalCombiner to - // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. - + bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; + + // We expect the immediate case to be lowered in the PostLegalCombiner to + // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. + // There is not a shift right register instruction, but the shift left // register instruction takes a signed value, where negative numbers specify a // right shift. unsigned Opc = 0; unsigned NegOpc = 0; - const TargetRegisterClass *RC = - getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI); + const TargetRegisterClass *RC = + getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI); if (Ty == LLT::vector(2, 64)) { - Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; + Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; NegOpc = AArch64::NEGv2i64; } else if (Ty == LLT::vector(4, 32)) { - Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; + Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; NegOpc = AArch64::NEGv4i32; } else if (Ty == LLT::vector(2, 32)) { - Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; + Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; NegOpc = AArch64::NEGv2i32; - } else if (Ty == LLT::vector(4, 16)) { - Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; - NegOpc = AArch64::NEGv4i16; - } else if (Ty == LLT::vector(8, 16)) { - Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; - NegOpc = AArch64::NEGv8i16; - } else if (Ty == LLT::vector(16, 8)) { - Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; - NegOpc = AArch64::NEGv16i8; - } else if (Ty == LLT::vector(8, 8)) { - Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; - NegOpc = AArch64::NEGv8i8; + } else if (Ty == LLT::vector(4, 16)) { + Opc = IsASHR ? 
AArch64::SSHLv4i16 : AArch64::USHLv4i16; + NegOpc = AArch64::NEGv4i16; + } else if (Ty == LLT::vector(8, 16)) { + Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; + NegOpc = AArch64::NEGv8i16; + } else if (Ty == LLT::vector(16, 8)) { + Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; + NegOpc = AArch64::NEGv16i8; + } else if (Ty == LLT::vector(8, 8)) { + Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; + NegOpc = AArch64::NEGv8i8; } else { LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); return false; @@ -1931,40 +1931,40 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { MRI.setType(DstReg, LLT::scalar(64)); return true; } - case AArch64::G_DUP: { - // Convert the type from p0 to s64 to help selection. - LLT DstTy = MRI.getType(I.getOperand(0).getReg()); - if (!DstTy.getElementType().isPointer()) - return false; - MachineIRBuilder MIB(I); - auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); - MRI.setType(I.getOperand(0).getReg(), - DstTy.changeElementType(LLT::scalar(64))); - MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); - I.getOperand(1).setReg(NewSrc.getReg(0)); - return true; - } - case TargetOpcode::G_UITOFP: - case TargetOpcode::G_SITOFP: { - // If both source and destination regbanks are FPR, then convert the opcode - // to G_SITOF so that the importer can select it to an fpr variant. - // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank - // copy. - Register SrcReg = I.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - LLT DstTy = MRI.getType(I.getOperand(0).getReg()); - if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) - return false; - - if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { - if (I.getOpcode() == TargetOpcode::G_SITOFP) - I.setDesc(TII.get(AArch64::G_SITOF)); - else - I.setDesc(TII.get(AArch64::G_UITOF)); - return true; - } - return false; - } + case AArch64::G_DUP: { + // Convert the type from p0 to s64 to help selection. + LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + if (!DstTy.getElementType().isPointer()) + return false; + MachineIRBuilder MIB(I); + auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); + MRI.setType(I.getOperand(0).getReg(), + DstTy.changeElementType(LLT::scalar(64))); + MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); + I.getOperand(1).setReg(NewSrc.getReg(0)); + return true; + } + case TargetOpcode::G_UITOFP: + case TargetOpcode::G_SITOFP: { + // If both source and destination regbanks are FPR, then convert the opcode + // to G_SITOF so that the importer can select it to an fpr variant. + // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank + // copy. + Register SrcReg = I.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) + return false; + + if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { + if (I.getOpcode() == TargetOpcode::G_SITOFP) + I.setDesc(TII.get(AArch64::G_SITOF)); + else + I.setDesc(TII.get(AArch64::G_UITOF)); + return true; + } + return false; + } default: return false; } @@ -2005,14 +2005,14 @@ bool AArch64InstructionSelector::convertPtrAddToAdd( LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"); return false; } - - // Also take the opportunity here to try to do some optimization. 
- // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. - Register NegatedReg; - if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) - return true; - I.getOperand(2).setReg(NegatedReg); - I.setDesc(TII.get(TargetOpcode::G_SUB)); + + // Also take the opportunity here to try to do some optimization. + // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. + Register NegatedReg; + if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) + return true; + I.getOperand(2).setReg(NegatedReg); + I.setDesc(TII.get(TargetOpcode::G_SUB)); return true; } @@ -2102,17 +2102,17 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { MachineRegisterInfo &MRI = MF.getRegInfo(); switch (I.getOpcode()) { - case TargetOpcode::G_BR: { - // If the branch jumps to the fallthrough block, don't bother emitting it. - // Only do this for -O0 for a good code size improvement, because when - // optimizations are enabled we want to leave this choice to - // MachineBlockPlacement. - bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None; - if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB())) - return false; - I.eraseFromParent(); - return true; - } + case TargetOpcode::G_BR: { + // If the branch jumps to the fallthrough block, don't bother emitting it. + // Only do this for -O0 for a good code size improvement, because when + // optimizations are enabled we want to leave this choice to + // MachineBlockPlacement. + bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None; + if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB())) + return false; + I.eraseFromParent(); + return true; + } case TargetOpcode::G_SHL: return earlySelectSHL(I, MRI); case TargetOpcode::G_CONSTANT: { @@ -2232,8 +2232,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { MachineIRBuilder MIB(I); switch (Opcode) { - case TargetOpcode::G_BRCOND: - return selectCompareBranch(I, MF, MRI); + case TargetOpcode::G_BRCOND: + return selectCompareBranch(I, MF, MRI); case TargetOpcode::G_BRINDIRECT: { I.setDesc(TII.get(AArch64::BR)); @@ -2313,7 +2313,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); - const LLT s128 = LLT::scalar(128); + const LLT s128 = LLT::scalar(128); const LLT p0 = LLT::pointer(0, 64); const Register DefReg = I.getOperand(0).getReg(); @@ -2323,10 +2323,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // FIXME: Redundant check, but even less readable when factored out. if (isFP) { - if (Ty != s32 && Ty != s64 && Ty != s128) { + if (Ty != s32 && Ty != s64 && Ty != s128) { LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty << " constant, expected: " << s32 << " or " << s64 - << " or " << s128 << '\n'); + << " or " << s128 << '\n'); return false; } @@ -2339,9 +2339,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // The case when we have 0.0 is covered by tablegen. Reject it here so we // can be sure tablegen works correctly and isn't rescued by this code. - // 0.0 is not covered by tablegen for FP128. So we will handle this - // scenario in the code here. - if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) + // 0.0 is not covered by tablegen for FP128. So we will handle this + // scenario in the code here. 
+ if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) return false; } else { // s32 and s64 are covered by tablegen. @@ -2368,17 +2368,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // Either emit a FMOV, or emit a copy to emit a normal mov. const TargetRegisterClass &GPRRC = DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass; - const TargetRegisterClass &FPRRC = - DefSize == 32 ? AArch64::FPR32RegClass - : (DefSize == 64 ? AArch64::FPR64RegClass - : AArch64::FPR128RegClass); + const TargetRegisterClass &FPRRC = + DefSize == 32 ? AArch64::FPR32RegClass + : (DefSize == 64 ? AArch64::FPR64RegClass + : AArch64::FPR128RegClass); // Can we use a FMOV instruction to represent the immediate? if (emitFMovForFConstant(I, MRI)) return true; // For 64b values, emit a constant pool load instead. - if (DefSize == 64 || DefSize == 128) { + if (DefSize == 64 || DefSize == 128) { auto *FPImm = I.getOperand(1).getFPImm(); MachineIRBuilder MIB(I); auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB); @@ -2571,21 +2571,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } auto &MemOp = **I.memoperands_begin(); - uint64_t MemSizeInBytes = MemOp.getSize(); + uint64_t MemSizeInBytes = MemOp.getSize(); if (MemOp.isAtomic()) { // For now we just support s8 acquire loads to be able to compile stack // protector code. if (MemOp.getOrdering() == AtomicOrdering::Acquire && - MemSizeInBytes == 1) { + MemSizeInBytes == 1) { I.setDesc(TII.get(AArch64::LDARB)); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n"); return false; } - unsigned MemSizeInBits = MemSizeInBytes * 8; + unsigned MemSizeInBits = MemSizeInBytes * 8; -#ifndef NDEBUG +#ifndef NDEBUG const Register PtrReg = I.getOperand(1).getReg(); const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); // Sanity-check the pointer register. @@ -2598,78 +2598,78 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { const Register ValReg = I.getOperand(0).getReg(); const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); - // Helper lambda for partially selecting I. Either returns the original - // instruction with an updated opcode, or a new instruction. - auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { - bool IsStore = I.getOpcode() == TargetOpcode::G_STORE; - const unsigned NewOpc = - selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); - if (NewOpc == I.getOpcode()) - return nullptr; - // Check if we can fold anything into the addressing mode. - auto AddrModeFns = - selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); - if (!AddrModeFns) { - // Can't fold anything. Use the original instruction. - I.setDesc(TII.get(NewOpc)); - I.addOperand(MachineOperand::CreateImm(0)); - return &I; + // Helper lambda for partially selecting I. Either returns the original + // instruction with an updated opcode, or a new instruction. + auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { + bool IsStore = I.getOpcode() == TargetOpcode::G_STORE; + const unsigned NewOpc = + selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); + if (NewOpc == I.getOpcode()) + return nullptr; + // Check if we can fold anything into the addressing mode. + auto AddrModeFns = + selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); + if (!AddrModeFns) { + // Can't fold anything. Use the original instruction. 
+ I.setDesc(TII.get(NewOpc)); + I.addOperand(MachineOperand::CreateImm(0)); + return &I; } - // Folded something. Create a new instruction and return it. - auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); - IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg); - NewInst.cloneMemRefs(I); - for (auto &Fn : *AddrModeFns) - Fn(NewInst); - I.eraseFromParent(); - return &*NewInst; - }; + // Folded something. Create a new instruction and return it. + auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); + IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg); + NewInst.cloneMemRefs(I); + for (auto &Fn : *AddrModeFns) + Fn(NewInst); + I.eraseFromParent(); + return &*NewInst; + }; - MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); - if (!LoadStore) - return false; + MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); + if (!LoadStore) + return false; // If we're storing a 0, use WZR/XZR. - if (Opcode == TargetOpcode::G_STORE) { - auto CVal = getConstantVRegValWithLookThrough( - LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true, - /*HandleFConstants = */ false); - if (CVal && CVal->Value == 0) { - switch (LoadStore->getOpcode()) { - case AArch64::STRWui: - case AArch64::STRHHui: - case AArch64::STRBBui: - LoadStore->getOperand(0).setReg(AArch64::WZR); - break; - case AArch64::STRXui: - LoadStore->getOperand(0).setReg(AArch64::XZR); - break; - } + if (Opcode == TargetOpcode::G_STORE) { + auto CVal = getConstantVRegValWithLookThrough( + LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true, + /*HandleFConstants = */ false); + if (CVal && CVal->Value == 0) { + switch (LoadStore->getOpcode()) { + case AArch64::STRWui: + case AArch64::STRHHui: + case AArch64::STRBBui: + LoadStore->getOperand(0).setReg(AArch64::WZR); + break; + case AArch64::STRXui: + LoadStore->getOperand(0).setReg(AArch64::XZR); + break; + } } } if (IsZExtLoad) { - // The zextload from a smaller type to i32 should be handled by the - // importer. - if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) + // The zextload from a smaller type to i32 should be handled by the + // importer. + if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) return false; // If we have a ZEXTLOAD then change the load's type to be a narrower reg - // and zero_extend with SUBREG_TO_REG. + // and zero_extend with SUBREG_TO_REG. Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - Register DstReg = LoadStore->getOperand(0).getReg(); - LoadStore->getOperand(0).setReg(LdReg); + Register DstReg = LoadStore->getOperand(0).getReg(); + LoadStore->getOperand(0).setReg(LdReg); - MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); + MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) .addImm(0) .addUse(LdReg) .addImm(AArch64::sub_32); - constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); + constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, MRI); } - return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); + return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); } case TargetOpcode::G_SMULH: @@ -2700,21 +2700,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { // operands to use appropriate classes. 
return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - case TargetOpcode::G_LSHR: + case TargetOpcode::G_LSHR: case TargetOpcode::G_ASHR: if (MRI.getType(I.getOperand(0).getReg()).isVector()) - return selectVectorAshrLshr(I, MRI); + return selectVectorAshrLshr(I, MRI); LLVM_FALLTHROUGH; case TargetOpcode::G_SHL: if (Opcode == TargetOpcode::G_SHL && MRI.getType(I.getOperand(0).getReg()).isVector()) return selectVectorSHL(I, MRI); LLVM_FALLTHROUGH; - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_OR: { + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_OR: { // Reject the various things we don't support yet. if (unsupportedBinOp(I, RBI, MRI, TRI)) return false; @@ -2743,24 +2743,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { I.eraseFromParent(); return true; } - case TargetOpcode::G_SADDO: - case TargetOpcode::G_UADDO: - case TargetOpcode::G_SSUBO: - case TargetOpcode::G_USUBO: { - // Emit the operation and get the correct condition code. + case TargetOpcode::G_SADDO: + case TargetOpcode::G_UADDO: + case TargetOpcode::G_SSUBO: + case TargetOpcode::G_USUBO: { + // Emit the operation and get the correct condition code. MachineIRBuilder MIRBuilder(I); - auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), - I.getOperand(2), I.getOperand(3), MIRBuilder); + auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), + I.getOperand(2), I.getOperand(3), MIRBuilder); // Now, put the overflow result in the register given by the first operand - // to the overflow op. CSINC increments the result when the predicate is - // false, so to get the increment when it's true, we need to use the - // inverse. In this case, we want to increment when carry is set. - Register ZReg = AArch64::WZR; + // to the overflow op. CSINC increments the result when the predicate is + // false, so to get the increment when it's true, we need to use the + // inverse. In this case, we want to increment when carry is set. + Register ZReg = AArch64::WZR; auto CsetMI = MIRBuilder .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, - {ZReg, ZReg}) - .addImm(getInvertedCondCode(OpAndCC.second)); + {ZReg, ZReg}) + .addImm(getInvertedCondCode(OpAndCC.second)); constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); I.eraseFromParent(); return true; @@ -2768,7 +2768,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { case TargetOpcode::G_PTRMASK: { Register MaskReg = I.getOperand(2).getReg(); - Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI); + Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI); // TODO: Implement arbitrary cases if (!MaskVal || !isShiftedMask_64(*MaskVal)) return false; @@ -3059,15 +3059,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { if (tryOptSelect(I)) return true; - // Make sure to use an unused vreg instead of wzr, so that the peephole - // optimizations will be able to optimize these. 
- MachineIRBuilder MIB(I); - Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) - .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); - constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); - if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB)) - return false; + // Make sure to use an unused vreg instead of wzr, so that the peephole + // optimizations will be able to optimize these. + MachineIRBuilder MIB(I); + Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); + constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); + if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB)) + return false; I.eraseFromParent(); return true; } @@ -3082,21 +3082,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { } MachineIRBuilder MIRBuilder(I); - auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); - emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), - MIRBuilder); + auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); + emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), + MIRBuilder); emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder); I.eraseFromParent(); return true; } case TargetOpcode::G_FCMP: { - MachineIRBuilder MIRBuilder(I); - CmpInst::Predicate Pred = - static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); - if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), - MIRBuilder, Pred) || - !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder)) + MachineIRBuilder MIRBuilder(I); + CmpInst::Predicate Pred = + static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); + if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), + MIRBuilder, Pred) || + !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder)) return false; I.eraseFromParent(); return true; @@ -3136,24 +3136,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); } } - case AArch64::G_DUP: { - // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by - // imported patterns. Do it manually here. Avoiding generating s16 gpr is - // difficult because at RBS we may end up pessimizing the fpr case if we - // decided to add an anyextend to fix this. Manual selection is the most - // robust solution for now. - Register SrcReg = I.getOperand(1).getReg(); - if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID) - return false; // We expect the fpr regbank case to be imported. - LLT SrcTy = MRI.getType(SrcReg); - if (SrcTy.getSizeInBits() == 16) - I.setDesc(TII.get(AArch64::DUPv8i16gpr)); - else if (SrcTy.getSizeInBits() == 8) - I.setDesc(TII.get(AArch64::DUPv16i8gpr)); - else - return false; - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); - } + case AArch64::G_DUP: { + // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by + // imported patterns. Do it manually here. Avoiding generating s16 gpr is + // difficult because at RBS we may end up pessimizing the fpr case if we + // decided to add an anyextend to fix this. Manual selection is the most + // robust solution for now. 
+ Register SrcReg = I.getOperand(1).getReg(); + if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID) + return false; // We expect the fpr regbank case to be imported. + LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.getSizeInBits() == 16) + I.setDesc(TII.get(AArch64::DUPv8i16gpr)); + else if (SrcTy.getSizeInBits() == 8) + I.setDesc(TII.get(AArch64::DUPv16i8gpr)); + else + return false; + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } case TargetOpcode::G_INTRINSIC_TRUNC: return selectIntrinsicTrunc(I, MRI); case TargetOpcode::G_INTRINSIC_ROUND: @@ -3174,52 +3174,52 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return selectConcatVectors(I, MRI); case TargetOpcode::G_JUMP_TABLE: return selectJumpTable(I, MRI); - case TargetOpcode::G_VECREDUCE_FADD: - case TargetOpcode::G_VECREDUCE_ADD: - return selectReduction(I, MRI); - } - - return false; -} - -bool AArch64InstructionSelector::selectReduction( - MachineInstr &I, MachineRegisterInfo &MRI) const { - Register VecReg = I.getOperand(1).getReg(); - LLT VecTy = MRI.getType(VecReg); - if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { - unsigned Opc = 0; - if (VecTy == LLT::vector(16, 8)) - Opc = AArch64::ADDVv16i8v; - else if (VecTy == LLT::vector(8, 16)) - Opc = AArch64::ADDVv8i16v; - else if (VecTy == LLT::vector(4, 32)) - Opc = AArch64::ADDVv4i32v; - else if (VecTy == LLT::vector(2, 64)) - Opc = AArch64::ADDPv2i64p; - else { - LLVM_DEBUG(dbgs() << "Unhandled type for add reduction"); - return false; - } - I.setDesc(TII.get(Opc)); - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_ADD: + return selectReduction(I, MRI); } - if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) { - unsigned Opc = 0; - if (VecTy == LLT::vector(2, 32)) - Opc = AArch64::FADDPv2i32p; - else if (VecTy == LLT::vector(2, 64)) - Opc = AArch64::FADDPv2i64p; - else { - LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction"); - return false; - } - I.setDesc(TII.get(Opc)); - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); - } return false; } +bool AArch64InstructionSelector::selectReduction( + MachineInstr &I, MachineRegisterInfo &MRI) const { + Register VecReg = I.getOperand(1).getReg(); + LLT VecTy = MRI.getType(VecReg); + if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { + unsigned Opc = 0; + if (VecTy == LLT::vector(16, 8)) + Opc = AArch64::ADDVv16i8v; + else if (VecTy == LLT::vector(8, 16)) + Opc = AArch64::ADDVv8i16v; + else if (VecTy == LLT::vector(4, 32)) + Opc = AArch64::ADDVv4i32v; + else if (VecTy == LLT::vector(2, 64)) + Opc = AArch64::ADDPv2i64p; + else { + LLVM_DEBUG(dbgs() << "Unhandled type for add reduction"); + return false; + } + I.setDesc(TII.get(Opc)); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } + + if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) { + unsigned Opc = 0; + if (VecTy == LLT::vector(2, 32)) + Opc = AArch64::FADDPv2i32p; + else if (VecTy == LLT::vector(2, 64)) + Opc = AArch64::FADDPv2i64p; + else { + LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction"); + return false; + } + I.setDesc(TII.get(Opc)); + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + } + return false; +} + bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); @@ -3230,8 +3230,8 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, Register TargetReg = 
MRI.createVirtualRegister(&AArch64::GPR64RegClass); Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); - - MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr); + + MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr); auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg}, {JTAddr, Index}) .addJumpTableIndex(JTI); @@ -3268,20 +3268,20 @@ bool AArch64InstructionSelector::selectTLSGlobalValue( const GlobalValue &GV = *I.getOperand(1).getGlobal(); MachineIRBuilder MIB(I); - auto LoadGOT = - MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) - .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); + auto LoadGOT = + MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) + .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, - {LoadGOT.getReg(0)}) + {LoadGOT.getReg(0)}) .addImm(0); - MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); + MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); // TLS calls preserve all registers except those that absolutely must be // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be // silly). MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load}) - .addUse(AArch64::X0, RegState::Implicit) + .addUse(AArch64::X0, RegState::Implicit) .addDef(AArch64::X0, RegState::Implicit) .addRegMask(TRI.getTLSCallPreservedMask()); @@ -3767,7 +3767,7 @@ bool AArch64InstructionSelector::selectExtractElt( (void)WideTy; assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && "source register size too small!"); - assert(!NarrowTy.isVector() && "cannot extract vector into vector!"); + assert(!NarrowTy.isVector() && "cannot extract vector into vector!"); // Need the lane index to determine the correct copy opcode. 
MachineOperand &LaneIdxOp = I.getOperand(2); @@ -3782,7 +3782,7 @@ bool AArch64InstructionSelector::selectExtractElt( auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); if (!VRegAndVal) return false; - unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); + unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); MachineIRBuilder MIRBuilder(I); @@ -4005,10 +4005,10 @@ static std::pair<unsigned, unsigned> getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { unsigned Opc, SubregIdx; if (RB.getID() == AArch64::GPRRegBankID) { - if (EltSize == 16) { - Opc = AArch64::INSvi16gpr; - SubregIdx = AArch64::ssub; - } else if (EltSize == 32) { + if (EltSize == 16) { + Opc = AArch64::INSvi16gpr; + SubregIdx = AArch64::ssub; + } else if (EltSize == 32) { Opc = AArch64::INSvi32gpr; SubregIdx = AArch64::ssub; } else if (EltSize == 64) { @@ -4037,93 +4037,93 @@ getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { return std::make_pair(Opc, SubregIdx); } -MachineInstr *AArch64InstructionSelector::emitInstr( - unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, - std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, - const ComplexRendererFns &RenderFns) const { - assert(Opcode && "Expected an opcode?"); - assert(!isPreISelGenericOpcode(Opcode) && - "Function should only be used to produce selected instructions!"); - auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps); - if (RenderFns) - for (auto &Fn : *RenderFns) - Fn(MI); - constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); - return &*MI; -} - -MachineInstr *AArch64InstructionSelector::emitAddSub( - const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, - Register Dst, MachineOperand &LHS, MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const { - MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); - auto Ty = MRI.getType(LHS.getReg()); - assert(!Ty.isVector() && "Expected a scalar or pointer?"); - unsigned Size = Ty.getSizeInBits(); - assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only"); - bool Is32Bit = Size == 32; - - // INSTRri form with positive arithmetic immediate. - if (auto Fns = selectArithImmed(RHS)) - return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS}, - MIRBuilder, Fns); - - // INSTRri form with negative arithmetic immediate. - if (auto Fns = selectNegArithImmed(RHS)) - return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS}, - MIRBuilder, Fns); - - // INSTRrx form. - if (auto Fns = selectArithExtendedRegister(RHS)) - return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS}, - MIRBuilder, Fns); - - // INSTRrs form. 
- if (auto Fns = selectShiftedRegister(RHS)) - return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS}, - MIRBuilder, Fns); - return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS}, - MIRBuilder); -} - +MachineInstr *AArch64InstructionSelector::emitInstr( + unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, + std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, + const ComplexRendererFns &RenderFns) const { + assert(Opcode && "Expected an opcode?"); + assert(!isPreISelGenericOpcode(Opcode) && + "Function should only be used to produce selected instructions!"); + auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps); + if (RenderFns) + for (auto &Fn : *RenderFns) + Fn(MI); + constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); + return &*MI; +} + +MachineInstr *AArch64InstructionSelector::emitAddSub( + const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, + Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); + assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); + auto Ty = MRI.getType(LHS.getReg()); + assert(!Ty.isVector() && "Expected a scalar or pointer?"); + unsigned Size = Ty.getSizeInBits(); + assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only"); + bool Is32Bit = Size == 32; + + // INSTRri form with positive arithmetic immediate. + if (auto Fns = selectArithImmed(RHS)) + return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS}, + MIRBuilder, Fns); + + // INSTRri form with negative arithmetic immediate. + if (auto Fns = selectNegArithImmed(RHS)) + return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS}, + MIRBuilder, Fns); + + // INSTRrx form. + if (auto Fns = selectArithExtendedRegister(RHS)) + return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS}, + MIRBuilder, Fns); + + // INSTRrs form. 
+ if (auto Fns = selectShiftedRegister(RHS)) + return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS}, + MIRBuilder, Fns); + return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS}, + MIRBuilder); +} + MachineInstr * AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { - const std::array<std::array<unsigned, 2>, 5> OpcTable{ - {{AArch64::ADDXri, AArch64::ADDWri}, - {AArch64::ADDXrs, AArch64::ADDWrs}, - {AArch64::ADDXrr, AArch64::ADDWrr}, - {AArch64::SUBXri, AArch64::SUBWri}, - {AArch64::ADDXrx, AArch64::ADDWrx}}}; - return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder); -} - -MachineInstr * -AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, - MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const { - const std::array<std::array<unsigned, 2>, 5> OpcTable{ - {{AArch64::ADDSXri, AArch64::ADDSWri}, - {AArch64::ADDSXrs, AArch64::ADDSWrs}, - {AArch64::ADDSXrr, AArch64::ADDSWrr}, - {AArch64::SUBSXri, AArch64::SUBSWri}, - {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; - return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); -} - -MachineInstr * -AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, - MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const { - const std::array<std::array<unsigned, 2>, 5> OpcTable{ - {{AArch64::SUBSXri, AArch64::SUBSWri}, - {AArch64::SUBSXrs, AArch64::SUBSWrs}, - {AArch64::SUBSXrr, AArch64::SUBSWrr}, - {AArch64::ADDSXri, AArch64::ADDSWri}, - {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; - return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); + const std::array<std::array<unsigned, 2>, 5> OpcTable{ + {{AArch64::ADDXri, AArch64::ADDWri}, + {AArch64::ADDXrs, AArch64::ADDWrs}, + {AArch64::ADDXrr, AArch64::ADDWrr}, + {AArch64::SUBXri, AArch64::SUBWri}, + {AArch64::ADDXrx, AArch64::ADDWrx}}}; + return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder); +} + +MachineInstr * +AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + const std::array<std::array<unsigned, 2>, 5> OpcTable{ + {{AArch64::ADDSXri, AArch64::ADDSWri}, + {AArch64::ADDSXrs, AArch64::ADDSWrs}, + {AArch64::ADDSXrr, AArch64::ADDSWrr}, + {AArch64::SUBSXri, AArch64::SUBSWri}, + {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; + return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); +} + +MachineInstr * +AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + const std::array<std::array<unsigned, 2>, 5> OpcTable{ + {{AArch64::SUBSXri, AArch64::SUBSWri}, + {AArch64::SUBSXrs, AArch64::SUBSWrs}, + {AArch64::SUBSXrr, AArch64::SUBSWrr}, + {AArch64::ADDSXri, AArch64::ADDSWri}, + {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; + return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); } MachineInstr * @@ -4131,129 +4131,129 @@ AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); - auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; - return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); + auto RC = Is32Bit ? 
&AArch64::GPR32RegClass : &AArch64::GPR64RegClass; + return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); } MachineInstr * -AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, +AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { - assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); + assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - LLT Ty = MRI.getType(LHS.getReg()); - unsigned RegSize = Ty.getSizeInBits(); + LLT Ty = MRI.getType(LHS.getReg()); + unsigned RegSize = Ty.getSizeInBits(); bool Is32Bit = (RegSize == 32); - const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, - {AArch64::ANDSXrs, AArch64::ANDSWrs}, - {AArch64::ANDSXrr, AArch64::ANDSWrr}}; - // ANDS needs a logical immediate for its immediate form. Check if we can - // fold one in. - if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) { - int64_t Imm = ValAndVReg->Value.getSExtValue(); - - if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) { - auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS}); - TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); - constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); - return &*TstMI; - } - } - - if (auto Fns = selectLogicalShiftedRegister(RHS)) - return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns); - return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder); + const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, + {AArch64::ANDSXrs, AArch64::ANDSWrs}, + {AArch64::ANDSXrr, AArch64::ANDSWrr}}; + // ANDS needs a logical immediate for its immediate form. Check if we can + // fold one in. + if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) { + int64_t Imm = ValAndVReg->Value.getSExtValue(); + + if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) { + auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS}); + TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); + constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); + return &*TstMI; + } + } + + if (auto Fns = selectLogicalShiftedRegister(RHS)) + return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns); + return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder); } -MachineInstr *AArch64InstructionSelector::emitIntegerCompare( +MachineInstr *AArch64InstructionSelector::emitIntegerCompare( MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const { assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); assert(Predicate.isPredicate() && "Expected predicate?"); MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - LLT CmpTy = MRI.getType(LHS.getReg()); - assert(!CmpTy.isVector() && "Expected scalar or pointer"); - unsigned Size = CmpTy.getSizeInBits(); - (void)Size; - assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?"); - // Fold the compare into a cmn or tst if possible. 
- if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) - return FoldCmp; - auto Dst = MRI.cloneVirtualRegister(LHS.getReg()); - return emitSUBS(Dst, LHS, RHS, MIRBuilder); -} - -MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( - Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { - MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); -#ifndef NDEBUG - LLT Ty = MRI.getType(Dst); - assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && - "Expected a 32-bit scalar register?"); -#endif - const Register ZeroReg = AArch64::WZR; - auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) { - auto CSet = - MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg}) - .addImm(getInvertedCondCode(CC)); - constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI); - return &*CSet; - }; - - AArch64CC::CondCode CC1, CC2; - changeFCMPPredToAArch64CC(Pred, CC1, CC2); - if (CC2 == AArch64CC::AL) - return EmitCSet(Dst, CC1); - - const TargetRegisterClass *RC = &AArch64::GPR32RegClass; - Register Def1Reg = MRI.createVirtualRegister(RC); - Register Def2Reg = MRI.createVirtualRegister(RC); - EmitCSet(Def1Reg, CC1); - EmitCSet(Def2Reg, CC2); - auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); - constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); - return &*OrMI; -} - -MachineInstr * -AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS, - MachineIRBuilder &MIRBuilder, - Optional<CmpInst::Predicate> Pred) const { - MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); - LLT Ty = MRI.getType(LHS); - if (Ty.isVector()) - return nullptr; - unsigned OpSize = Ty.getSizeInBits(); - if (OpSize != 32 && OpSize != 64) - return nullptr; - - // If this is a compare against +0.0, then we don't have - // to explicitly materialize a constant. - const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI); - bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); - - auto IsEqualityPred = [](CmpInst::Predicate P) { - return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || - P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; - }; - if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { - // Try commutating the operands. - const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI); - if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { - ShouldUseImm = true; - std::swap(LHS, RHS); - } - } - unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, - {AArch64::FCMPSri, AArch64::FCMPDri}}; - unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64]; - - // Partially build the compare. Decide if we need to add a use for the - // third operand based off whether or not we're comparing against 0.0. - auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS); - if (!ShouldUseImm) - CmpMI.addUse(RHS); + LLT CmpTy = MRI.getType(LHS.getReg()); + assert(!CmpTy.isVector() && "Expected scalar or pointer"); + unsigned Size = CmpTy.getSizeInBits(); + (void)Size; + assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?"); + // Fold the compare into a cmn or tst if possible. 
+ if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) + return FoldCmp; + auto Dst = MRI.cloneVirtualRegister(LHS.getReg()); + return emitSUBS(Dst, LHS, RHS, MIRBuilder); +} + +MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( + Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); +#ifndef NDEBUG + LLT Ty = MRI.getType(Dst); + assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && + "Expected a 32-bit scalar register?"); +#endif + const Register ZeroReg = AArch64::WZR; + auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) { + auto CSet = + MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg}) + .addImm(getInvertedCondCode(CC)); + constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI); + return &*CSet; + }; + + AArch64CC::CondCode CC1, CC2; + changeFCMPPredToAArch64CC(Pred, CC1, CC2); + if (CC2 == AArch64CC::AL) + return EmitCSet(Dst, CC1); + + const TargetRegisterClass *RC = &AArch64::GPR32RegClass; + Register Def1Reg = MRI.createVirtualRegister(RC); + Register Def2Reg = MRI.createVirtualRegister(RC); + EmitCSet(Def1Reg, CC1); + EmitCSet(Def2Reg, CC2); + auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); + constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); + return &*OrMI; +} + +MachineInstr * +AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS, + MachineIRBuilder &MIRBuilder, + Optional<CmpInst::Predicate> Pred) const { + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + LLT Ty = MRI.getType(LHS); + if (Ty.isVector()) + return nullptr; + unsigned OpSize = Ty.getSizeInBits(); + if (OpSize != 32 && OpSize != 64) + return nullptr; + + // If this is a compare against +0.0, then we don't have + // to explicitly materialize a constant. + const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI); + bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); + + auto IsEqualityPred = [](CmpInst::Predicate P) { + return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || + P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; + }; + if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { + // Try commutating the operands. + const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI); + if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { + ShouldUseImm = true; + std::swap(LHS, RHS); + } + } + unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, + {AArch64::FCMPSri, AArch64::FCMPDri}}; + unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64]; + + // Partially build the compare. Decide if we need to add a use for the + // third operand based off whether or not we're comparing against 0.0. 
+ auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS); + if (!ShouldUseImm) + CmpMI.addUse(RHS); constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); - return &*CmpMI; + return &*CmpMI; } MachineInstr *AArch64InstructionSelector::emitVectorConcat( @@ -4363,25 +4363,25 @@ AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred, return &*I; } -std::pair<MachineInstr *, AArch64CC::CondCode> -AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, - MachineOperand &LHS, - MachineOperand &RHS, - MachineIRBuilder &MIRBuilder) const { - switch (Opcode) { - default: - llvm_unreachable("Unexpected opcode!"); - case TargetOpcode::G_SADDO: - return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); - case TargetOpcode::G_UADDO: - return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS); - case TargetOpcode::G_SSUBO: - return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); - case TargetOpcode::G_USUBO: - return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); - } -} - +std::pair<MachineInstr *, AArch64CC::CondCode> +AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, + MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode!"); + case TargetOpcode::G_SADDO: + return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); + case TargetOpcode::G_UADDO: + return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS); + case TargetOpcode::G_SSUBO: + return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); + case TargetOpcode::G_USUBO: + return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); + } +} + bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { MachineIRBuilder MIB(I); MachineRegisterInfo &MRI = *MIB.getMRI(); @@ -4441,17 +4441,17 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { AArch64CC::CondCode CondCode; if (CondOpc == TargetOpcode::G_ICMP) { - auto Pred = - static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); + auto Pred = + static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); CondCode = changeICMPPredToAArch64CC(Pred); - emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), - CondDef->getOperand(1), MIB); + emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), + CondDef->getOperand(1), MIB); } else { // Get the condition code for the select. - auto Pred = - static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); + auto Pred = + static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); AArch64CC::CondCode CondCode2; - changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); + changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two // instructions to emit the comparison. @@ -4460,16 +4460,16 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { if (CondCode2 != AArch64CC::AL) return false; - if (!emitFPCompare(CondDef->getOperand(2).getReg(), - CondDef->getOperand(3).getReg(), MIB)) { - LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); + if (!emitFPCompare(CondDef->getOperand(2).getReg(), + CondDef->getOperand(3).getReg(), MIB)) { + LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); return false; - } + } } // Emit the select. 
- emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(), - I.getOperand(3).getReg(), CondCode, MIB); + emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(), + I.getOperand(3).getReg(), CondCode, MIB); I.eraseFromParent(); return true; } @@ -4552,15 +4552,15 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( // Produce this if the compare is signed: // // tst x, y - if (!CmpInst::isUnsigned(P) && LHSDef && + if (!CmpInst::isUnsigned(P) && LHSDef && LHSDef->getOpcode() == TargetOpcode::G_AND) { // Make sure that the RHS is 0. auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI); if (!ValAndVReg || ValAndVReg->Value != 0) return nullptr; - return emitTST(LHSDef->getOperand(1), - LHSDef->getOperand(2), MIRBuilder); + return emitTST(LHSDef->getOperand(1), + LHSDef->getOperand(2), MIRBuilder); } return nullptr; @@ -4708,7 +4708,7 @@ bool AArch64InstructionSelector::selectInsertElt( auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI); if (!VRegAndVal) return false; - unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); + unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); // Perform the lane insert. Register SrcReg = I.getOperand(1).getReg(); @@ -4765,9 +4765,9 @@ bool AArch64InstructionSelector::selectInsertElt( bool AArch64InstructionSelector::tryOptConstantBuildVec( MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); - unsigned DstSize = DstTy.getSizeInBits(); - assert(DstSize <= 128 && "Unexpected build_vec type!"); - if (DstSize < 32) + unsigned DstSize = DstTy.getSizeInBits(); + assert(DstSize <= 128 && "Unexpected build_vec type!"); + if (DstSize < 32) return false; // Check if we're building a constant vector, in which case we want to // generate a constant pool load instead of a vector insert sequence. @@ -4788,24 +4788,24 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec( } Constant *CV = ConstantVector::get(Csts); MachineIRBuilder MIB(I); - if (CV->isNullValue()) { - // Until the importer can support immAllZerosV in pattern leaf nodes, - // select a zero move manually here. - Register DstReg = I.getOperand(0).getReg(); - if (DstSize == 128) { - auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0); - I.eraseFromParent(); - return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); - } else if (DstSize == 64) { - auto Mov = - MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) - .addImm(0); - MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) - .addReg(Mov.getReg(0), 0, AArch64::dsub); - I.eraseFromParent(); - return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI); - } - } + if (CV->isNullValue()) { + // Until the importer can support immAllZerosV in pattern leaf nodes, + // select a zero move manually here. 
+ Register DstReg = I.getOperand(0).getReg(); + if (DstSize == 128) { + auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0); + I.eraseFromParent(); + return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); + } else if (DstSize == 64) { + auto Mov = + MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) + .addImm(0); + MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) + .addReg(Mov.getReg(0), 0, AArch64::dsub); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI); + } + } auto *CPLoad = emitLoadFromConstantPool(CV, MIB); if (!CPLoad) { LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector"); @@ -4927,10 +4927,10 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( case Intrinsic::debugtrap: MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); break; - case Intrinsic::ubsantrap: - MIRBuilder.buildInstr(AArch64::BRK, {}, {}) - .addImm(I.getOperand(1).getImm() | ('U' << 8)); - break; + case Intrinsic::ubsantrap: + MIRBuilder.buildInstr(AArch64::BRK, {}, {}) + .addImm(I.getOperand(1).getImm() | ('U' << 8)); + break; } I.eraseFromParent(); @@ -4996,22 +4996,22 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { - if (!MFReturnAddr) { - // Insert the copy from LR/X30 into the entry block, before it can be - // clobbered by anything. - MFI.setReturnAddressIsTaken(true); - MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR, - AArch64::GPR64RegClass); - } - - if (STI.hasPAuth()) { - MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr}); - } else { - MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr}); - MIRBuilder.buildInstr(AArch64::XPACLRI); - MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); + if (!MFReturnAddr) { + // Insert the copy from LR/X30 into the entry block, before it can be + // clobbered by anything. 
+ MFI.setReturnAddressIsTaken(true); + MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR, + AArch64::GPR64RegClass); } - + + if (STI.hasPAuth()) { + MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr}); + } else { + MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr}); + MIRBuilder.buildInstr(AArch64::XPACLRI); + MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); + } + I.eraseFromParent(); return true; } @@ -5031,16 +5031,16 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, MIRBuilder.buildCopy({DstReg}, {FrameAddr}); else { MFI.setReturnAddressIsTaken(true); - - if (STI.hasPAuth()) { - Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); - MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); - MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); - } else { - MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1); - MIRBuilder.buildInstr(AArch64::XPACLRI); - MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); - } + + if (STI.hasPAuth()) { + Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); + MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); + } else { + MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1); + MIRBuilder.buildInstr(AArch64::XPACLRI); + MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); + } } I.eraseFromParent(); @@ -5248,7 +5248,7 @@ AArch64InstructionSelector::selectExtendedSHL( // The value must fit into 3 bits, and must be positive. Make sure that is // true. - int64_t ImmVal = ValAndVReg->Value.getSExtValue(); + int64_t ImmVal = ValAndVReg->Value.getSExtValue(); // Since we're going to pull this into a shift, the constant value must be // a power of 2. If we got a multiply, then we need to check this. @@ -5388,60 +5388,60 @@ InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, unsigned SizeInBytes) const { MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); - if (!Root.isReg()) - return None; - MachineInstr *PtrAdd = - getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); - if (!PtrAdd) + if (!Root.isReg()) return None; - - // Check for an immediates which cannot be encoded in the [base + imm] - // addressing mode, and can't be encoded in an add/sub. If this happens, we'll - // end up with code like: - // - // mov x0, wide - // add x1 base, x0 - // ldr x2, [x1, x0] - // - // In this situation, we can use the [base, xreg] addressing mode to save an - // add/sub: - // - // mov x0, wide - // ldr x2, [base, x0] - auto ValAndVReg = - getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI); - if (ValAndVReg) { - unsigned Scale = Log2_32(SizeInBytes); - int64_t ImmOff = ValAndVReg->Value.getSExtValue(); - - // Skip immediates that can be selected in the load/store addresing - // mode. - if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && - ImmOff < (0x1000 << Scale)) - return None; - - // Helper lambda to decide whether or not it is preferable to emit an add. - auto isPreferredADD = [](int64_t ImmOff) { - // Constants in [0x0, 0xfff] can be encoded in an add. - if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) - return true; - - // Can it be encoded in an add lsl #12? - if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL) - return false; - - // It can be encoded in an add lsl #12, but we may not want to. 
If it is - // possible to select this as a single movz, then prefer that. A single - // movz is faster than an add with a shift. - return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && - (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; - }; - - // If the immediate can be encoded in a single add/sub, then bail out. - if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) - return None; - } - + MachineInstr *PtrAdd = + getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); + if (!PtrAdd) + return None; + + // Check for an immediates which cannot be encoded in the [base + imm] + // addressing mode, and can't be encoded in an add/sub. If this happens, we'll + // end up with code like: + // + // mov x0, wide + // add x1 base, x0 + // ldr x2, [x1, x0] + // + // In this situation, we can use the [base, xreg] addressing mode to save an + // add/sub: + // + // mov x0, wide + // ldr x2, [base, x0] + auto ValAndVReg = + getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI); + if (ValAndVReg) { + unsigned Scale = Log2_32(SizeInBytes); + int64_t ImmOff = ValAndVReg->Value.getSExtValue(); + + // Skip immediates that can be selected in the load/store addresing + // mode. + if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && + ImmOff < (0x1000 << Scale)) + return None; + + // Helper lambda to decide whether or not it is preferable to emit an add. + auto isPreferredADD = [](int64_t ImmOff) { + // Constants in [0x0, 0xfff] can be encoded in an add. + if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) + return true; + + // Can it be encoded in an add lsl #12? + if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL) + return false; + + // It can be encoded in an add lsl #12, but we may not want to. If it is + // possible to select this as a single movz, then prefer that. A single + // movz is faster than an add with a shift. + return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && + (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; + }; + + // If the immediate can be encoded in a single add/sub, then bail out. + if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) + return None; + } + // Try to fold shifts into the addressing mode. 
auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); if (AddrModeFns) @@ -5871,8 +5871,8 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && "Expected G_CONSTANT"); - Optional<int64_t> CstVal = - getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI); + Optional<int64_t> CstVal = + getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI); assert(CstVal && "Expected constant value"); MIB.addImm(CstVal.getValue()); } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 5a6c904e3f..af24267bf2 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -14,7 +14,7 @@ #include "AArch64LegalizerInfo.h" #include "AArch64Subtarget.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" -#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineInstr.h" @@ -23,8 +23,8 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" -#include <initializer_list> -#include "llvm/Support/MathExtras.h" +#include <initializer_list> +#include "llvm/Support/MathExtras.h" #define DEBUG_TYPE "aarch64-legalinfo" @@ -56,13 +56,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) const LLT v2s64 = LLT::vector(2, 64); const LLT v2p0 = LLT::vector(2, p0); - std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */ - v16s8, v8s16, v4s32, - v2s64, v2p0, - /* End 128bit types */ - /* Begin 64bit types */ - v8s8, v4s16, v2s32}; - + std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */ + v16s8, v8s16, v4s32, + v2s64, v2p0, + /* End 128bit types */ + /* Begin 64bit types */ + v8s8, v4s16, v2s32}; + const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine(); // FIXME: support subtargets which have neon/fp-armv8 disabled. @@ -71,31 +71,31 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return; } - // Some instructions only support s16 if the subtarget has full 16-bit FP - // support. - const bool HasFP16 = ST.hasFullFP16(); - const LLT &MinFPScalar = HasFP16 ? s16 : s32; - + // Some instructions only support s16 if the subtarget has full 16-bit FP + // support. + const bool HasFP16 = ST.hasFullFP16(); + const LLT &MinFPScalar = HasFP16 ? 
s16 : s32; + getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) - .legalFor({p0, s1, s8, s16, s32, s64}) - .legalFor(PackedVectorAllTypeList) - .clampScalar(0, s1, s64) - .widenScalarToNextPow2(0, 8) - .fewerElementsIf( - [=](const LegalityQuery &Query) { - return Query.Types[0].isVector() && - (Query.Types[0].getElementType() != s64 || - Query.Types[0].getNumElements() != 2); - }, - [=](const LegalityQuery &Query) { - LLT EltTy = Query.Types[0].getElementType(); - if (EltTy == s64) - return std::make_pair(0, LLT::vector(2, 64)); - return std::make_pair(0, EltTy); - }); - - getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64}) - .legalFor(PackedVectorAllTypeList) + .legalFor({p0, s1, s8, s16, s32, s64}) + .legalFor(PackedVectorAllTypeList) + .clampScalar(0, s1, s64) + .widenScalarToNextPow2(0, 8) + .fewerElementsIf( + [=](const LegalityQuery &Query) { + return Query.Types[0].isVector() && + (Query.Types[0].getElementType() != s64 || + Query.Types[0].getNumElements() != 2); + }, + [=](const LegalityQuery &Query) { + LLT EltTy = Query.Types[0].getElementType(); + if (EltTy == s64) + return std::make_pair(0, LLT::vector(2, 64)); + return std::make_pair(0, EltTy); + }); + + getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64}) + .legalFor(PackedVectorAllTypeList) .clampScalar(0, s16, s64) .widenScalarToNextPow2(0); @@ -105,38 +105,38 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .widenScalarToNextPow2(0); getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) - .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8}) - .scalarizeIf( - [=](const LegalityQuery &Query) { - return Query.Opcode == G_MUL && Query.Types[0] == v2s64; - }, - 0) - .legalFor({v2s64}) + .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8}) + .scalarizeIf( + [=](const LegalityQuery &Query) { + return Query.Opcode == G_MUL && Query.Types[0] == v2s64; + }, + 0) + .legalFor({v2s64}) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0) .clampNumElements(0, v2s32, v4s32) .clampNumElements(0, v2s64, v2s64) .moreElementsToNextPow2(0); - getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) + getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) .customIf([=](const LegalityQuery &Query) { const auto &SrcTy = Query.Types[0]; const auto &AmtTy = Query.Types[1]; return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && AmtTy.getSizeInBits() == 32; }) - .legalFor({ - {s32, s32}, - {s32, s64}, - {s64, s64}, - {v8s8, v8s8}, - {v16s8, v16s8}, - {v4s16, v4s16}, - {v8s16, v8s16}, - {v2s32, v2s32}, - {v4s32, v4s32}, - {v2s64, v2s64}, - }) + .legalFor({ + {s32, s32}, + {s32, s64}, + {s64, s64}, + {v8s8, v8s8}, + {v16s8, v16s8}, + {v4s16, v4s16}, + {v8s16, v8s16}, + {v2s32, v2s32}, + {v4s32, v4s32}, + {v2s64, v2s64}, + }) .clampScalar(1, s32, s64) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0) @@ -161,25 +161,25 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder({G_SREM, G_UREM}) .lowerFor({s1, s8, s16, s32, s64}); - getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}}); + getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}}); getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64}); - getActionDefinitionsBuilder( - {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO}) + getActionDefinitionsBuilder( + {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO}) .legalFor({{s32, s1}, {s64, s1}}) .minScalar(0, s32); getActionDefinitionsBuilder({G_FADD, G_FSUB, 
G_FMUL, G_FDIV, G_FNEG}) - .legalFor({s32, s64, v2s64, v4s32, v2s32}) - .clampNumElements(0, v2s32, v4s32) - .clampNumElements(0, v2s64, v2s64); + .legalFor({s32, s64, v2s64, v4s32, v2s32}) + .clampNumElements(0, v2s32, v4s32) + .clampNumElements(0, v2s64, v2s64); getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64}); getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT, G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, - G_FNEARBYINT, G_INTRINSIC_LRINT}) + G_FNEARBYINT, G_INTRINSIC_LRINT}) // If we don't have full FP16 support, then scalarize the elements of // vectors containing fp16 types. .fewerElementsIf( @@ -285,7 +285,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {v4s32, p0, 128, 8}, {v2s64, p0, 128, 8}}) // These extends are also legal - .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}}) + .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}}) .clampScalar(0, s8, s64) .lowerIfMemSizeNotPow2() // Lower any any-extending loads left into G_ANYEXT and G_LOAD @@ -307,7 +307,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {p0, p0, 64, 8}, {s128, p0, 128, 8}, {v16s8, p0, 128, 8}, - {v8s8, p0, 64, 8}, + {v8s8, p0, 64, 8}, {v4s16, p0, 64, 8}, {v8s16, p0, 128, 8}, {v2s32, p0, 64, 8}, @@ -325,19 +325,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) // Constants getActionDefinitionsBuilder(G_CONSTANT) - .legalFor({p0, s8, s16, s32, s64}) + .legalFor({p0, s8, s16, s32, s64}) .clampScalar(0, s8, s64) .widenScalarToNextPow2(0); getActionDefinitionsBuilder(G_FCONSTANT) - .legalIf([=](const LegalityQuery &Query) { - const auto &Ty = Query.Types[0]; - if (HasFP16 && Ty == s16) - return true; - return Ty == s32 || Ty == s64 || Ty == s128; - }) - .clampScalar(0, MinFPScalar, s128); - - getActionDefinitionsBuilder({G_ICMP, G_FCMP}) + .legalIf([=](const LegalityQuery &Query) { + const auto &Ty = Query.Types[0]; + if (HasFP16 && Ty == s16) + return true; + return Ty == s32 || Ty == s64 || Ty == s128; + }) + .clampScalar(0, MinFPScalar, s128); + + getActionDefinitionsBuilder({G_ICMP, G_FCMP}) .legalFor({{s32, s32}, {s32, s64}, {s32, p0}, @@ -365,8 +365,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .minScalarOrEltIf( [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0, s64) - .widenScalarOrEltToNextPow2(1) - .clampNumElements(0, v2s32, v4s32); + .widenScalarOrEltToNextPow2(1) + .clampNumElements(0, v2s32, v4s32); // Extensions auto ExtLegalFunc = [=](const LegalityQuery &Query) { @@ -374,7 +374,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) if (DstSize == 128 && !Query.Types[0].isVector()) return false; // Extending to a scalar s128 needs narrowing. - + // Make sure that we have something that will fit in a register, and // make sure it's a power of 2. if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize)) @@ -399,28 +399,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalIf(ExtLegalFunc) .clampScalar(0, s64, s64); // Just for s128, others are handled above. 
- getActionDefinitionsBuilder(G_TRUNC) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); }, - 0, s8) - .customIf([=](const LegalityQuery &Query) { - LLT DstTy = Query.Types[0]; - LLT SrcTy = Query.Types[1]; - return DstTy == v8s8 && SrcTy.getSizeInBits() > 128; - }) - .alwaysLegal(); + getActionDefinitionsBuilder(G_TRUNC) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); }, + 0, s8) + .customIf([=](const LegalityQuery &Query) { + LLT DstTy = Query.Types[0]; + LLT SrcTy = Query.Types[1]; + return DstTy == v8s8 && SrcTy.getSizeInBits() > 128; + }) + .alwaysLegal(); - getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower(); + getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower(); // FP conversions - getActionDefinitionsBuilder(G_FPTRUNC) - .legalFor( - {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}) - .clampMaxNumElements(0, s32, 2); - getActionDefinitionsBuilder(G_FPEXT) - .legalFor( - {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}) - .clampMaxNumElements(0, s64, 2); + getActionDefinitionsBuilder(G_FPTRUNC) + .legalFor( + {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}) + .clampMaxNumElements(0, s32, 2); + getActionDefinitionsBuilder(G_FPEXT) + .legalFor( + {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}) + .clampMaxNumElements(0, s64, 2); // Conversions getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) @@ -433,7 +433,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32}) .clampScalar(1, s32, s64) - .minScalarSameAs(1, 0) + .minScalarSameAs(1, 0) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0); @@ -445,8 +445,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalFor({{s32, s1}, {s64, s1}, {p0, s1}}) .clampScalar(0, s32, s64) .widenScalarToNextPow2(0) - .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0) - .lowerIf(isVector(0)); + .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0) + .lowerIf(isVector(0)); // Pointer-handling getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); @@ -576,8 +576,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0; }) // Any vectors left are the wrong size. Scalarize them. - .scalarize(0) - .scalarize(1); + .scalarize(0) + .scalarize(1); } getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT) @@ -589,40 +589,40 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalIf([=](const LegalityQuery &Query) { const LLT &VecTy = Query.Types[1]; return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 || - VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 || - VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0; - }) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { - // We want to promote to <M x s1> to <M x s64> if that wouldn't - // cause the total vec size to be > 128b. 
- return Query.Types[1].getNumElements() <= 2; - }, - 0, s64) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { - return Query.Types[1].getNumElements() <= 4; - }, - 0, s32) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { - return Query.Types[1].getNumElements() <= 8; - }, - 0, s16) - .minScalarOrEltIf( - [=](const LegalityQuery &Query) { - return Query.Types[1].getNumElements() <= 16; - }, - 0, s8) - .minScalarOrElt(0, s8); // Worst case, we need at least s8. + VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 || + VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0; + }) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + // We want to promote to <M x s1> to <M x s64> if that wouldn't + // cause the total vec size to be > 128b. + return Query.Types[1].getNumElements() <= 2; + }, + 0, s64) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[1].getNumElements() <= 4; + }, + 0, s32) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[1].getNumElements() <= 8; + }, + 0, s16) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[1].getNumElements() <= 16; + }, + 0, s8) + .minScalarOrElt(0, s8); // Worst case, we need at least s8. getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) - .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64})); + .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64})); getActionDefinitionsBuilder(G_BUILD_VECTOR) - .legalFor({{v8s8, s8}, - {v16s8, s8}, - {v4s16, s16}, + .legalFor({{v8s8, s8}, + {v16s8, s8}, + {v4s16, s16}, {v8s16, s16}, {v2s32, s32}, {v4s32, s32}, @@ -638,9 +638,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) }) .minScalarSameAs(1, 0); - getActionDefinitionsBuilder(G_CTLZ) - .legalForCartesianProduct( - {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) + getActionDefinitionsBuilder(G_CTLZ) + .legalForCartesianProduct( + {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) .scalarize(1); getActionDefinitionsBuilder(G_SHUFFLE_VECTOR) @@ -651,7 +651,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) // to be the same size as the dest. if (DstTy != SrcTy) return false; - for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) { + for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) { if (DstTy == Ty) return true; } @@ -668,7 +668,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_CONCAT_VECTORS) .legalFor({{v4s32, v2s32}, {v8s16, v4s16}}); - getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}}); + getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}}); getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) { return Query.Types[0] == p0 && Query.Types[1] == s64; @@ -676,20 +676,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); - getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); - - getActionDefinitionsBuilder(G_ABS).lowerIf( - [=](const LegalityQuery &Query) { return Query.Types[0].isScalar(); }); - - getActionDefinitionsBuilder(G_VECREDUCE_FADD) - // We only have FADDP to do reduction-like operations. Lower the rest. 
- .legalFor({{s32, v2s32}, {s64, v2s64}}) - .lower(); - - getActionDefinitionsBuilder(G_VECREDUCE_ADD) - .legalFor({{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s64, v2s64}}) - .lower(); - + getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); + + getActionDefinitionsBuilder(G_ABS).lowerIf( + [=](const LegalityQuery &Query) { return Query.Types[0].isScalar(); }); + + getActionDefinitionsBuilder(G_VECREDUCE_FADD) + // We only have FADDP to do reduction-like operations. Lower the rest. + .legalFor({{s32, v2s32}, {s64, v2s64}}) + .lower(); + + getActionDefinitionsBuilder(G_VECREDUCE_ADD) + .legalFor({{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s64, v2s64}}) + .lower(); + computeTables(); verify(*ST.getInstrInfo()); } @@ -714,63 +714,63 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer); case TargetOpcode::G_GLOBAL_VALUE: return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer); - case TargetOpcode::G_TRUNC: - return legalizeVectorTrunc(MI, Helper); + case TargetOpcode::G_TRUNC: + return legalizeVectorTrunc(MI, Helper); } llvm_unreachable("expected switch to return"); } -static void extractParts(Register Reg, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts, - SmallVectorImpl<Register> &VRegs) { - for (int I = 0; I < NumParts; ++I) - VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); - MIRBuilder.buildUnmerge(VRegs, Reg); -} - -bool AArch64LegalizerInfo::legalizeVectorTrunc( - MachineInstr &MI, LegalizerHelper &Helper) const { - MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; - MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); - // Similar to how operand splitting is done in SelectiondDAG, we can handle - // %res(v8s8) = G_TRUNC %in(v8s32) by generating: - // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>) - // %lo16(<4 x s16>) = G_TRUNC %inlo - // %hi16(<4 x s16>) = G_TRUNC %inhi - // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16 - // %res(<8 x s8>) = G_TRUNC %in16 - - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(SrcReg); - assert(isPowerOf2_32(DstTy.getSizeInBits()) && - isPowerOf2_32(SrcTy.getSizeInBits())); - - // Split input type. - LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2); - // First, split the source into two smaller vectors. - SmallVector<Register, 2> SplitSrcs; - extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs); - - // Truncate the splits into intermediate narrower elements. 
- LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2); - for (unsigned I = 0; I < SplitSrcs.size(); ++I) - SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0); - - auto Concat = MIRBuilder.buildConcatVectors( - DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs); - - Helper.Observer.changingInstr(MI); - MI.getOperand(1).setReg(Concat.getReg(0)); - Helper.Observer.changedInstr(MI); - return true; -} - -bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( - MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, - GISelChangeObserver &Observer) const { +static void extractParts(Register Reg, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts, + SmallVectorImpl<Register> &VRegs) { + for (int I = 0; I < NumParts; ++I) + VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); + MIRBuilder.buildUnmerge(VRegs, Reg); +} + +bool AArch64LegalizerInfo::legalizeVectorTrunc( + MachineInstr &MI, LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + // Similar to how operand splitting is done in SelectiondDAG, we can handle + // %res(v8s8) = G_TRUNC %in(v8s32) by generating: + // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>) + // %lo16(<4 x s16>) = G_TRUNC %inlo + // %hi16(<4 x s16>) = G_TRUNC %inhi + // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16 + // %res(<8 x s8>) = G_TRUNC %in16 + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + assert(isPowerOf2_32(DstTy.getSizeInBits()) && + isPowerOf2_32(SrcTy.getSizeInBits())); + + // Split input type. + LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2); + // First, split the source into two smaller vectors. + SmallVector<Register, 2> SplitSrcs; + extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs); + + // Truncate the splits into intermediate narrower elements. + LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2); + for (unsigned I = 0; I < SplitSrcs.size(); ++I) + SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0); + + auto Concat = MIRBuilder.buildConcatVectors( + DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs); + + Helper.Observer.changingInstr(MI); + MI.getOperand(1).setReg(Concat.getReg(0)); + Helper.Observer.changedInstr(MI); + return true; +} + +bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, + GISelChangeObserver &Observer) const { assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE); // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP + // G_ADD_LOW instructions. @@ -792,27 +792,27 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( // Set the regclass on the dest reg too. MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); - // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so - // by creating a MOVK that sets bits 48-63 of the register to (global address - // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to - // prevent an incorrect tag being generated during relocation when the the - // global appears before the code section. Without the offset, a global at - // `0x0f00'0000'0000'1000` (i.e. 
at `0x1000` with tag `0xf`) that's referenced - // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 = - // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe` - // instead of `0xf`. - // This assumes that we're in the small code model so we can assume a binary - // size of <= 4GB, which makes the untagged PC relative offset positive. The - // binary must also be loaded into address range [0, 2^48). Both of these - // properties need to be ensured at runtime when using tagged addresses. - if (OpFlags & AArch64II::MO_TAGGED) { - ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP}) - .addGlobalAddress(GV, 0x100000000, - AArch64II::MO_PREL | AArch64II::MO_G3) - .addImm(48); - MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); - } - + // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so + // by creating a MOVK that sets bits 48-63 of the register to (global address + // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to + // prevent an incorrect tag being generated during relocation when the the + // global appears before the code section. Without the offset, a global at + // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced + // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 = + // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe` + // instead of `0xf`. + // This assumes that we're in the small code model so we can assume a binary + // size of <= 4GB, which makes the untagged PC relative offset positive. The + // binary must also be loaded into address range [0, 2^48). Both of these + // properties need to be ensured at runtime when using tagged addresses. + if (OpFlags & AArch64II::MO_TAGGED) { + ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP}) + .addGlobalAddress(GV, 0x100000000, + AArch64II::MO_PREL | AArch64II::MO_G3) + .addImm(48); + MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); + } + MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP}) .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); @@ -820,8 +820,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( return true; } -bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, + MachineInstr &MI) const { return true; } @@ -838,13 +838,13 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr( if (!VRegAndVal) return true; // Check the shift amount is in range for an immediate form. - int64_t Amount = VRegAndVal->Value.getSExtValue(); + int64_t Amount = VRegAndVal->Value.getSExtValue(); if (Amount > 31) return true; // This will have to remain a register variant. 
auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount); - Observer.changingInstr(MI); + Observer.changingInstr(MI); MI.getOperand(2).setReg(ExtCst.getReg(0)); - Observer.changedInstr(MI); + Observer.changedInstr(MI); return true; } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index 8217e37c85..c22cb26608 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -15,7 +15,7 @@ #define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" -#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" namespace llvm { @@ -46,7 +46,7 @@ private: bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const; - bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const; + bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index fdd04cb77f..bf3190ce93 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -1,22 +1,22 @@ -//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===// +//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -/// -/// \file -/// Post-legalization combines on generic MachineInstrs. -/// -/// The combines here must preserve instruction legality. -/// -/// Lowering combines (e.g. pseudo matching) should be handled by -/// AArch64PostLegalizerLowering. -/// -/// Combines which don't rely on instruction legality should go in the -/// AArch64PreLegalizerCombiner. -/// +/// +/// \file +/// Post-legalization combines on generic MachineInstrs. +/// +/// The combines here must preserve instruction legality. +/// +/// Lowering combines (e.g. pseudo matching) should be handled by +/// AArch64PostLegalizerLowering. +/// +/// Combines which don't rely on instruction legality should go in the +/// AArch64PreLegalizerCombiner. 
+/// //===----------------------------------------------------------------------===// #include "AArch64TargetMachine.h" @@ -24,12 +24,12 @@ #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" @@ -37,202 +37,202 @@ using namespace llvm; -/// This combine tries do what performExtractVectorEltCombine does in SDAG. -/// Rewrite for pairwise fadd pattern -/// (s32 (g_extract_vector_elt -/// (g_fadd (vXs32 Other) -/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0)) -/// -> -/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0) -/// (g_extract_vector_elt (vXs32 Other) 1)) -bool matchExtractVecEltPairwiseAdd( - MachineInstr &MI, MachineRegisterInfo &MRI, - std::tuple<unsigned, LLT, Register> &MatchInfo) { - Register Src1 = MI.getOperand(1).getReg(); - Register Src2 = MI.getOperand(2).getReg(); - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - - auto Cst = getConstantVRegValWithLookThrough(Src2, MRI); - if (!Cst || Cst->Value != 0) +/// This combine tries do what performExtractVectorEltCombine does in SDAG. +/// Rewrite for pairwise fadd pattern +/// (s32 (g_extract_vector_elt +/// (g_fadd (vXs32 Other) +/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0)) +/// -> +/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0) +/// (g_extract_vector_elt (vXs32 Other) 1)) +bool matchExtractVecEltPairwiseAdd( + MachineInstr &MI, MachineRegisterInfo &MRI, + std::tuple<unsigned, LLT, Register> &MatchInfo) { + Register Src1 = MI.getOperand(1).getReg(); + Register Src2 = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + auto Cst = getConstantVRegValWithLookThrough(Src2, MRI); + if (!Cst || Cst->Value != 0) return false; - // SDAG also checks for FullFP16, but this looks to be beneficial anyway. + // SDAG also checks for FullFP16, but this looks to be beneficial anyway. - // Now check for an fadd operation. TODO: expand this for integer add? - auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI); - if (!FAddMI) + // Now check for an fadd operation. TODO: expand this for integer add? + auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI); + if (!FAddMI) return false; - // If we add support for integer add, must restrict these types to just s64. - unsigned DstSize = DstTy.getSizeInBits(); - if (DstSize != 16 && DstSize != 32 && DstSize != 64) + // If we add support for integer add, must restrict these types to just s64. 
+ unsigned DstSize = DstTy.getSizeInBits(); + if (DstSize != 16 && DstSize != 32 && DstSize != 64) return false; - Register Src1Op1 = FAddMI->getOperand(1).getReg(); - Register Src1Op2 = FAddMI->getOperand(2).getReg(); - MachineInstr *Shuffle = - getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI); - MachineInstr *Other = MRI.getVRegDef(Src1Op1); - if (!Shuffle) { - Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI); - Other = MRI.getVRegDef(Src1Op2); + Register Src1Op1 = FAddMI->getOperand(1).getReg(); + Register Src1Op2 = FAddMI->getOperand(2).getReg(); + MachineInstr *Shuffle = + getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI); + MachineInstr *Other = MRI.getVRegDef(Src1Op1); + if (!Shuffle) { + Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI); + Other = MRI.getVRegDef(Src1Op2); } - // We're looking for a shuffle that moves the second element to index 0. - if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 && - Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) { - std::get<0>(MatchInfo) = TargetOpcode::G_FADD; - std::get<1>(MatchInfo) = DstTy; - std::get<2>(MatchInfo) = Other->getOperand(0).getReg(); + // We're looking for a shuffle that moves the second element to index 0. + if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 && + Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) { + std::get<0>(MatchInfo) = TargetOpcode::G_FADD; + std::get<1>(MatchInfo) = DstTy; + std::get<2>(MatchInfo) = Other->getOperand(0).getReg(); return true; } return false; } -bool applyExtractVecEltPairwiseAdd( - MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, - std::tuple<unsigned, LLT, Register> &MatchInfo) { - unsigned Opc = std::get<0>(MatchInfo); - assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!"); - // We want to generate two extracts of elements 0 and 1, and add them. - LLT Ty = std::get<1>(MatchInfo); - Register Src = std::get<2>(MatchInfo); - LLT s64 = LLT::scalar(64); - B.setInstrAndDebugLoc(MI); - auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0)); - auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1)); - B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1}); - MI.eraseFromParent(); +bool applyExtractVecEltPairwiseAdd( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, + std::tuple<unsigned, LLT, Register> &MatchInfo) { + unsigned Opc = std::get<0>(MatchInfo); + assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!"); + // We want to generate two extracts of elements 0 and 1, and add them. + LLT Ty = std::get<1>(MatchInfo); + Register Src = std::get<2>(MatchInfo); + LLT s64 = LLT::scalar(64); + B.setInstrAndDebugLoc(MI); + auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0)); + auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1)); + B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1}); + MI.eraseFromParent(); return true; } -static bool isSignExtended(Register R, MachineRegisterInfo &MRI) { - // TODO: check if extended build vector as well. - unsigned Opc = MRI.getVRegDef(R)->getOpcode(); - return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG; +static bool isSignExtended(Register R, MachineRegisterInfo &MRI) { + // TODO: check if extended build vector as well. 
+ unsigned Opc = MRI.getVRegDef(R)->getOpcode(); + return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG; } -static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) { - // TODO: check if extended build vector as well. - return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT; +static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) { + // TODO: check if extended build vector as well. + return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT; } -bool matchAArch64MulConstCombine( - MachineInstr &MI, MachineRegisterInfo &MRI, - std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) { - assert(MI.getOpcode() == TargetOpcode::G_MUL); - Register LHS = MI.getOperand(1).getReg(); - Register RHS = MI.getOperand(2).getReg(); - Register Dst = MI.getOperand(0).getReg(); - const LLT Ty = MRI.getType(LHS); - - // The below optimizations require a constant RHS. - auto Const = getConstantVRegValWithLookThrough(RHS, MRI); - if (!Const) +bool matchAArch64MulConstCombine( + MachineInstr &MI, MachineRegisterInfo &MRI, + std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) { + assert(MI.getOpcode() == TargetOpcode::G_MUL); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + Register Dst = MI.getOperand(0).getReg(); + const LLT Ty = MRI.getType(LHS); + + // The below optimizations require a constant RHS. + auto Const = getConstantVRegValWithLookThrough(RHS, MRI); + if (!Const) return false; - const APInt ConstValue = Const->Value.sextOrSelf(Ty.getSizeInBits()); - // The following code is ported from AArch64ISelLowering. - // Multiplication of a power of two plus/minus one can be done more - // cheaply as as shift+add/sub. For now, this is true unilaterally. If - // future CPUs have a cheaper MADD instruction, this may need to be - // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and - // 64-bit is 5 cycles, so this is always a win. - // More aggressively, some multiplications N0 * C can be lowered to - // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M, - // e.g. 6=3*2=(2+1)*2. - // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45 - // which equals to (1+2)*16-(1+2). - // TrailingZeroes is used to test if the mul can be lowered to - // shift+add+shift. - unsigned TrailingZeroes = ConstValue.countTrailingZeros(); - if (TrailingZeroes) { - // Conservatively do not lower to shift+add+shift if the mul might be - // folded into smul or umul. - if (MRI.hasOneNonDBGUse(LHS) && - (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI))) - return false; - // Conservatively do not lower to shift+add+shift if the mul might be - // folded into madd or msub. - if (MRI.hasOneNonDBGUse(Dst)) { - MachineInstr &UseMI = *MRI.use_instr_begin(Dst); - if (UseMI.getOpcode() == TargetOpcode::G_ADD || - UseMI.getOpcode() == TargetOpcode::G_SUB) - return false; - } - } - // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub - // and shift+add+shift. - APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes); - - unsigned ShiftAmt, AddSubOpc; - // Is the shifted value the LHS operand of the add/sub? - bool ShiftValUseIsLHS = true; - // Do we need to negate the result? 
- bool NegateResult = false; - - if (ConstValue.isNonNegative()) { - // (mul x, 2^N + 1) => (add (shl x, N), x) - // (mul x, 2^N - 1) => (sub (shl x, N), x) - // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M) - APInt SCVMinus1 = ShiftedConstValue - 1; - APInt CVPlus1 = ConstValue + 1; - if (SCVMinus1.isPowerOf2()) { - ShiftAmt = SCVMinus1.logBase2(); - AddSubOpc = TargetOpcode::G_ADD; - } else if (CVPlus1.isPowerOf2()) { - ShiftAmt = CVPlus1.logBase2(); - AddSubOpc = TargetOpcode::G_SUB; - } else - return false; - } else { - // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) - // (mul x, -(2^N + 1)) => - (add (shl x, N), x) - APInt CVNegPlus1 = -ConstValue + 1; - APInt CVNegMinus1 = -ConstValue - 1; - if (CVNegPlus1.isPowerOf2()) { - ShiftAmt = CVNegPlus1.logBase2(); - AddSubOpc = TargetOpcode::G_SUB; - ShiftValUseIsLHS = false; - } else if (CVNegMinus1.isPowerOf2()) { - ShiftAmt = CVNegMinus1.logBase2(); - AddSubOpc = TargetOpcode::G_ADD; - NegateResult = true; - } else - return false; - } - - if (NegateResult && TrailingZeroes) + const APInt ConstValue = Const->Value.sextOrSelf(Ty.getSizeInBits()); + // The following code is ported from AArch64ISelLowering. + // Multiplication of a power of two plus/minus one can be done more + // cheaply as as shift+add/sub. For now, this is true unilaterally. If + // future CPUs have a cheaper MADD instruction, this may need to be + // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and + // 64-bit is 5 cycles, so this is always a win. + // More aggressively, some multiplications N0 * C can be lowered to + // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M, + // e.g. 6=3*2=(2+1)*2. + // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45 + // which equals to (1+2)*16-(1+2). + // TrailingZeroes is used to test if the mul can be lowered to + // shift+add+shift. + unsigned TrailingZeroes = ConstValue.countTrailingZeros(); + if (TrailingZeroes) { + // Conservatively do not lower to shift+add+shift if the mul might be + // folded into smul or umul. + if (MRI.hasOneNonDBGUse(LHS) && + (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI))) + return false; + // Conservatively do not lower to shift+add+shift if the mul might be + // folded into madd or msub. + if (MRI.hasOneNonDBGUse(Dst)) { + MachineInstr &UseMI = *MRI.use_instr_begin(Dst); + if (UseMI.getOpcode() == TargetOpcode::G_ADD || + UseMI.getOpcode() == TargetOpcode::G_SUB) + return false; + } + } + // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub + // and shift+add+shift. + APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes); + + unsigned ShiftAmt, AddSubOpc; + // Is the shifted value the LHS operand of the add/sub? + bool ShiftValUseIsLHS = true; + // Do we need to negate the result? 
+ bool NegateResult = false; + + if (ConstValue.isNonNegative()) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + // (mul x, 2^N - 1) => (sub (shl x, N), x) + // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M) + APInt SCVMinus1 = ShiftedConstValue - 1; + APInt CVPlus1 = ConstValue + 1; + if (SCVMinus1.isPowerOf2()) { + ShiftAmt = SCVMinus1.logBase2(); + AddSubOpc = TargetOpcode::G_ADD; + } else if (CVPlus1.isPowerOf2()) { + ShiftAmt = CVPlus1.logBase2(); + AddSubOpc = TargetOpcode::G_SUB; + } else + return false; + } else { + // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) + // (mul x, -(2^N + 1)) => - (add (shl x, N), x) + APInt CVNegPlus1 = -ConstValue + 1; + APInt CVNegMinus1 = -ConstValue - 1; + if (CVNegPlus1.isPowerOf2()) { + ShiftAmt = CVNegPlus1.logBase2(); + AddSubOpc = TargetOpcode::G_SUB; + ShiftValUseIsLHS = false; + } else if (CVNegMinus1.isPowerOf2()) { + ShiftAmt = CVNegMinus1.logBase2(); + AddSubOpc = TargetOpcode::G_ADD; + NegateResult = true; + } else + return false; + } + + if (NegateResult && TrailingZeroes) return false; - ApplyFn = [=](MachineIRBuilder &B, Register DstReg) { - auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt); - auto ShiftedVal = B.buildShl(Ty, LHS, Shift); - - Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS; - Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0); - auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS}); - assert(!(NegateResult && TrailingZeroes) && - "NegateResult and TrailingZeroes cannot both be true for now."); - // Negate the result. - if (NegateResult) { - B.buildSub(DstReg, B.buildConstant(Ty, 0), Res); - return; - } - // Shift the result. - if (TrailingZeroes) { - B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes)); - return; - } - B.buildCopy(DstReg, Res.getReg(0)); - }; + ApplyFn = [=](MachineIRBuilder &B, Register DstReg) { + auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt); + auto ShiftedVal = B.buildShl(Ty, LHS, Shift); + + Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS; + Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0); + auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS}); + assert(!(NegateResult && TrailingZeroes) && + "NegateResult and TrailingZeroes cannot both be true for now."); + // Negate the result. + if (NegateResult) { + B.buildSub(DstReg, B.buildConstant(Ty, 0), Res); + return; + } + // Shift the result. 
+ if (TrailingZeroes) { + B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes)); + return; + } + B.buildCopy(DstReg, Res.getReg(0)); + }; return true; } -bool applyAArch64MulConstCombine( - MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, - std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) { - B.setInstrAndDebugLoc(MI); - ApplyFn(B, MI.getOperand(0).getReg()); +bool applyAArch64MulConstCombine( + MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, + std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) { + B.setInstrAndDebugLoc(MI); + ApplyFn(B, MI.getOperand(0).getReg()); MI.eraseFromParent(); return true; } @@ -348,7 +348,7 @@ INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE, false) namespace llvm { -FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) { +FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) { return new AArch64PostLegalizerCombiner(IsOptNone); } } // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index a06ff4b541..0447c3e8a0 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -1,704 +1,704 @@ -//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Post-legalization lowering for instructions. -/// -/// This is used to offload pattern matching from the selector. -/// -/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually -/// a G_ZIP, G_UZP, etc. -/// -/// General optimization combines should be handled by either the -/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner. -/// -//===----------------------------------------------------------------------===// - -#include "AArch64TargetMachine.h" -#include "AArch64GlobalISelUtils.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/CodeGen/GlobalISel/Combiner.h" -#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" -#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" -#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/InitializePasses.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "aarch64-postlegalizer-lowering" - -using namespace llvm; -using namespace MIPatternMatch; -using namespace AArch64GISelUtils; - -/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR. -/// -/// Used for matching target-supported shuffles before codegen. -struct ShuffleVectorPseudo { - unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1) - Register Dst; ///< Destination register. - SmallVector<SrcOp, 2> SrcOps; ///< Source registers. 
- ShuffleVectorPseudo(unsigned Opc, Register Dst, - std::initializer_list<SrcOp> SrcOps) - : Opc(Opc), Dst(Dst), SrcOps(SrcOps){}; - ShuffleVectorPseudo() {} -}; - -/// Check if a vector shuffle corresponds to a REV instruction with the -/// specified blocksize. -static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts, - unsigned BlockSize) { - assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && - "Only possible block sizes for REV are: 16, 32, 64"); - assert(EltSize != 64 && "EltSize cannot be 64 for REV mask."); - - unsigned BlockElts = M[0] + 1; - - // If the first shuffle index is UNDEF, be optimistic. - if (M[0] < 0) - BlockElts = BlockSize / EltSize; - - if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize) - return false; - - for (unsigned i = 0; i < NumElts; ++i) { - // Ignore undef indices. - if (M[i] < 0) - continue; - if (static_cast<unsigned>(M[i]) != - (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) - return false; - } - - return true; -} - -/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts. -/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult. -static bool isTRNMask(ArrayRef<int> M, unsigned NumElts, - unsigned &WhichResult) { - if (NumElts % 2 != 0) - return false; - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i < NumElts; i += 2) { - if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) || - (M[i + 1] >= 0 && - static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult)) - return false; - } - return true; -} - -/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector -/// sources of the shuffle are different. -static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M, - unsigned NumElts) { - // Look for the first non-undef element. - auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; }); - if (FirstRealElt == M.end()) - return None; - - // Use APInt to handle overflow when calculating expected element. - unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); - APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1); - - // The following shuffle indices must be the successive elements after the - // first real element. - if (any_of( - make_range(std::next(FirstRealElt), M.end()), - [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; })) - return None; - - // The index of an EXT is the first element if it is not UNDEF. - // Watch out for the beginning UNDEFs. The EXT index should be the expected - // value of the first element. E.g. - // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. - // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. - // ExpectedElt is the last mask index plus 1. - uint64_t Imm = ExpectedElt.getZExtValue(); - bool ReverseExt = false; - - // There are two difference cases requiring to reverse input vectors. - // For example, for vector <4 x i32> we have the following cases, - // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>) - // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>) - // For both cases, we finally use mask <5, 6, 7, 0>, which requires - // to reverse two input vectors. - if (Imm < NumElts) - ReverseExt = true; - else - Imm -= NumElts; - return std::make_pair(ReverseExt, Imm); -} - -/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts. -/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult. 
-static bool isUZPMask(ArrayRef<int> M, unsigned NumElts, - unsigned &WhichResult) { - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i != NumElts; ++i) { - // Skip undef indices. - if (M[i] < 0) - continue; - if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult) - return false; - } - return true; -} - -/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts. -/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult. -static bool isZipMask(ArrayRef<int> M, unsigned NumElts, - unsigned &WhichResult) { - if (NumElts % 2 != 0) - return false; - - // 0 means use ZIP1, 1 means use ZIP2. - WhichResult = (M[0] == 0 ? 0 : 1); - unsigned Idx = WhichResult * NumElts / 2; - for (unsigned i = 0; i != NumElts; i += 2) { - if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) || - (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts)) - return false; - Idx += 1; - } - return true; -} - -/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a -/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc. -static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - LLT Ty = MRI.getType(Dst); - unsigned EltSize = Ty.getScalarSizeInBits(); - - // Element size for a rev cannot be 64. - if (EltSize == 64) - return false; - - unsigned NumElts = Ty.getNumElements(); - - // Try to produce G_REV64 - if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) { - MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src}); - return true; - } - - // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support. - // This should be identical to above, but with a constant 32 and constant - // 16. - return false; -} - -/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with -/// a G_TRN1 or G_TRN2 instruction. -static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - unsigned WhichResult; - ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); - Register Dst = MI.getOperand(0).getReg(); - unsigned NumElts = MRI.getType(Dst).getNumElements(); - if (!isTRNMask(ShuffleMask, NumElts, WhichResult)) - return false; - unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2; - Register V1 = MI.getOperand(1).getReg(); - Register V2 = MI.getOperand(2).getReg(); - MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); - return true; -} - -/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with -/// a G_UZP1 or G_UZP2 instruction. -/// -/// \param [in] MI - The shuffle vector instruction. -/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success. -static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - unsigned WhichResult; - ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); - Register Dst = MI.getOperand(0).getReg(); - unsigned NumElts = MRI.getType(Dst).getNumElements(); - if (!isUZPMask(ShuffleMask, NumElts, WhichResult)) - return false; - unsigned Opc = (WhichResult == 0) ? 
AArch64::G_UZP1 : AArch64::G_UZP2; - Register V1 = MI.getOperand(1).getReg(); - Register V2 = MI.getOperand(2).getReg(); - MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); - return true; -} - -static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - unsigned WhichResult; - ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); - Register Dst = MI.getOperand(0).getReg(); - unsigned NumElts = MRI.getType(Dst).getNumElements(); - if (!isZipMask(ShuffleMask, NumElts, WhichResult)) - return false; - unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2; - Register V1 = MI.getOperand(1).getReg(); - Register V2 = MI.getOperand(2).getReg(); - MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); - return true; -} - -/// Helper function for matchDup. -static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI, - MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - if (Lane != 0) - return false; - - // Try to match a vector splat operation into a dup instruction. - // We're looking for this pattern: - // - // %scalar:gpr(s64) = COPY $x0 - // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF - // %cst0:gpr(s32) = G_CONSTANT i32 0 - // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32) - // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32) - // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>) - // - // ...into: - // %splat = G_DUP %scalar - - // Begin matching the insert. - auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT, - MI.getOperand(1).getReg(), MRI); - if (!InsMI) - return false; - // Match the undef vector operand. - if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), - MRI)) - return false; - - // Match the index constant 0. - if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt())) - return false; - - MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), - {InsMI->getOperand(2).getReg()}); - return true; -} - -/// Helper function for matchDup. -static bool matchDupFromBuildVector(int Lane, MachineInstr &MI, - MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(Lane >= 0 && "Expected positive lane?"); - // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the - // lane's definition directly. - auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, - MI.getOperand(1).getReg(), MRI); - if (!BuildVecMI) - return false; - Register Reg = BuildVecMI->getOperand(Lane + 1).getReg(); - MatchInfo = - ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg}); - return true; -} - -static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - auto MaybeLane = getSplatIndex(MI); - if (!MaybeLane) - return false; - int Lane = *MaybeLane; - // If this is undef splat, generate it via "just" vdup, if possible. 
- if (Lane < 0) - Lane = 0; - if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo)) - return true; - if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo)) - return true; - return false; -} - -static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI, - ShuffleVectorPseudo &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - Register Dst = MI.getOperand(0).getReg(); - auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(), - MRI.getType(Dst).getNumElements()); - if (!ExtInfo) - return false; - bool ReverseExt; - uint64_t Imm; - std::tie(ReverseExt, Imm) = *ExtInfo; - Register V1 = MI.getOperand(1).getReg(); - Register V2 = MI.getOperand(2).getReg(); - if (ReverseExt) - std::swap(V1, V2); - uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8; - Imm *= ExtFactor; - MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm}); - return true; -} - -/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo. -/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR. -static bool applyShuffleVectorPseudo(MachineInstr &MI, - ShuffleVectorPseudo &MatchInfo) { - MachineIRBuilder MIRBuilder(MI); - MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps); - MI.eraseFromParent(); - return true; -} - -/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT. -/// Special-cased because the constant operand must be emitted as a G_CONSTANT -/// for the imported tablegen patterns to work. -static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) { - MachineIRBuilder MIRBuilder(MI); - // Tablegen patterns expect an i32 G_CONSTANT as the final op. - auto Cst = - MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm()); - MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, - {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst}); - MI.eraseFromParent(); - return true; -} - -/// isVShiftRImm - Check if this is a valid vector for the immediate -/// operand of a vector shift right operation. The value must be in the range: -/// 1 <= Value <= ElementBits for a right shift. -static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty, - int64_t &Cnt) { - assert(Ty.isVector() && "vector shift count is not a vector type"); - MachineInstr *MI = MRI.getVRegDef(Reg); - auto Cst = getBuildVectorConstantSplat(*MI, MRI); - if (!Cst) - return false; - Cnt = *Cst; - int64_t ElementBits = Ty.getScalarSizeInBits(); - return Cnt >= 1 && Cnt <= ElementBits; -} - -/// Match a vector G_ASHR or G_LSHR with a valid immediate shift. -static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, - int64_t &Imm) { - assert(MI.getOpcode() == TargetOpcode::G_ASHR || - MI.getOpcode() == TargetOpcode::G_LSHR); - LLT Ty = MRI.getType(MI.getOperand(1).getReg()); - if (!Ty.isVector()) - return false; - return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm); -} - -static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, - int64_t &Imm) { - unsigned Opc = MI.getOpcode(); - assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR); - unsigned NewOpc = - Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR; - MachineIRBuilder MIB(MI); - auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm); - MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef}); - MI.eraseFromParent(); - return true; -} - -/// Determine if it is possible to modify the \p RHS and predicate \p P of a -/// G_ICMP instruction such that the right-hand side is an arithmetic immediate. 
-/// -/// \returns A pair containing the updated immediate and predicate which may -/// be used to optimize the instruction. -/// -/// \note This assumes that the comparison has been legalized. -Optional<std::pair<uint64_t, CmpInst::Predicate>> -tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P, - const MachineRegisterInfo &MRI) { - const auto &Ty = MRI.getType(RHS); - if (Ty.isVector()) - return None; - unsigned Size = Ty.getSizeInBits(); - assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?"); - - // If the RHS is not a constant, or the RHS is already a valid arithmetic - // immediate, then there is nothing to change. - auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI); - if (!ValAndVReg) - return None; - uint64_t C = ValAndVReg->Value.getZExtValue(); - if (isLegalArithImmed(C)) - return None; - - // We have a non-arithmetic immediate. Check if adjusting the immediate and - // adjusting the predicate will result in a legal arithmetic immediate. - switch (P) { - default: - return None; - case CmpInst::ICMP_SLT: - case CmpInst::ICMP_SGE: - // Check for - // - // x slt c => x sle c - 1 - // x sge c => x sgt c - 1 - // - // When c is not the smallest possible negative number. - if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) || - (Size == 32 && static_cast<int32_t>(C) == INT32_MIN)) - return None; - P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT; - C -= 1; - break; - case CmpInst::ICMP_ULT: - case CmpInst::ICMP_UGE: - // Check for - // - // x ult c => x ule c - 1 - // x uge c => x ugt c - 1 - // - // When c is not zero. - if (C == 0) - return None; - P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; - C -= 1; - break; - case CmpInst::ICMP_SLE: - case CmpInst::ICMP_SGT: - // Check for - // - // x sle c => x slt c + 1 - // x sgt c => s sge c + 1 - // - // When c is not the largest possible signed integer. - if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) || - (Size == 64 && static_cast<int64_t>(C) == INT64_MAX)) - return None; - P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE; - C += 1; - break; - case CmpInst::ICMP_ULE: - case CmpInst::ICMP_UGT: - // Check for - // - // x ule c => x ult c + 1 - // x ugt c => s uge c + 1 - // - // When c is not the largest possible unsigned integer. - if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) || - (Size == 64 && C == UINT64_MAX)) - return None; - P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; - C += 1; - break; - } - - // Check if the new constant is valid, and return the updated constant and - // predicate if it is. - if (Size == 32) - C = static_cast<uint32_t>(C); - if (!isLegalArithImmed(C)) - return None; - return {{C, P}}; -} - -/// Determine whether or not it is possible to update the RHS and predicate of -/// a G_ICMP instruction such that the RHS will be selected as an arithmetic -/// immediate. -/// -/// \p MI - The G_ICMP instruction -/// \p MatchInfo - The new RHS immediate and predicate on success -/// -/// See tryAdjustICmpImmAndPred for valid transformations. 
-bool matchAdjustICmpImmAndPred( - MachineInstr &MI, const MachineRegisterInfo &MRI, - std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_ICMP); - Register RHS = MI.getOperand(3).getReg(); - auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); - if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) { - MatchInfo = *MaybeNewImmAndPred; - return true; - } - return false; -} - -bool applyAdjustICmpImmAndPred( - MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo, - MachineIRBuilder &MIB, GISelChangeObserver &Observer) { - MIB.setInstrAndDebugLoc(MI); - MachineOperand &RHS = MI.getOperand(3); - MachineRegisterInfo &MRI = *MIB.getMRI(); - auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()), - MatchInfo.first); - Observer.changingInstr(MI); - RHS.setReg(Cst->getOperand(0).getReg()); - MI.getOperand(1).setPredicate(MatchInfo.second); - Observer.changedInstr(MI); - return true; -} - -bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, - std::pair<unsigned, int> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - Register Src1Reg = MI.getOperand(1).getReg(); - const LLT SrcTy = MRI.getType(Src1Reg); - const LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - - auto LaneIdx = getSplatIndex(MI); - if (!LaneIdx) - return false; - - // The lane idx should be within the first source vector. - if (*LaneIdx >= SrcTy.getNumElements()) - return false; - - if (DstTy != SrcTy) - return false; - - LLT ScalarTy = SrcTy.getElementType(); - unsigned ScalarSize = ScalarTy.getSizeInBits(); - - unsigned Opc = 0; - switch (SrcTy.getNumElements()) { - case 2: - if (ScalarSize == 64) - Opc = AArch64::G_DUPLANE64; - break; - case 4: - if (ScalarSize == 32) - Opc = AArch64::G_DUPLANE32; - break; - case 8: - if (ScalarSize == 16) - Opc = AArch64::G_DUPLANE16; - break; - case 16: - if (ScalarSize == 8) - Opc = AArch64::G_DUPLANE8; - break; - default: - break; - } - if (!Opc) - return false; - - MatchInfo.first = Opc; - MatchInfo.second = *LaneIdx; - return true; -} - -bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); - B.setInstrAndDebugLoc(MI); - auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second); - B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, - {MI.getOperand(1).getReg(), Lane}); - MI.eraseFromParent(); - return true; -} - -#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS -#include "AArch64GenPostLegalizeGILowering.inc" -#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS - -namespace { -#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H -#include "AArch64GenPostLegalizeGILowering.inc" -#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H - -class AArch64PostLegalizerLoweringInfo : public CombinerInfo { -public: - AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg; - - AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize) - : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, - /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize, - MinSize) { - if (!GeneratedRuleCfg.parseCommandLineOption()) - report_fatal_error("Invalid rule identifier"); - } - - virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, - MachineIRBuilder &B) const override; -}; - -bool 
AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer, - MachineInstr &MI, - MachineIRBuilder &B) const { - CombinerHelper Helper(Observer, B); - AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg); - return Generated.tryCombineAll(Observer, MI, B, Helper); -} - -#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP -#include "AArch64GenPostLegalizeGILowering.inc" -#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP - -class AArch64PostLegalizerLowering : public MachineFunctionPass { -public: - static char ID; - - AArch64PostLegalizerLowering(); - - StringRef getPassName() const override { - return "AArch64PostLegalizerLowering"; - } - - bool runOnMachineFunction(MachineFunction &MF) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; -}; -} // end anonymous namespace - -void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetPassConfig>(); - AU.setPreservesCFG(); - getSelectionDAGFallbackAnalysisUsage(AU); - MachineFunctionPass::getAnalysisUsage(AU); -} - -AArch64PostLegalizerLowering::AArch64PostLegalizerLowering() - : MachineFunctionPass(ID) { - initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry()); -} - -bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) { - if (MF.getProperties().hasProperty( - MachineFunctionProperties::Property::FailedISel)) - return false; - assert(MF.getProperties().hasProperty( - MachineFunctionProperties::Property::Legalized) && - "Expected a legalized function?"); - auto *TPC = &getAnalysis<TargetPassConfig>(); - const Function &F = MF.getFunction(); - AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize()); - Combiner C(PCInfo, TPC); - return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); -} - -char AArch64PostLegalizerLowering::ID = 0; -INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE, - "Lower AArch64 MachineInstrs after legalization", false, - false) -INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) -INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE, - "Lower AArch64 MachineInstrs after legalization", false, - false) - -namespace llvm { -FunctionPass *createAArch64PostLegalizerLowering() { - return new AArch64PostLegalizerLowering(); -} -} // end namespace llvm +//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Post-legalization lowering for instructions. +/// +/// This is used to offload pattern matching from the selector. +/// +/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually +/// a G_ZIP, G_UZP, etc. +/// +/// General optimization combines should be handled by either the +/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner. 
+/// +//===----------------------------------------------------------------------===// + +#include "AArch64TargetMachine.h" +#include "AArch64GlobalISelUtils.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "aarch64-postlegalizer-lowering" + +using namespace llvm; +using namespace MIPatternMatch; +using namespace AArch64GISelUtils; + +/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR. +/// +/// Used for matching target-supported shuffles before codegen. +struct ShuffleVectorPseudo { + unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1) + Register Dst; ///< Destination register. + SmallVector<SrcOp, 2> SrcOps; ///< Source registers. + ShuffleVectorPseudo(unsigned Opc, Register Dst, + std::initializer_list<SrcOp> SrcOps) + : Opc(Opc), Dst(Dst), SrcOps(SrcOps){}; + ShuffleVectorPseudo() {} +}; + +/// Check if a vector shuffle corresponds to a REV instruction with the +/// specified blocksize. +static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts, + unsigned BlockSize) { + assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && + "Only possible block sizes for REV are: 16, 32, 64"); + assert(EltSize != 64 && "EltSize cannot be 64 for REV mask."); + + unsigned BlockElts = M[0] + 1; + + // If the first shuffle index is UNDEF, be optimistic. + if (M[0] < 0) + BlockElts = BlockSize / EltSize; + + if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize) + return false; + + for (unsigned i = 0; i < NumElts; ++i) { + // Ignore undef indices. + if (M[i] < 0) + continue; + if (static_cast<unsigned>(M[i]) != + (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) + return false; + } + + return true; +} + +/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts. +/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult. +static bool isTRNMask(ArrayRef<int> M, unsigned NumElts, + unsigned &WhichResult) { + if (NumElts % 2 != 0) + return false; + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) || + (M[i + 1] >= 0 && + static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult)) + return false; + } + return true; +} + +/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector +/// sources of the shuffle are different. +static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M, + unsigned NumElts) { + // Look for the first non-undef element. + auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; }); + if (FirstRealElt == M.end()) + return None; + + // Use APInt to handle overflow when calculating expected element. + unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); + APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1); + + // The following shuffle indices must be the successive elements after the + // first real element. 
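+ // For example, with <4 x i32> operands, masks such as <1, 2, 3, 4> and
+ // <-1, 2, 3, 4> satisfy this successive-elements check (illustrative
+ // values only, derived from the predicate below).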
+ if (any_of( + make_range(std::next(FirstRealElt), M.end()), + [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; })) + return None; + + // The index of an EXT is the first element if it is not UNDEF. + // Watch out for the beginning UNDEFs. The EXT index should be the expected + // value of the first element. E.g. + // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. + // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. + // ExpectedElt is the last mask index plus 1. + uint64_t Imm = ExpectedElt.getZExtValue(); + bool ReverseExt = false; + + // There are two difference cases requiring to reverse input vectors. + // For example, for vector <4 x i32> we have the following cases, + // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>) + // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>) + // For both cases, we finally use mask <5, 6, 7, 0>, which requires + // to reverse two input vectors. + if (Imm < NumElts) + ReverseExt = true; + else + Imm -= NumElts; + return std::make_pair(ReverseExt, Imm); +} + +/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts. +/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult. +static bool isUZPMask(ArrayRef<int> M, unsigned NumElts, + unsigned &WhichResult) { + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i != NumElts; ++i) { + // Skip undef indices. + if (M[i] < 0) + continue; + if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult) + return false; + } + return true; +} + +/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts. +/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult. +static bool isZipMask(ArrayRef<int> M, unsigned NumElts, + unsigned &WhichResult) { + if (NumElts % 2 != 0) + return false; + + // 0 means use ZIP1, 1 means use ZIP2. + WhichResult = (M[0] == 0 ? 0 : 1); + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned i = 0; i != NumElts; i += 2) { + if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) || + (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts)) + return false; + Idx += 1; + } + return true; +} + +/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a +/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc. +static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(Dst); + unsigned EltSize = Ty.getScalarSizeInBits(); + + // Element size for a rev cannot be 64. + if (EltSize == 64) + return false; + + unsigned NumElts = Ty.getNumElements(); + + // Try to produce G_REV64 + if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) { + MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src}); + return true; + } + + // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support. + // This should be identical to above, but with a constant 32 and constant + // 16. + return false; +} + +/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with +/// a G_TRN1 or G_TRN2 instruction. 
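+/// For example, with two <4 x s32> sources, the mask <0, 4, 2, 6> interleaves
+/// the even lanes of both sources and maps to G_TRN1, while <1, 5, 3, 7>
+/// interleaves the odd lanes and maps to G_TRN2 (masks derived from the
+/// isTRNMask predicate above; shown here for illustration only).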
+static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + unsigned WhichResult; + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + unsigned NumElts = MRI.getType(Dst).getNumElements(); + if (!isTRNMask(ShuffleMask, NumElts, WhichResult)) + return false; + unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); + return true; +} + +/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with +/// a G_UZP1 or G_UZP2 instruction. +/// +/// \param [in] MI - The shuffle vector instruction. +/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success. +static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + unsigned WhichResult; + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + unsigned NumElts = MRI.getType(Dst).getNumElements(); + if (!isUZPMask(ShuffleMask, NumElts, WhichResult)) + return false; + unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); + return true; +} + +static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + unsigned WhichResult; + ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + unsigned NumElts = MRI.getType(Dst).getNumElements(); + if (!isZipMask(ShuffleMask, NumElts, WhichResult)) + return false; + unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); + return true; +} + +/// Helper function for matchDup. +static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI, + MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + if (Lane != 0) + return false; + + // Try to match a vector splat operation into a dup instruction. + // We're looking for this pattern: + // + // %scalar:gpr(s64) = COPY $x0 + // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF + // %cst0:gpr(s32) = G_CONSTANT i32 0 + // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32) + // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32) + // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>) + // + // ...into: + // %splat = G_DUP %scalar + + // Begin matching the insert. + auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT, + MI.getOperand(1).getReg(), MRI); + if (!InsMI) + return false; + // Match the undef vector operand. + if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), + MRI)) + return false; + + // Match the index constant 0. + if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt())) + return false; + + MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), + {InsMI->getOperand(2).getReg()}); + return true; +} + +/// Helper function for matchDup. 
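+/// Handles the case where the shuffle's first source is a G_BUILD_VECTOR, so
+/// the scalar defining the splatted lane can feed a G_DUP directly.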
+static bool matchDupFromBuildVector(int Lane, MachineInstr &MI, + MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(Lane >= 0 && "Expected positive lane?"); + // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the + // lane's definition directly. + auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, + MI.getOperand(1).getReg(), MRI); + if (!BuildVecMI) + return false; + Register Reg = BuildVecMI->getOperand(Lane + 1).getReg(); + MatchInfo = + ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg}); + return true; +} + +static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + auto MaybeLane = getSplatIndex(MI); + if (!MaybeLane) + return false; + int Lane = *MaybeLane; + // If this is undef splat, generate it via "just" vdup, if possible. + if (Lane < 0) + Lane = 0; + if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo)) + return true; + if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo)) + return true; + return false; +} + +static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + Register Dst = MI.getOperand(0).getReg(); + auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(), + MRI.getType(Dst).getNumElements()); + if (!ExtInfo) + return false; + bool ReverseExt; + uint64_t Imm; + std::tie(ReverseExt, Imm) = *ExtInfo; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + if (ReverseExt) + std::swap(V1, V2); + uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8; + Imm *= ExtFactor; + MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm}); + return true; +} + +/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo. +/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR. +static bool applyShuffleVectorPseudo(MachineInstr &MI, + ShuffleVectorPseudo &MatchInfo) { + MachineIRBuilder MIRBuilder(MI); + MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps); + MI.eraseFromParent(); + return true; +} + +/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT. +/// Special-cased because the constant operand must be emitted as a G_CONSTANT +/// for the imported tablegen patterns to work. +static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) { + MachineIRBuilder MIRBuilder(MI); + // Tablegen patterns expect an i32 G_CONSTANT as the final op. + auto Cst = + MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm()); + MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, + {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst}); + MI.eraseFromParent(); + return true; +} + +/// isVShiftRImm - Check if this is a valid vector for the immediate +/// operand of a vector shift right operation. The value must be in the range: +/// 1 <= Value <= ElementBits for a right shift. +static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty, + int64_t &Cnt) { + assert(Ty.isVector() && "vector shift count is not a vector type"); + MachineInstr *MI = MRI.getVRegDef(Reg); + auto Cst = getBuildVectorConstantSplat(*MI, MRI); + if (!Cst) + return false; + Cnt = *Cst; + int64_t ElementBits = Ty.getScalarSizeInBits(); + return Cnt >= 1 && Cnt <= ElementBits; +} + +/// Match a vector G_ASHR or G_LSHR with a valid immediate shift. 
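+/// For example, a G_LSHR of a <4 x s32> value by a G_BUILD_VECTOR splat of 3
+/// matches here and is later rewritten to AArch64::G_VLSHR with immediate 3
+/// (see applyVAshrLshrImm below; the example values are illustrative only).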
+static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, + int64_t &Imm) { + assert(MI.getOpcode() == TargetOpcode::G_ASHR || + MI.getOpcode() == TargetOpcode::G_LSHR); + LLT Ty = MRI.getType(MI.getOperand(1).getReg()); + if (!Ty.isVector()) + return false; + return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm); +} + +static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, + int64_t &Imm) { + unsigned Opc = MI.getOpcode(); + assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR); + unsigned NewOpc = + Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR; + MachineIRBuilder MIB(MI); + auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm); + MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef}); + MI.eraseFromParent(); + return true; +} + +/// Determine if it is possible to modify the \p RHS and predicate \p P of a +/// G_ICMP instruction such that the right-hand side is an arithmetic immediate. +/// +/// \returns A pair containing the updated immediate and predicate which may +/// be used to optimize the instruction. +/// +/// \note This assumes that the comparison has been legalized. +Optional<std::pair<uint64_t, CmpInst::Predicate>> +tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P, + const MachineRegisterInfo &MRI) { + const auto &Ty = MRI.getType(RHS); + if (Ty.isVector()) + return None; + unsigned Size = Ty.getSizeInBits(); + assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?"); + + // If the RHS is not a constant, or the RHS is already a valid arithmetic + // immediate, then there is nothing to change. + auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI); + if (!ValAndVReg) + return None; + uint64_t C = ValAndVReg->Value.getZExtValue(); + if (isLegalArithImmed(C)) + return None; + + // We have a non-arithmetic immediate. Check if adjusting the immediate and + // adjusting the predicate will result in a legal arithmetic immediate. + switch (P) { + default: + return None; + case CmpInst::ICMP_SLT: + case CmpInst::ICMP_SGE: + // Check for + // + // x slt c => x sle c - 1 + // x sge c => x sgt c - 1 + // + // When c is not the smallest possible negative number. + if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) || + (Size == 32 && static_cast<int32_t>(C) == INT32_MIN)) + return None; + P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT; + C -= 1; + break; + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_UGE: + // Check for + // + // x ult c => x ule c - 1 + // x uge c => x ugt c - 1 + // + // When c is not zero. + if (C == 0) + return None; + P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; + C -= 1; + break; + case CmpInst::ICMP_SLE: + case CmpInst::ICMP_SGT: + // Check for + // + // x sle c => x slt c + 1 + // x sgt c => s sge c + 1 + // + // When c is not the largest possible signed integer. + if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) || + (Size == 64 && static_cast<int64_t>(C) == INT64_MAX)) + return None; + P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE; + C += 1; + break; + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_UGT: + // Check for + // + // x ule c => x ult c + 1 + // x ugt c => s uge c + 1 + // + // When c is not the largest possible unsigned integer. + if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) || + (Size == 64 && C == UINT64_MAX)) + return None; + P = (P == CmpInst::ICMP_ULE) ? 
CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; + C += 1; + break; + } + + // Check if the new constant is valid, and return the updated constant and + // predicate if it is. + if (Size == 32) + C = static_cast<uint32_t>(C); + if (!isLegalArithImmed(C)) + return None; + return {{C, P}}; +} + +/// Determine whether or not it is possible to update the RHS and predicate of +/// a G_ICMP instruction such that the RHS will be selected as an arithmetic +/// immediate. +/// +/// \p MI - The G_ICMP instruction +/// \p MatchInfo - The new RHS immediate and predicate on success +/// +/// See tryAdjustICmpImmAndPred for valid transformations. +bool matchAdjustICmpImmAndPred( + MachineInstr &MI, const MachineRegisterInfo &MRI, + std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ICMP); + Register RHS = MI.getOperand(3).getReg(); + auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) { + MatchInfo = *MaybeNewImmAndPred; + return true; + } + return false; +} + +bool applyAdjustICmpImmAndPred( + MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo, + MachineIRBuilder &MIB, GISelChangeObserver &Observer) { + MIB.setInstrAndDebugLoc(MI); + MachineOperand &RHS = MI.getOperand(3); + MachineRegisterInfo &MRI = *MIB.getMRI(); + auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()), + MatchInfo.first); + Observer.changingInstr(MI); + RHS.setReg(Cst->getOperand(0).getReg()); + MI.getOperand(1).setPredicate(MatchInfo.second); + Observer.changedInstr(MI); + return true; +} + +bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, + std::pair<unsigned, int> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + Register Src1Reg = MI.getOperand(1).getReg(); + const LLT SrcTy = MRI.getType(Src1Reg); + const LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + auto LaneIdx = getSplatIndex(MI); + if (!LaneIdx) + return false; + + // The lane idx should be within the first source vector. 
+ if (*LaneIdx >= SrcTy.getNumElements()) + return false; + + if (DstTy != SrcTy) + return false; + + LLT ScalarTy = SrcTy.getElementType(); + unsigned ScalarSize = ScalarTy.getSizeInBits(); + + unsigned Opc = 0; + switch (SrcTy.getNumElements()) { + case 2: + if (ScalarSize == 64) + Opc = AArch64::G_DUPLANE64; + break; + case 4: + if (ScalarSize == 32) + Opc = AArch64::G_DUPLANE32; + break; + case 8: + if (ScalarSize == 16) + Opc = AArch64::G_DUPLANE16; + break; + case 16: + if (ScalarSize == 8) + Opc = AArch64::G_DUPLANE8; + break; + default: + break; + } + if (!Opc) + return false; + + MatchInfo.first = Opc; + MatchInfo.second = *LaneIdx; + return true; +} + +bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + B.setInstrAndDebugLoc(MI); + auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second); + B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, + {MI.getOperand(1).getReg(), Lane}); + MI.eraseFromParent(); + return true; +} + +#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS +#include "AArch64GenPostLegalizeGILowering.inc" +#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS + +namespace { +#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H +#include "AArch64GenPostLegalizeGILowering.inc" +#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H + +class AArch64PostLegalizerLoweringInfo : public CombinerInfo { +public: + AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg; + + AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize) + : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, + /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize, + MinSize) { + if (!GeneratedRuleCfg.parseCommandLineOption()) + report_fatal_error("Invalid rule identifier"); + } + + virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, + MachineIRBuilder &B) const override; +}; + +bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer, + MachineInstr &MI, + MachineIRBuilder &B) const { + CombinerHelper Helper(Observer, B); + AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg); + return Generated.tryCombineAll(Observer, MI, B, Helper); +} + +#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP +#include "AArch64GenPostLegalizeGILowering.inc" +#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP + +class AArch64PostLegalizerLowering : public MachineFunctionPass { +public: + static char ID; + + AArch64PostLegalizerLowering(); + + StringRef getPassName() const override { + return "AArch64PostLegalizerLowering"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; +} // end anonymous namespace + +void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +AArch64PostLegalizerLowering::AArch64PostLegalizerLowering() + : MachineFunctionPass(ID) { + initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry()); +} + +bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + assert(MF.getProperties().hasProperty( + 
MachineFunctionProperties::Property::Legalized) && + "Expected a legalized function?"); + auto *TPC = &getAnalysis<TargetPassConfig>(); + const Function &F = MF.getFunction(); + AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize()); + Combiner C(PCInfo, TPC); + return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); +} + +char AArch64PostLegalizerLowering::ID = 0; +INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE, + "Lower AArch64 MachineInstrs after legalization", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE, + "Lower AArch64 MachineInstrs after legalization", false, + false) + +namespace llvm { +FunctionPass *createAArch64PostLegalizerLowering() { + return new AArch64PostLegalizerLowering(); +} +} // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp index 2f882ecb1f..00436b5924 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp @@ -1,187 +1,187 @@ -//=== AArch64PostSelectOptimize.cpp ---------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass does post-instruction-selection optimizations in the GlobalISel -// pipeline, before the rest of codegen runs. -// -//===----------------------------------------------------------------------===// - -#include "AArch64.h" -#include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "aarch64-post-select-optimize" - -using namespace llvm; - -namespace { -class AArch64PostSelectOptimize : public MachineFunctionPass { -public: - static char ID; - - AArch64PostSelectOptimize(); - - StringRef getPassName() const override { - return "AArch64 Post Select Optimizer"; - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override; - -private: - bool optimizeNZCVDefs(MachineBasicBlock &MBB); -}; -} // end anonymous namespace - -void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetPassConfig>(); - AU.setPreservesCFG(); - getSelectionDAGFallbackAnalysisUsage(AU); - MachineFunctionPass::getAnalysisUsage(AU); -} - -AArch64PostSelectOptimize::AArch64PostSelectOptimize() - : MachineFunctionPass(ID) { - initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry()); -} - -unsigned getNonFlagSettingVariant(unsigned Opc) { - switch (Opc) { - default: - return 0; - case AArch64::SUBSXrr: - return AArch64::SUBXrr; - case AArch64::SUBSWrr: - return AArch64::SUBWrr; - case AArch64::SUBSXrs: - return AArch64::SUBXrs; - case AArch64::SUBSXri: - return AArch64::SUBXri; - case AArch64::SUBSWri: - return AArch64::SUBWri; - } -} - -bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) { - // Consider the following code: - // FCMPSrr %0, %1, 
implicit-def $nzcv - // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv - // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv - // FCMPSrr %0, %1, implicit-def $nzcv - // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv - // This kind of code where we have 2 FCMPs each feeding a CSEL can happen - // when we have a single IR fcmp being used by two selects. During selection, - // to ensure that there can be no clobbering of nzcv between the fcmp and the - // csel, we have to generate an fcmp immediately before each csel is - // selected. - // However, often we can essentially CSE these together later in MachineCSE. - // This doesn't work though if there are unrelated flag-setting instructions - // in between the two FCMPs. In this case, the SUBS defines NZCV - // but it doesn't have any users, being overwritten by the second FCMP. - // - // Our solution here is to try to convert flag setting operations between - // a interval of identical FCMPs, so that CSE will be able to eliminate one. - bool Changed = false; - const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo(); - - // The first step is to find the first and last FCMPs. If we have found - // at least two, then set the limit of the bottom-up walk to the first FCMP - // found since we're only interested in dealing with instructions between - // them. - MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr; - for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) { - if (MI.getOpcode() == AArch64::FCMPSrr || - MI.getOpcode() == AArch64::FCMPDrr) { - if (!FirstCmp) - FirstCmp = &MI; - else - LastCmp = &MI; - } - } - - // In addition to converting flag-setting ops in fcmp ranges into non-flag - // setting ops, across the whole basic block we also detect when nzcv - // implicit-defs are dead, and mark them as dead. Peephole optimizations need - // this information later. - - LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); - LRU.addLiveOuts(MBB); - bool NZCVDead = LRU.available(AArch64::NZCV); - bool InsideCmpRange = false; - for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { - LRU.stepBackward(II); - - if (LastCmp) { // There's a range present in this block. - // If we're inside an fcmp range, look for begin instruction. - if (InsideCmpRange && &II == FirstCmp) - InsideCmpRange = false; - else if (&II == LastCmp) - InsideCmpRange = true; - } - - // Did this instruction define NZCV? - bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV); - if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) { - // If we have a def and NZCV is dead, then we may convert this op. - unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); - int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); - if (DeadNZCVIdx != -1) { - // If we're inside an fcmp range, then convert flag setting ops. - if (InsideCmpRange && NewOpc) { - LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " - "op in fcmp range: " - << II); - II.setDesc(TII->get(NewOpc)); - II.RemoveOperand(DeadNZCVIdx); - Changed |= true; - } else { - // Otherwise, we just set the nzcv imp-def operand to be dead, so the - // peephole optimizations can optimize them further. 
- II.getOperand(DeadNZCVIdx).setIsDead(); - } - } - } - - NZCVDead = NZCVDeadAtCurrInstr; - } - return Changed; -} - -bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { - if (MF.getProperties().hasProperty( - MachineFunctionProperties::Property::FailedISel)) - return false; - assert(MF.getProperties().hasProperty( - MachineFunctionProperties::Property::Selected) && - "Expected a selected MF"); - - bool Changed = false; - for (auto &BB : MF) - Changed |= optimizeNZCVDefs(BB); - return true; -} - -char AArch64PostSelectOptimize::ID = 0; -INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, - "Optimize AArch64 selected instructions", - false, false) -INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, - "Optimize AArch64 selected instructions", false, - false) - -namespace llvm { -FunctionPass *createAArch64PostSelectOptimize() { - return new AArch64PostSelectOptimize(); -} -} // end namespace llvm +//=== AArch64PostSelectOptimize.cpp ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass does post-instruction-selection optimizations in the GlobalISel +// pipeline, before the rest of codegen runs. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "aarch64-post-select-optimize" + +using namespace llvm; + +namespace { +class AArch64PostSelectOptimize : public MachineFunctionPass { +public: + static char ID; + + AArch64PostSelectOptimize(); + + StringRef getPassName() const override { + return "AArch64 Post Select Optimizer"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + bool optimizeNZCVDefs(MachineBasicBlock &MBB); +}; +} // end anonymous namespace + +void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +AArch64PostSelectOptimize::AArch64PostSelectOptimize() + : MachineFunctionPass(ID) { + initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry()); +} + +unsigned getNonFlagSettingVariant(unsigned Opc) { + switch (Opc) { + default: + return 0; + case AArch64::SUBSXrr: + return AArch64::SUBXrr; + case AArch64::SUBSWrr: + return AArch64::SUBWrr; + case AArch64::SUBSXrs: + return AArch64::SUBXrs; + case AArch64::SUBSXri: + return AArch64::SUBXri; + case AArch64::SUBSWri: + return AArch64::SUBWri; + } +} + +bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) { + // Consider the following code: + // FCMPSrr %0, %1, implicit-def $nzcv + // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv + // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv + // FCMPSrr %0, %1, implicit-def $nzcv + // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv + // This kind of code where we have 2 FCMPs each feeding a CSEL 
can happen + // when we have a single IR fcmp being used by two selects. During selection, + // to ensure that there can be no clobbering of nzcv between the fcmp and the + // csel, we have to generate an fcmp immediately before each csel is + // selected. + // However, often we can essentially CSE these together later in MachineCSE. + // This doesn't work though if there are unrelated flag-setting instructions + // in between the two FCMPs. In this case, the SUBS defines NZCV + // but it doesn't have any users, being overwritten by the second FCMP. + // + // Our solution here is to try to convert flag setting operations between + // a interval of identical FCMPs, so that CSE will be able to eliminate one. + bool Changed = false; + const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo(); + + // The first step is to find the first and last FCMPs. If we have found + // at least two, then set the limit of the bottom-up walk to the first FCMP + // found since we're only interested in dealing with instructions between + // them. + MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr; + for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) { + if (MI.getOpcode() == AArch64::FCMPSrr || + MI.getOpcode() == AArch64::FCMPDrr) { + if (!FirstCmp) + FirstCmp = &MI; + else + LastCmp = &MI; + } + } + + // In addition to converting flag-setting ops in fcmp ranges into non-flag + // setting ops, across the whole basic block we also detect when nzcv + // implicit-defs are dead, and mark them as dead. Peephole optimizations need + // this information later. + + LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); + LRU.addLiveOuts(MBB); + bool NZCVDead = LRU.available(AArch64::NZCV); + bool InsideCmpRange = false; + for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { + LRU.stepBackward(II); + + if (LastCmp) { // There's a range present in this block. + // If we're inside an fcmp range, look for begin instruction. + if (InsideCmpRange && &II == FirstCmp) + InsideCmpRange = false; + else if (&II == LastCmp) + InsideCmpRange = true; + } + + // Did this instruction define NZCV? + bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV); + if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) { + // If we have a def and NZCV is dead, then we may convert this op. + unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); + int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); + if (DeadNZCVIdx != -1) { + // If we're inside an fcmp range, then convert flag setting ops. + if (InsideCmpRange && NewOpc) { + LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " + "op in fcmp range: " + << II); + II.setDesc(TII->get(NewOpc)); + II.RemoveOperand(DeadNZCVIdx); + Changed |= true; + } else { + // Otherwise, we just set the nzcv imp-def operand to be dead, so the + // peephole optimizations can optimize them further. 
+ II.getOperand(DeadNZCVIdx).setIsDead(); + } + } + } + + NZCVDead = NZCVDeadAtCurrInstr; + } + return Changed; +} + +bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + assert(MF.getProperties().hasProperty( + MachineFunctionProperties::Property::Selected) && + "Expected a selected MF"); + + bool Changed = false; + for (auto &BB : MF) + Changed |= optimizeNZCVDefs(BB); + return true; +} + +char AArch64PostSelectOptimize::ID = 0; +INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, + "Optimize AArch64 selected instructions", + false, false) +INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, + "Optimize AArch64 selected instructions", false, + false) + +namespace llvm { +FunctionPass *createAArch64PostSelectOptimize() { + return new AArch64PostSelectOptimize(); +} +} // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp index 5f9b64e274..2686f6dc46 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -104,16 +104,16 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: return Helper.tryCombineShuffleVector(MI); - case TargetOpcode::G_MEMCPY: - case TargetOpcode::G_MEMMOVE: - case TargetOpcode::G_MEMSET: { - // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other - // heuristics decide. - unsigned MaxLen = EnableOpt ? 0 : 32; - // Try to inline memcpy type calls if optimizations are enabled. - return !EnableMinSize ? Helper.tryCombineMemCpyFamily(MI, MaxLen) : false; - } + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: { + // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other + // heuristics decide. + unsigned MaxLen = EnableOpt ? 0 : 32; + // Try to inline memcpy type calls if optimizations are enabled. + return !EnableMinSize ? 
Helper.tryCombineMemCpyFamily(MI, MaxLen) : false; } + } return false; } @@ -188,7 +188,7 @@ INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE, namespace llvm { -FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone) { +FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone) { return new AArch64PreLegalizerCombiner(IsOptNone); } } // end namespace llvm diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index c76c43389b..e26fe60d93 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -13,7 +13,7 @@ #include "AArch64RegisterBankInfo.h" #include "AArch64InstrInfo.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" @@ -466,10 +466,10 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping( getValueMapping(RBIdx, Size), NumOperands); } -bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - unsigned Depth) const { +bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { unsigned Op = MI.getOpcode(); // Do we have an explicit floating point instruction? @@ -481,30 +481,30 @@ bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, if (Op != TargetOpcode::COPY && !MI.isPHI()) return false; - // Check if we already know the register bank. - auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI); - if (RB == &AArch64::FPRRegBank) - return true; - if (RB == &AArch64::GPRRegBank) - return false; - - // We don't know anything. - // - // If we have a phi, we may be able to infer that it will be assigned a FPR - // based off of its inputs. - if (!MI.isPHI() || Depth > MaxFPRSearchDepth) - return false; - - return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) { - return Op.isReg() && - onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1); - }); + // Check if we already know the register bank. + auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI); + if (RB == &AArch64::FPRRegBank) + return true; + if (RB == &AArch64::GPRRegBank) + return false; + + // We don't know anything. + // + // If we have a phi, we may be able to infer that it will be assigned a FPR + // based off of its inputs. 
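+  // For example, a PHI is treated as FPR-constrained here if any of its
+  // incoming values is already known to produce a floating-point result
+  // (onlyDefinesFP), searching recursively up to MaxFPRSearchDepth
+  // (illustrative restatement of the check below).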
+ if (!MI.isPHI() || Depth > MaxFPRSearchDepth) + return false; + + return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) { + return Op.isReg() && + onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1); + }); } bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - unsigned Depth) const { + const TargetRegisterInfo &TRI, + unsigned Depth) const { switch (MI.getOpcode()) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: @@ -513,13 +513,13 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, default: break; } - return hasFPConstraints(MI, MRI, TRI, Depth); + return hasFPConstraints(MI, MRI, TRI, Depth); } -bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - unsigned Depth) const { +bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { switch (MI.getOpcode()) { case AArch64::G_DUP: case TargetOpcode::G_SITOFP: @@ -530,7 +530,7 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, default: break; } - return hasFPConstraints(MI, MRI, TRI, Depth); + return hasFPConstraints(MI, MRI, TRI, Depth); } const RegisterBankInfo::InstructionMapping & @@ -680,18 +680,18 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case TargetOpcode::G_SITOFP: - case TargetOpcode::G_UITOFP: { + case TargetOpcode::G_UITOFP: { if (MRI.getType(MI.getOperand(0).getReg()).isVector()) break; - // Integer to FP conversions don't necessarily happen between GPR -> FPR - // regbanks. They can also be done within an FPR register. - Register SrcReg = MI.getOperand(1).getReg(); - if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank) - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; - else - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; + // Integer to FP conversions don't necessarily happen between GPR -> FPR + // regbanks. They can also be done within an FPR register. + Register SrcReg = MI.getOperand(1).getReg(); + if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank) + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; + else + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; break; - } + } case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: if (MRI.getType(MI.getOperand(0).getReg()).isVector()) @@ -729,8 +729,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // assume this was a floating point load in the IR. // If it was not, we would have had a bitcast before // reaching that instruction. - // Int->FP conversion operations are also captured in onlyDefinesFP(). - if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) { + // Int->FP conversion operations are also captured in onlyDefinesFP(). + if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) { OpRegBankIdx[0] = PMI_FirstFPR; break; } @@ -853,7 +853,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } break; } - case TargetOpcode::G_BUILD_VECTOR: { + case TargetOpcode::G_BUILD_VECTOR: { // If the first source operand belongs to a FPR register bank, then make // sure that we preserve that. if (OpRegBankIdx[1] != PMI_FirstGPR) @@ -864,17 +864,17 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // Get the instruction that defined the source operand reg, and check if // it's a floating point operation. 
Or, if it's a type like s16 which - // doesn't have a exact size gpr register class. The exception is if the - // build_vector has all constant operands, which may be better to leave as - // gpr without copies, so it can be matched in imported patterns. + // doesn't have a exact size gpr register class. The exception is if the + // build_vector has all constant operands, which may be better to leave as + // gpr without copies, so it can be matched in imported patterns. MachineInstr *DefMI = MRI.getVRegDef(VReg); unsigned DefOpc = DefMI->getOpcode(); const LLT SrcTy = MRI.getType(VReg); - if (all_of(MI.operands(), [&](const MachineOperand &Op) { - return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() == - TargetOpcode::G_CONSTANT; - })) - break; + if (all_of(MI.operands(), [&](const MachineOperand &Op) { + return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() == + TargetOpcode::G_CONSTANT; + })) + break; if (isPreISelGenericFloatingPointOpcode(DefOpc) || SrcTy.getSizeInBits() < 32) { // Have a floating point op. @@ -885,30 +885,30 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } break; } - case TargetOpcode::G_VECREDUCE_FADD: - case TargetOpcode::G_VECREDUCE_FMUL: - case TargetOpcode::G_VECREDUCE_FMAX: - case TargetOpcode::G_VECREDUCE_FMIN: - case TargetOpcode::G_VECREDUCE_ADD: - case TargetOpcode::G_VECREDUCE_MUL: - case TargetOpcode::G_VECREDUCE_AND: - case TargetOpcode::G_VECREDUCE_OR: - case TargetOpcode::G_VECREDUCE_XOR: - case TargetOpcode::G_VECREDUCE_SMAX: - case TargetOpcode::G_VECREDUCE_SMIN: - case TargetOpcode::G_VECREDUCE_UMAX: - case TargetOpcode::G_VECREDUCE_UMIN: - // Reductions produce a scalar value from a vector, the scalar should be on - // FPR bank. - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; - break; - case TargetOpcode::G_VECREDUCE_SEQ_FADD: - case TargetOpcode::G_VECREDUCE_SEQ_FMUL: - // These reductions also take a scalar accumulator input. - // Assign them FPR for now. - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR}; - break; - } + case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_FMUL: + case TargetOpcode::G_VECREDUCE_FMAX: + case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_ADD: + case TargetOpcode::G_VECREDUCE_MUL: + case TargetOpcode::G_VECREDUCE_AND: + case TargetOpcode::G_VECREDUCE_OR: + case TargetOpcode::G_VECREDUCE_XOR: + case TargetOpcode::G_VECREDUCE_SMAX: + case TargetOpcode::G_VECREDUCE_SMIN: + case TargetOpcode::G_VECREDUCE_UMAX: + case TargetOpcode::G_VECREDUCE_UMIN: + // Reductions produce a scalar value from a vector, the scalar should be on + // FPR bank. + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; + break; + case TargetOpcode::G_VECREDUCE_SEQ_FADD: + case TargetOpcode::G_VECREDUCE_SEQ_FMUL: + // These reductions also take a scalar accumulator input. + // Assign them FPR for now. + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR}; + break; + } // Finally construct the computed mapping. 
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands); diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h index 019017bc3e..c8cfe53299 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h @@ -114,20 +114,20 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo { const InstructionMapping & getSameKindOfOperandsMapping(const MachineInstr &MI) const; - /// Maximum recursion depth for hasFPConstraints. - const unsigned MaxFPRSearchDepth = 2; - - /// \returns true if \p MI only uses and defines FPRs. + /// Maximum recursion depth for hasFPConstraints. + const unsigned MaxFPRSearchDepth = 2; + + /// \returns true if \p MI only uses and defines FPRs. bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; - /// \returns true if \p MI only uses FPRs. + /// \returns true if \p MI only uses FPRs. bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; - /// \returns true if \p MI only defines FPRs. + /// \returns true if \p MI only defines FPRs. bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; public: AArch64RegisterBankInfo(const TargetRegisterInfo &TRI); diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h index 2cbe8315bc..77b7c09946 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h @@ -763,8 +763,8 @@ static inline bool isSVECpyImm(int64_t Imm) { bool IsImm8 = int8_t(Imm) == Imm; bool IsImm16 = int16_t(Imm & ~0xff) == Imm; - if (std::is_same<int8_t, std::make_signed_t<T>>::value || - std::is_same<int8_t, T>::value) + if (std::is_same<int8_t, std::make_signed_t<T>>::value || + std::is_same<int8_t, T>::value) return IsImm8 || uint8_t(Imm) == Imm; if (std::is_same<int16_t, std::make_signed_t<T>>::value) @@ -776,8 +776,8 @@ static inline bool isSVECpyImm(int64_t Imm) { /// Returns true if Imm is valid for ADD/SUB. 
template <typename T> static inline bool isSVEAddSubImm(int64_t Imm) { - bool IsInt8t = std::is_same<int8_t, std::make_signed_t<T>>::value || - std::is_same<int8_t, T>::value; + bool IsInt8t = std::is_same<int8_t, std::make_signed_t<T>>::value || + std::is_same<int8_t, T>::value; return uint8_t(Imm) == Imm || (!IsInt8t && uint16_t(Imm & ~0xff) == Imm); } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 75a9f2f5c8..33448cef46 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -758,7 +758,7 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, assert(TheTriple.isOSBinFormatELF() && "Invalid target"); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); - bool IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32; + bool IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32; return new ELFAArch64AsmBackend(T, TheTriple, OSABI, /*IsLittleEndian=*/true, IsILP32); } @@ -771,7 +771,7 @@ MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T, assert(TheTriple.isOSBinFormatELF() && "Big endian is only supported for ELF targets!"); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); - bool IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32; + bool IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32; return new ELFAArch64AsmBackend(T, TheTriple, OSABI, /*IsLittleEndian=*/false, IsILP32); } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index fcf67bd2f7..6c98ac4737 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -43,7 +43,7 @@ protected: } // end anonymous namespace AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, bool IsILP32) - : MCELFObjectTargetWriter(/*Is64Bit*/ !IsILP32, OSABI, ELF::EM_AARCH64, + : MCELFObjectTargetWriter(/*Is64Bit*/ !IsILP32, OSABI, ELF::EM_AARCH64, /*HasRelocationAddend*/ true), IsILP32(IsILP32) {} @@ -322,11 +322,11 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return R_CLS(LDST64_ABS_LO12_NC); if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) { - AArch64MCExpr::VariantKind AddressLoc = - AArch64MCExpr::getAddressFrag(RefKind); + AArch64MCExpr::VariantKind AddressLoc = + AArch64MCExpr::getAddressFrag(RefKind); if (!IsILP32) { - if (AddressLoc == AArch64MCExpr::VK_LO15) - return ELF::R_AARCH64_LD64_GOTPAGE_LO15; + if (AddressLoc == AArch64MCExpr::VK_LO15) + return ELF::R_AARCH64_LD64_GOTPAGE_LO15; return ELF::R_AARCH64_LD64_GOT_LO12_NC; } else { Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index ec97e1c8b7..2135cf605b 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -51,61 +51,61 @@ class AArch64TargetAsmStreamer : public AArch64TargetStreamer { OS << "\t.variant_pcs " << Symbol->getName() << "\n"; } - 
void EmitARM64WinCFIAllocStack(unsigned Size) override { - OS << "\t.seh_stackalloc " << Size << "\n"; - } - void EmitARM64WinCFISaveR19R20X(int Offset) override { - OS << "\t.seh_save_r19r20_x " << Offset << "\n"; - } - void EmitARM64WinCFISaveFPLR(int Offset) override { - OS << "\t.seh_save_fplr " << Offset << "\n"; - } - void EmitARM64WinCFISaveFPLRX(int Offset) override { - OS << "\t.seh_save_fplr_x " << Offset << "\n"; - } - void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override { - OS << "\t.seh_save_reg x" << Reg << ", " << Offset << "\n"; - } - void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override { - OS << "\t.seh_save_reg_x x" << Reg << ", " << Offset << "\n"; - } - void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override { - OS << "\t.seh_save_regp x" << Reg << ", " << Offset << "\n"; - } - void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override { - OS << "\t.seh_save_regp_x x" << Reg << ", " << Offset << "\n"; - } - void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override { - OS << "\t.seh_save_lrpair x" << Reg << ", " << Offset << "\n"; - } - void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override { - OS << "\t.seh_save_freg d" << Reg << ", " << Offset << "\n"; - } - void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override { - OS << "\t.seh_save_freg_x d" << Reg << ", " << Offset << "\n"; - } - void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override { - OS << "\t.seh_save_fregp d" << Reg << ", " << Offset << "\n"; - } - void EmitARM64WinCFISaveFRegPX(unsigned Reg, int Offset) override { - OS << "\t.seh_save_fregp_x d" << Reg << ", " << Offset << "\n"; - } - void EmitARM64WinCFISetFP() override { OS << "\t.seh_set_fp\n"; } - void EmitARM64WinCFIAddFP(unsigned Size) override { - OS << "\t.seh_add_fp " << Size << "\n"; - } - void EmitARM64WinCFINop() override { OS << "\t.seh_nop\n"; } - void EmitARM64WinCFISaveNext() override { OS << "\t.seh_save_next\n"; } - void EmitARM64WinCFIPrologEnd() override { OS << "\t.seh_endprologue\n"; } - void EmitARM64WinCFIEpilogStart() override { OS << "\t.seh_startepilogue\n"; } - void EmitARM64WinCFIEpilogEnd() override { OS << "\t.seh_endepilogue\n"; } - void EmitARM64WinCFITrapFrame() override { OS << "\t.seh_trap_frame\n"; } - void EmitARM64WinCFIMachineFrame() override { OS << "\t.seh_pushframe\n"; } - void EmitARM64WinCFIContext() override { OS << "\t.seh_context\n"; } - void EmitARM64WinCFIClearUnwoundToCall() override { - OS << "\t.seh_clear_unwound_to_call\n"; - } - + void EmitARM64WinCFIAllocStack(unsigned Size) override { + OS << "\t.seh_stackalloc " << Size << "\n"; + } + void EmitARM64WinCFISaveR19R20X(int Offset) override { + OS << "\t.seh_save_r19r20_x " << Offset << "\n"; + } + void EmitARM64WinCFISaveFPLR(int Offset) override { + OS << "\t.seh_save_fplr " << Offset << "\n"; + } + void EmitARM64WinCFISaveFPLRX(int Offset) override { + OS << "\t.seh_save_fplr_x " << Offset << "\n"; + } + void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override { + OS << "\t.seh_save_reg x" << Reg << ", " << Offset << "\n"; + } + void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override { + OS << "\t.seh_save_reg_x x" << Reg << ", " << Offset << "\n"; + } + void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override { + OS << "\t.seh_save_regp x" << Reg << ", " << Offset << "\n"; + } + void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override { + OS << "\t.seh_save_regp_x x" << Reg << ", " << Offset << "\n"; + } + void EmitARM64WinCFISaveLRPair(unsigned Reg, 
int Offset) override { + OS << "\t.seh_save_lrpair x" << Reg << ", " << Offset << "\n"; + } + void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override { + OS << "\t.seh_save_freg d" << Reg << ", " << Offset << "\n"; + } + void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override { + OS << "\t.seh_save_freg_x d" << Reg << ", " << Offset << "\n"; + } + void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override { + OS << "\t.seh_save_fregp d" << Reg << ", " << Offset << "\n"; + } + void EmitARM64WinCFISaveFRegPX(unsigned Reg, int Offset) override { + OS << "\t.seh_save_fregp_x d" << Reg << ", " << Offset << "\n"; + } + void EmitARM64WinCFISetFP() override { OS << "\t.seh_set_fp\n"; } + void EmitARM64WinCFIAddFP(unsigned Size) override { + OS << "\t.seh_add_fp " << Size << "\n"; + } + void EmitARM64WinCFINop() override { OS << "\t.seh_nop\n"; } + void EmitARM64WinCFISaveNext() override { OS << "\t.seh_save_next\n"; } + void EmitARM64WinCFIPrologEnd() override { OS << "\t.seh_endprologue\n"; } + void EmitARM64WinCFIEpilogStart() override { OS << "\t.seh_startepilogue\n"; } + void EmitARM64WinCFIEpilogEnd() override { OS << "\t.seh_endepilogue\n"; } + void EmitARM64WinCFITrapFrame() override { OS << "\t.seh_trap_frame\n"; } + void EmitARM64WinCFIMachineFrame() override { OS << "\t.seh_pushframe\n"; } + void EmitARM64WinCFIContext() override { OS << "\t.seh_context\n"; } + void EmitARM64WinCFIClearUnwoundToCall() override { + OS << "\t.seh_clear_unwound_to_call\n"; + } + public: AArch64TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); }; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 340120d2b9..4aeb45ac21 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -849,7 +849,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, } break; } - } else if (CnVal == 8 || CnVal == 9) { + } else if (CnVal == 8 || CnVal == 9) { // TLBI aliases const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByEncoding(Encoding); if (!TLBI || !TLBI->haveFeatures(STI.getFeatureBits())) @@ -1377,8 +1377,8 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address, } } -void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, uint64_t Address, - unsigned OpNum, +void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, uint64_t Address, + unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); @@ -1386,11 +1386,11 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, uint64_t Address, // If the label has already been resolved to an immediate offset (say, when // we're running the disassembler), just print the immediate. 
if (Op.isImm()) { - const int64_t Offset = Op.getImm() * 4096; - if (PrintBranchImmAsAddress) - O << formatHex((Address & -4096) + Offset); - else - O << "#" << Offset; + const int64_t Offset = Op.getImm() * 4096; + if (PrintBranchImmAsAddress) + O << formatHex((Address & -4096) + Offset); + else + O << "#" << Offset; return; } @@ -1421,22 +1421,22 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, O << "#" << Val; } -void AArch64InstPrinter::printBarriernXSOption(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - assert(MI->getOpcode() == AArch64::DSBnXS); - - StringRef Name; - auto DB = AArch64DBnXS::lookupDBnXSByEncoding(Val); - Name = DB ? DB->Name : ""; - - if (!Name.empty()) - O << Name; - else - O << "#" << Val; -} - +void AArch64InstPrinter::printBarriernXSOption(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); + assert(MI->getOpcode() == AArch64::DSBnXS); + + StringRef Name; + auto DB = AArch64DBnXS::lookupDBnXSByEncoding(Val); + Name = DB ? DB->Name : ""; + + if (!Name.empty()) + O << Name; + else + O << "#" << Val; +} + void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -1644,10 +1644,10 @@ void AArch64InstPrinter::printGPR64as32(const MCInst *MI, unsigned OpNum, unsigned Reg = MI->getOperand(OpNum).getReg(); O << getRegisterName(getWRegFromXReg(Reg)); } - -void AArch64InstPrinter::printGPR64x8(const MCInst *MI, unsigned OpNum, - const MCSubtargetInfo &STI, - raw_ostream &O) { - unsigned Reg = MI->getOperand(OpNum).getReg(); - O << getRegisterName(MRI.getSubReg(Reg, AArch64::x8sub_0)); -} + +void AArch64InstPrinter::printGPR64x8(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + O << getRegisterName(MRI.getSubReg(Reg, AArch64::x8sub_0)); +} diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h index 4be885e667..b1952ebd27 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -30,7 +30,7 @@ public: void printRegName(raw_ostream &OS, unsigned RegNo) const override; // Autogenerated by tblgen. 
- std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; + std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; virtual void printInstruction(const MCInst *MI, uint64_t Address, const MCSubtargetInfo &STI, raw_ostream &O); virtual bool printAliasInstr(const MCInst *MI, uint64_t Address, @@ -156,12 +156,12 @@ protected: void printVectorIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); - void printAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum, + void printAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); void printBarrierOption(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); - void printBarriernXSOption(const MCInst *MI, unsigned OpNum, - const MCSubtargetInfo &STI, raw_ostream &O); + void printBarriernXSOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, @@ -190,8 +190,8 @@ protected: const MCSubtargetInfo &STI, raw_ostream &O); void printGPR64as32(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); - void printGPR64x8(const MCInst *MI, unsigned OpNum, - const MCSubtargetInfo &STI, raw_ostream &O); + void printGPR64x8(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); template <int Width> void printZPRasFPR(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); @@ -208,7 +208,7 @@ public: void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; + std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override; void printInstruction(const MCInst *MI, uint64_t Address, const MCSubtargetInfo &STI, raw_ostream &O) override; bool printAliasInstr(const MCInst *MI, uint64_t Address, diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 68c721cb0d..257ecd33d2 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -73,7 +73,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) { // targeting ELF. AssemblerDialect = AsmWriterVariant == Default ? Generic : AsmWriterVariant; - CodePointerSize = T.getEnvironment() == Triple::GNUILP32 ? 4 : 8; + CodePointerSize = T.getEnvironment() == Triple::GNUILP32 ? 4 : 8; // ".comm align is in bytes but .align is pow-2." 
AlignmentIsInBytes = false; @@ -111,7 +111,7 @@ AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() { SupportsDebugInformation = true; CodePointerSize = 8; - CommentString = "//"; + CommentString = "//"; ExceptionsType = ExceptionHandling::WinEH; WinEHEncodingType = WinEH::EncodingType::Itanium; } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index 844bd6bbad..dd975cd363 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -70,7 +70,7 @@ StringRef AArch64MCExpr::getVariantKindName() const { case VK_ABS_PAGE_NC: return ":pg_hi21_nc:"; case VK_GOT: return ":got:"; case VK_GOT_PAGE: return ":got:"; - case VK_GOT_PAGE_LO15: return ":gotpage_lo15:"; + case VK_GOT_PAGE_LO15: return ":gotpage_lo15:"; case VK_GOT_LO12: return ":got_lo12:"; case VK_GOTTPREL: return ":gottprel:"; case VK_GOTTPREL_PAGE: return ":gottprel:"; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index d3e834a140..6e191cd455 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -46,7 +46,7 @@ public: VK_G1 = 0x050, VK_G2 = 0x060, VK_G3 = 0x070, - VK_LO15 = 0x080, + VK_LO15 = 0x080, VK_AddressFragBits = 0x0f0, // Whether the final relocation is a checked one (where a linker should @@ -83,7 +83,7 @@ public: VK_PREL_G0_NC = VK_PREL | VK_G0 | VK_NC, VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC, VK_GOT_PAGE = VK_GOT | VK_PAGE, - VK_GOT_PAGE_LO15 = VK_GOT | VK_LO15 | VK_NC, + VK_GOT_PAGE_LO15 = VK_GOT | VK_LO15 | VK_NC, VK_DTPREL_G2 = VK_DTPREL | VK_G2, VK_DTPREL_G1 = VK_DTPREL | VK_G1, VK_DTPREL_G1_NC = VK_DTPREL | VK_G1 | VK_NC, diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 3c2df1621e..98dcd9a96a 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -50,14 +50,14 @@ static MCInstrInfo *createAArch64MCInstrInfo() { static MCSubtargetInfo * createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - if (CPU.empty()) { + if (CPU.empty()) { CPU = "generic"; - if (TT.isArm64e()) - CPU = "apple-a12"; - } - - return createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + if (TT.isArm64e()) + CPU = "apple-a12"; + } + + return createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) { diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 012661edbb..f2384aa588 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -373,11 +373,11 @@ void AArch64MachObjectWriter::recordRelocation( Type == MachO::ARM64_RELOC_PAGE21 || Type == MachO::ARM64_RELOC_PAGEOFF12) && Value) { - if (!isInt<24>(Value)) { - Asm.getContext().reportError(Fixup.getLoc(), - "addend too big for relocation"); - return; - } + if (!isInt<24>(Value)) { + 
Asm.getContext().reportError(Fixup.getLoc(), + "addend too big for relocation"); + return; + } MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp index f32a8f15b8..8f3e876061 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp @@ -11,23 +11,23 @@ //===----------------------------------------------------------------------===// #include "AArch64TargetStreamer.h" -#include "AArch64MCAsmInfo.h" -#include "AArch64Subtarget.h" -#include "llvm/BinaryFormat/ELF.h" +#include "AArch64MCAsmInfo.h" +#include "AArch64Subtarget.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/ConstantPools.h" -#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSection.h" -#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; -static cl::opt<bool> MarkBTIProperty( - "aarch64-mark-bti-property", cl::Hidden, - cl::desc("Add .note.gnu.property with BTI to assembly files"), - cl::init(false)); - +static cl::opt<bool> MarkBTIProperty( + "aarch64-mark-bti-property", cl::Hidden, + cl::desc("Add .note.gnu.property with BTI to assembly files"), + cl::init(false)); + // // AArch64TargetStreamer Implemenation // @@ -48,51 +48,51 @@ void AArch64TargetStreamer::emitCurrentConstantPool() { ConstantPools->emitForCurrentSection(Streamer); } -// finish() - write out any non-empty assembler constant pools and -// write out note.gnu.properties if need. -void AArch64TargetStreamer::finish() { - ConstantPools->emitAll(Streamer); - - if (MarkBTIProperty) - emitNoteSection(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI); -} - -void AArch64TargetStreamer::emitNoteSection(unsigned Flags) { - if (Flags == 0) - return; - - MCStreamer &OutStreamer = getStreamer(); - MCContext &Context = OutStreamer.getContext(); - // Emit a .note.gnu.property section with the flags. - MCSectionELF *Nt = Context.getELFSection(".note.gnu.property", ELF::SHT_NOTE, - ELF::SHF_ALLOC); - if (Nt->isRegistered()) { - SMLoc Loc; - Context.reportWarning( - Loc, - "The .note.gnu.property is not emitted because it is already present."); - return; - } - MCSection *Cur = OutStreamer.getCurrentSectionOnly(); - OutStreamer.SwitchSection(Nt); - - // Emit the note header. - OutStreamer.emitValueToAlignment(Align(8).value()); - OutStreamer.emitIntValue(4, 4); // data size for "GNU\0" - OutStreamer.emitIntValue(4 * 4, 4); // Elf_Prop size - OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4); - OutStreamer.emitBytes(StringRef("GNU", 4)); // note name - - // Emit the PAC/BTI properties. - OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4); - OutStreamer.emitIntValue(4, 4); // data size - OutStreamer.emitIntValue(Flags, 4); // data - OutStreamer.emitIntValue(0, 4); // pad - - OutStreamer.endSection(Nt); - OutStreamer.SwitchSection(Cur); -} - +// finish() - write out any non-empty assembler constant pools and +// write out note.gnu.properties if need. 
+void AArch64TargetStreamer::finish() { + ConstantPools->emitAll(Streamer); + + if (MarkBTIProperty) + emitNoteSection(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI); +} + +void AArch64TargetStreamer::emitNoteSection(unsigned Flags) { + if (Flags == 0) + return; + + MCStreamer &OutStreamer = getStreamer(); + MCContext &Context = OutStreamer.getContext(); + // Emit a .note.gnu.property section with the flags. + MCSectionELF *Nt = Context.getELFSection(".note.gnu.property", ELF::SHT_NOTE, + ELF::SHF_ALLOC); + if (Nt->isRegistered()) { + SMLoc Loc; + Context.reportWarning( + Loc, + "The .note.gnu.property is not emitted because it is already present."); + return; + } + MCSection *Cur = OutStreamer.getCurrentSectionOnly(); + OutStreamer.SwitchSection(Nt); + + // Emit the note header. + OutStreamer.emitValueToAlignment(Align(8).value()); + OutStreamer.emitIntValue(4, 4); // data size for "GNU\0" + OutStreamer.emitIntValue(4 * 4, 4); // Elf_Prop size + OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4); + OutStreamer.emitBytes(StringRef("GNU", 4)); // note name + + // Emit the PAC/BTI properties. + OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4); + OutStreamer.emitIntValue(4, 4); // data size + OutStreamer.emitIntValue(Flags, 4); // data + OutStreamer.emitIntValue(0, 4); // pad + + OutStreamer.endSection(Nt); + OutStreamer.SwitchSection(Cur); +} + void AArch64TargetStreamer::emitInst(uint32_t Inst) { char Buffer[4]; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h index 73dc1e5d4d..5212d70a57 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h @@ -33,9 +33,9 @@ public: /// Emit contents of constant pool for the current section. void emitCurrentConstantPool(); - /// Callback used to implement the .note.gnu.property section. - void emitNoteSection(unsigned Flags); - + /// Callback used to implement the .note.gnu.property section. + void emitNoteSection(unsigned Flags); + /// Callback used to implement the .inst directive. 
virtual void emitInst(uint32_t Inst); @@ -43,14 +43,14 @@ public: virtual void emitDirectiveVariantPCS(MCSymbol *Symbol) {}; virtual void EmitARM64WinCFIAllocStack(unsigned Size) {} - virtual void EmitARM64WinCFISaveR19R20X(int Offset) {} + virtual void EmitARM64WinCFISaveR19R20X(int Offset) {} virtual void EmitARM64WinCFISaveFPLR(int Offset) {} virtual void EmitARM64WinCFISaveFPLRX(int Offset) {} virtual void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) {} - virtual void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) {} + virtual void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) {} virtual void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) {} @@ -58,14 +58,14 @@ public: virtual void EmitARM64WinCFISetFP() {} virtual void EmitARM64WinCFIAddFP(unsigned Size) {} virtual void EmitARM64WinCFINop() {} - virtual void EmitARM64WinCFISaveNext() {} + virtual void EmitARM64WinCFISaveNext() {} virtual void EmitARM64WinCFIPrologEnd() {} virtual void EmitARM64WinCFIEpilogStart() {} virtual void EmitARM64WinCFIEpilogEnd() {} - virtual void EmitARM64WinCFITrapFrame() {} - virtual void EmitARM64WinCFIMachineFrame() {} - virtual void EmitARM64WinCFIContext() {} - virtual void EmitARM64WinCFIClearUnwoundToCall() {} + virtual void EmitARM64WinCFITrapFrame() {} + virtual void EmitARM64WinCFIMachineFrame() {} + virtual void EmitARM64WinCFIContext() {} + virtual void EmitARM64WinCFIClearUnwoundToCall() {} private: std::unique_ptr<AssemblerConstantPools> ConstantPools; @@ -96,14 +96,14 @@ public: // The unwind codes on ARM64 Windows are documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling void EmitARM64WinCFIAllocStack(unsigned Size) override; - void EmitARM64WinCFISaveR19R20X(int Offset) override; + void EmitARM64WinCFISaveR19R20X(int Offset) override; void EmitARM64WinCFISaveFPLR(int Offset) override; void EmitARM64WinCFISaveFPLRX(int Offset) override; void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override; - void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override; + void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override; void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override; @@ -111,15 +111,15 @@ public: void EmitARM64WinCFISetFP() override; void EmitARM64WinCFIAddFP(unsigned Size) override; void EmitARM64WinCFINop() override; - void EmitARM64WinCFISaveNext() override; + void EmitARM64WinCFISaveNext() override; void EmitARM64WinCFIPrologEnd() override; void EmitARM64WinCFIEpilogStart() override; void EmitARM64WinCFIEpilogEnd() override; - void EmitARM64WinCFITrapFrame() override; - void EmitARM64WinCFIMachineFrame() override; - void EmitARM64WinCFIContext() override; - void EmitARM64WinCFIClearUnwoundToCall() override; - + void EmitARM64WinCFITrapFrame() override; + void EmitARM64WinCFIMachineFrame() override; + void EmitARM64WinCFIContext() override; + void 
EmitARM64WinCFIClearUnwoundToCall() override; + private: void EmitARM64WinUnwindCode(unsigned UnwindCode, int Reg, int Offset); }; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp index 1c50706a26..603446f40d 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -28,7 +28,7 @@ public: void EmitWinEHHandlerData(SMLoc Loc) override; void EmitWindowsUnwindTables() override; - void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) override; + void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) override; void finishImpl() override; }; @@ -37,14 +37,14 @@ void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { // We have to emit the unwind info now, because this directive // actually switches to the .xdata section! - EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo(), - /* HandlerData = */ true); -} - -void AArch64WinCOFFStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) { - EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false); + EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo(), + /* HandlerData = */ true); } +void AArch64WinCOFFStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) { + EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false); +} + void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() { if (!getNumWinFrameInfos()) return; @@ -91,10 +91,10 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIAllocStack(unsigned Size) { EmitARM64WinUnwindCode(Op, -1, Size); } -void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveR19R20X(int Offset) { - EmitARM64WinUnwindCode(Win64EH::UOP_SaveR19R20X, -1, Offset); -} - +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveR19R20X(int Offset) { + EmitARM64WinUnwindCode(Win64EH::UOP_SaveR19R20X, -1, Offset); +} + void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFPLR(int Offset) { EmitARM64WinUnwindCode(Win64EH::UOP_SaveFPLR, -1, Offset); } @@ -125,11 +125,11 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegPX(unsigned Reg, EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegPX, Reg, Offset); } -void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveLRPair(unsigned Reg, - int Offset) { - EmitARM64WinUnwindCode(Win64EH::UOP_SaveLRPair, Reg, Offset); -} - +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveLRPair(unsigned Reg, + int Offset) { + EmitARM64WinUnwindCode(Win64EH::UOP_SaveLRPair, Reg, Offset); +} + void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) { assert(Offset >= 0 && Offset <= 504 && @@ -165,10 +165,10 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFINop() { EmitARM64WinUnwindCode(Win64EH::UOP_Nop, -1, 0); } -void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveNext() { - EmitARM64WinUnwindCode(Win64EH::UOP_SaveNext, -1, 0); -} - +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveNext() { + EmitARM64WinUnwindCode(Win64EH::UOP_SaveNext, -1, 0); +} + // The functions below handle opcodes that can end up in either a prolog or // an epilog, but not both. 
void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIPrologEnd() { @@ -207,22 +207,22 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogEnd() { CurrentEpilog = nullptr; } -void AArch64TargetWinCOFFStreamer::EmitARM64WinCFITrapFrame() { - EmitARM64WinUnwindCode(Win64EH::UOP_TrapFrame, -1, 0); -} - -void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIMachineFrame() { - EmitARM64WinUnwindCode(Win64EH::UOP_PushMachFrame, -1, 0); -} - -void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIContext() { - EmitARM64WinUnwindCode(Win64EH::UOP_Context, -1, 0); -} - -void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIClearUnwoundToCall() { - EmitARM64WinUnwindCode(Win64EH::UOP_ClearUnwoundToCall, -1, 0); -} - +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFITrapFrame() { + EmitARM64WinUnwindCode(Win64EH::UOP_TrapFrame, -1, 0); +} + +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIMachineFrame() { + EmitARM64WinUnwindCode(Win64EH::UOP_PushMachFrame, -1, 0); +} + +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIContext() { + EmitARM64WinUnwindCode(Win64EH::UOP_Context, -1, 0); +} + +void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIClearUnwoundToCall() { + EmitARM64WinUnwindCode(Win64EH::UOP_ClearUnwoundToCall, -1, 0); +} + MCWinCOFFStreamer *createAArch64WinCOFFStreamer( MCContext &Context, std::unique_ptr<MCAsmBackend> MAB, std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter, diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make index 18b5c7460f..9a6f23a3c8 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make +++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make @@ -12,19 +12,19 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/BinaryFormat - contrib/libs/llvm12/lib/MC - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target/AArch64/TargetInfo - contrib/libs/llvm12/lib/Target/AArch64/Utils + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/BinaryFormat + contrib/libs/llvm12/lib/MC + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target/AArch64/TargetInfo + contrib/libs/llvm12/lib/Target/AArch64/Utils ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64 - contrib/libs/llvm12/lib/Target/AArch64 - contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64 + contrib/libs/llvm12/lib/Target/AArch64 + contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/AArch64/SVEInstrFormats.td b/contrib/libs/llvm12/lib/Target/AArch64/SVEInstrFormats.td index 4eecf72862..0c31ac1f9a 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/SVEInstrFormats.td +++ b/contrib/libs/llvm12/lib/Target/AArch64/SVEInstrFormats.td @@ -206,20 +206,20 @@ def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>; -def SVEArithUImm8Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i8>", []>; -def SVEArithUImm16Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i16>", []>; -def SVEArithUImm32Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i32>", []>; -def SVEArithUImm64Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i64>", []>; +def SVEArithUImm8Pat : ComplexPattern<i32, 1, 
"SelectSVEArithImm<MVT::i8>", []>; +def SVEArithUImm16Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i16>", []>; +def SVEArithUImm32Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i32>", []>; +def SVEArithUImm64Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i64>", []>; def SVEArithSImmPat : ComplexPattern<i32, 1, "SelectSVESignedArithImm", []>; -def SVEShiftImmL8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 7>", []>; -def SVEShiftImmL16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 15>", []>; -def SVEShiftImmL32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 31>", []>; -def SVEShiftImmL64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 63>", []>; -def SVEShiftImmR8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 8, true>", []>; -def SVEShiftImmR16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 16, true>", []>; -def SVEShiftImmR32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 32, true>", []>; -def SVEShiftImmR64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 64, true>", []>; +def SVEShiftImmL8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 7>", []>; +def SVEShiftImmL16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 15>", []>; +def SVEShiftImmL32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 31>", []>; +def SVEShiftImmL64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 63>", []>; +def SVEShiftImmR8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 8, true>", []>; +def SVEShiftImmR16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 16, true>", []>; +def SVEShiftImmR32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 32, true>", []>; +def SVEShiftImmR64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 64, true>", []>; class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass { let Name = "SVEExactFPImmOperand" # Suffix; @@ -280,8 +280,8 @@ class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty, let Inst{3-0} = Pd; let Defs = !if(!eq (opc{0}, 1), [NZCV], []); - let ElementSize = pprty.ElementSize; - let isReMaterializable = 1; + let ElementSize = pprty.ElementSize; + let isReMaterializable = 1; } multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> { @@ -317,18 +317,18 @@ class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1, : Pat<(vtd (op vt1:$Op1)), (inst $Op1)>; -class SVE_1_Op_Passthru_Pat<ValueType vtd, SDPatternOperator op, ValueType pg, - ValueType vts, Instruction inst> -: Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)), - (inst $Op3, $Op1, $Op2)>; - -// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the -// type of rounding. This is matched by timm0_1 in pattern below and ignored. -class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg, - ValueType vts, Instruction inst> -: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)), - (inst $Op3, $Op1, $Op2)>; - +class SVE_1_Op_Passthru_Pat<ValueType vtd, SDPatternOperator op, ValueType pg, + ValueType vts, Instruction inst> +: Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)), + (inst $Op3, $Op1, $Op2)>; + +// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the +// type of rounding. This is matched by timm0_1 in pattern below and ignored. 
+class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg, + ValueType vts, Instruction inst> +: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)), + (inst $Op3, $Op1, $Op2)>; + class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty, ValueType it, ComplexPattern cpx, Instruction inst> : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))), @@ -354,11 +354,11 @@ class SVE_2_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1, : Pat<(vtd (op vt1:$Op1, vt2:$Op2)), (inst $Op1, $Op2)>; -class SVE_2_Op_Pred_All_Active<ValueType vtd, SDPatternOperator op, - ValueType pt, ValueType vt1, ValueType vt2, - Instruction inst> -: Pat<(vtd (op (pt (AArch64ptrue 31)), vt1:$Op1, vt2:$Op2)), - (inst $Op1, $Op2)>; +class SVE_2_Op_Pred_All_Active<ValueType vtd, SDPatternOperator op, + ValueType pt, ValueType vt1, ValueType vt2, + Instruction inst> +: Pat<(vtd (op (pt (AArch64ptrue 31)), vt1:$Op1, vt2:$Op2)), + (inst $Op1, $Op2)>; class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1, ValueType vt2, ValueType vt3, Instruction inst> @@ -418,23 +418,23 @@ class SVE_2_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1, : Pat<(vtd (op vt1:$Op1, vt2:$Op2)), (inst (ptrue 31), $Op1, $Op2)>; -class SVE_InReg_Extend<ValueType vt, SDPatternOperator op, ValueType pt, - ValueType inreg_vt, Instruction inst> -: Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, vt:$PassThru)), - (inst $PassThru, $Pg, $Src)>; - -class SVE_Shift_DupImm_Pred_Pat<ValueType vt, SDPatternOperator op, - ValueType pt, ValueType it, - ComplexPattern cast, Instruction inst> -: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))), - (inst $Pg, $Rn, i32:$imm)>; - -class SVE_Shift_DupImm_All_Active_Pat<ValueType vt, SDPatternOperator op, - ValueType pt, ValueType it, - ComplexPattern cast, Instruction inst> -: Pat<(vt (op (pt (AArch64ptrue 31)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))), - (inst $Rn, i32:$imm)>; - +class SVE_InReg_Extend<ValueType vt, SDPatternOperator op, ValueType pt, + ValueType inreg_vt, Instruction inst> +: Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, vt:$PassThru)), + (inst $PassThru, $Pg, $Src)>; + +class SVE_Shift_DupImm_Pred_Pat<ValueType vt, SDPatternOperator op, + ValueType pt, ValueType it, + ComplexPattern cast, Instruction inst> +: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))), + (inst $Pg, $Rn, i32:$imm)>; + +class SVE_Shift_DupImm_All_Active_Pat<ValueType vt, SDPatternOperator op, + ValueType pt, ValueType it, + ComplexPattern cast, Instruction inst> +: Pat<(vt (op (pt (AArch64ptrue 31)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))), + (inst $Rn, i32:$imm)>; + // // Pseudo -> Instruction mappings // @@ -511,8 +511,8 @@ class sve_int_pfalse<bits<6> opc, string asm> let Inst{9} = opc{0}; let Inst{8-4} = 0b00000; let Inst{3-0} = Pd; - - let isReMaterializable = 1; + + let isReMaterializable = 1; } class sve_int_ptest<bits<6> opc, string asm> @@ -533,7 +533,7 @@ class sve_int_ptest<bits<6> opc, string asm> let Inst{4-0} = 0b00000; let Defs = [NZCV]; - let isCompare = 1; + let isCompare = 1; } class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm, @@ -1014,8 +1014,8 @@ multiclass sve_int_perm_dup_i<string asm> { (!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>; } -class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty, - RegisterOperand VecList> +class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string 
asm, ZPRRegOp zprty, + RegisterOperand VecList> : I<(outs zprty:$Zd), (ins VecList:$Zn, zprty:$Zm), asm, "\t$Zd, $Zn, $Zm", "", @@ -1057,8 +1057,8 @@ multiclass sve_int_perm_tbl<string asm, SDPatternOperator op> { def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>; def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>; def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>; - - def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>; + + def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>; } multiclass sve2_int_perm_tbl<string asm, SDPatternOperator op> { @@ -1101,11 +1101,11 @@ multiclass sve2_int_perm_tbl<string asm, SDPatternOperator op> { (nxv2f64 (!cast<Instruction>(NAME # _D) (REG_SEQUENCE ZPR2, nxv2f64:$Op1, zsub0, nxv2f64:$Op2, zsub1), nxv2i64:$Op3))>; - - def : Pat<(nxv8bf16 (op nxv8bf16:$Op1, nxv8bf16:$Op2, nxv8i16:$Op3)), - (nxv8bf16 (!cast<Instruction>(NAME # _H) (REG_SEQUENCE ZPR2, nxv8bf16:$Op1, zsub0, - nxv8bf16:$Op2, zsub1), - nxv8i16:$Op3))>; + + def : Pat<(nxv8bf16 (op nxv8bf16:$Op1, nxv8bf16:$Op2, nxv8i16:$Op3)), + (nxv8bf16 (!cast<Instruction>(NAME # _H) (REG_SEQUENCE ZPR2, nxv8bf16:$Op1, zsub0, + nxv8bf16:$Op2, zsub1), + nxv8i16:$Op3))>; } class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty> @@ -1141,8 +1141,8 @@ multiclass sve2_int_perm_tbx<string asm, SDPatternOperator op> { def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>; def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>; def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>; - - def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>; + + def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>; } class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty> @@ -1173,8 +1173,8 @@ multiclass sve_int_perm_reverse_z<string asm, SDPatternOperator op> { def : SVE_1_Op_Pat<nxv8f16, op, nxv8f16, !cast<Instruction>(NAME # _H)>; def : SVE_1_Op_Pat<nxv4f32, op, nxv4f32, !cast<Instruction>(NAME # _S)>; def : SVE_1_Op_Pat<nxv2f64, op, nxv2f64, !cast<Instruction>(NAME # _D)>; - - def : SVE_1_Op_Pat<nxv8bf16, op, nxv8bf16, !cast<Instruction>(NAME # _H)>; + + def : SVE_1_Op_Pat<nxv8bf16, op, nxv8bf16, !cast<Instruction>(NAME # _H)>; } class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty> @@ -1287,8 +1287,8 @@ multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> { def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, f16, !cast<Instruction>(NAME # _H)>; def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, f32, !cast<Instruction>(NAME # _S)>; def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, f64, !cast<Instruction>(NAME # _D)>; - - def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, bf16, !cast<Instruction>(NAME # _H)>; + + def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, bf16, !cast<Instruction>(NAME # _H)>; } //===----------------------------------------------------------------------===// @@ -1375,8 +1375,8 @@ multiclass sve_int_sel_vvv<string asm, SDPatternOperator op> { def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _D)>; def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; - def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>; - + def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, 
nxv8bf16, !cast<Instruction>(NAME # _H)>; + def : InstAlias<"mov $Zd, $Pg/m, $Zn", (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, ZPR8:$Zn, ZPR8:$Zd), 1>; def : InstAlias<"mov $Zd, $Pg/m, $Zn", @@ -1713,8 +1713,8 @@ class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty> let Inst{4-0} = Zd; } -multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op, - SDPatternOperator predicated_op = null_frag> { +multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op, + SDPatternOperator predicated_op = null_frag> { def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>; def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>; def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>; @@ -1723,9 +1723,9 @@ multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op, def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>; def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; - def : SVE_2_Op_Pred_All_Active<nxv8f16, predicated_op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Pred_All_Active<nxv4f32, predicated_op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>; - def : SVE_2_Op_Pred_All_Active<nxv2f64, predicated_op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Pred_All_Active<nxv8f16, predicated_op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pred_All_Active<nxv4f32, predicated_op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pred_All_Active<nxv2f64, predicated_op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; } multiclass sve_fp_3op_u_zd_ftsmul<bits<3> opc, string asm, SDPatternOperator op> { @@ -2117,8 +2117,8 @@ class sve2_fp_pairwise_pred<bits<2> sz, bits<3> opc, string asm, let ElementSize = zprty.ElementSize; } -multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm, - SDPatternOperator op> { +multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm, + SDPatternOperator op> { def _H : sve2_fp_pairwise_pred<0b01, opc, asm, ZPR16>; def _S : sve2_fp_pairwise_pred<0b10, opc, asm, ZPR32>; def _D : sve2_fp_pairwise_pred<0b11, opc, asm, ZPR64>; @@ -2270,11 +2270,11 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm, def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>; def : SVE_2_Op_Pat<nxv4f16, op, nxv4f16, nxv4f16, !cast<Instruction>(NAME # _S)>; def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>; - def : SVE_2_Op_Pat<nxv2f16, op, nxv2f16, nxv2f16, !cast<Instruction>(NAME # _D)>; - def : SVE_2_Op_Pat<nxv2f32, op, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Pat<nxv2f16, op, nxv2f16, nxv2f16, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Pat<nxv2f32, op, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _D)>; def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; - - def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>; + + def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>; } //===----------------------------------------------------------------------===// @@ -2282,7 +2282,7 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm, //===----------------------------------------------------------------------===// class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype, - RegisterOperand o_zprtype, ElementSizeEnum Sz> + RegisterOperand o_zprtype, ElementSizeEnum 
Sz> : I<(outs o_zprtype:$Zd), (ins i_zprtype:$_Zd, PPR3bAny:$Pg, i_zprtype:$Zn), asm, "\t$Zd, $Pg/m, $Zn", "", @@ -2301,64 +2301,64 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype, let Constraints = "$Zd = $_Zd"; let DestructiveInstType = DestructiveOther; - let ElementSize = Sz; + let ElementSize = Sz; } multiclass sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype, RegisterOperand o_zprtype, - SDPatternOperator int_op, - SDPatternOperator ir_op, ValueType vt1, + SDPatternOperator int_op, + SDPatternOperator ir_op, ValueType vt1, ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>; - // convert vt1 to a packed type for the intrinsic patterns - defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16, - !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16, - !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32, - 1 : vt1); - - // convert vt3 to a packed type for the intrinsic patterns - defvar packedvt3 = !cond(!eq(!cast<string>(vt3), "nxv2f16"): nxv8f16, - !eq(!cast<string>(vt3), "nxv4f16"): nxv8f16, - !eq(!cast<string>(vt3), "nxv2f32"): nxv4f32, - 1 : vt3); - - def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>; - - def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>; -} - -multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm, - RegisterOperand i_zprtype, - RegisterOperand o_zprtype, - SDPatternOperator int_op, - SDPatternOperator ir_op, ValueType vt1, - ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { - def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>; - - // convert vt1 to a packed type for the intrinsic patterns - defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16, - !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16, - !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32, - 1 : vt1); - - def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>; - - def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>; -} - + // convert vt1 to a packed type for the intrinsic patterns + defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16, + !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16, + !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32, + 1 : vt1); + + // convert vt3 to a packed type for the intrinsic patterns + defvar packedvt3 = !cond(!eq(!cast<string>(vt3), "nxv2f16"): nxv8f16, + !eq(!cast<string>(vt3), "nxv4f16"): nxv8f16, + !eq(!cast<string>(vt3), "nxv2f32"): nxv4f32, + 1 : vt3); + + def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>; + + def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>; +} + +multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm, + RegisterOperand i_zprtype, + RegisterOperand o_zprtype, + SDPatternOperator int_op, + SDPatternOperator ir_op, ValueType vt1, + ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { + def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>; + + // convert vt1 to a packed type for the intrinsic patterns + defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16, + !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16, + !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32, + 1 : vt1); + + def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>; + + def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>; +} + multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> { def 
_H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>; - def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; + def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; } multiclass sve2_fp_flogb<string asm, SDPatternOperator op> { @@ -2466,19 +2466,19 @@ multiclass sve_int_bin_pred_arit_0<bits<3> opc, string asm, string Ps, def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; } -multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, string Ps, - SDPatternOperator op, - DestructiveInstTypeEnum flags> { - let DestructiveInstType = flags in { - def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>, - SVEPseudo2Instr<Ps # _B, 1>; - def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>, - SVEPseudo2Instr<Ps # _H, 1>; - def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>, - SVEPseudo2Instr<Ps # _S, 1>; - def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>, - SVEPseudo2Instr<Ps # _D, 1>; - } +multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, string Ps, + SDPatternOperator op, + DestructiveInstTypeEnum flags> { + let DestructiveInstType = flags in { + def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>, + SVEPseudo2Instr<Ps # _B, 1>; + def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>, + SVEPseudo2Instr<Ps # _H, 1>; + def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>, + SVEPseudo2Instr<Ps # _S, 1>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>, + SVEPseudo2Instr<Ps # _D, 1>; + } def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>; def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; @@ -2486,19 +2486,19 @@ multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, string Ps, def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; } -multiclass sve_int_bin_pred_arit_2<bits<3> opc, string asm, string Ps, - SDPatternOperator op, - DestructiveInstTypeEnum flags> { - let DestructiveInstType = flags in { - def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>, - SVEPseudo2Instr<Ps # _B, 1>; - def _H : sve_int_bin_pred_arit_log<0b01, 0b10, opc, asm, ZPR16>, - SVEPseudo2Instr<Ps # _H, 1>; - def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>, - SVEPseudo2Instr<Ps # _S, 1>; - def _D : 
sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>, - SVEPseudo2Instr<Ps # _D, 1>; - } +multiclass sve_int_bin_pred_arit_2<bits<3> opc, string asm, string Ps, + SDPatternOperator op, + DestructiveInstTypeEnum flags> { + let DestructiveInstType = flags in { + def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>, + SVEPseudo2Instr<Ps # _B, 1>; + def _H : sve_int_bin_pred_arit_log<0b01, 0b10, opc, asm, ZPR16>, + SVEPseudo2Instr<Ps # _H, 1>; + def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>, + SVEPseudo2Instr<Ps # _S, 1>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>, + SVEPseudo2Instr<Ps # _D, 1>; + } def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>; def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; @@ -2588,8 +2588,8 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm, let ElementSize = zprty.ElementSize; } -multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op, - SDPatternOperator outerop, SDPatternOperator mulop> { +multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op, + SDPatternOperator outerop, SDPatternOperator mulop> { def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>; def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>; def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>; @@ -2599,15 +2599,15 @@ multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op, def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; def : SVE_4_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>; def : SVE_4_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; - - def : Pat<(outerop nxv16i8:$Op1, (mulop nxv16i1:$pred, nxv16i8:$Op2, nxv16i8:$Op3)), - (!cast<Instruction>(NAME # _B) $pred, $Op1, $Op2, $Op3)>; - def : Pat<(outerop nxv8i16:$Op1, (mulop nxv8i1:$pred, nxv8i16:$Op2, nxv8i16:$Op3)), - (!cast<Instruction>(NAME # _H) $pred, $Op1, $Op2, $Op3)>; - def : Pat<(outerop nxv4i32:$Op1, (mulop nxv4i1:$pred, nxv4i32:$Op2, nxv4i32:$Op3)), - (!cast<Instruction>(NAME # _S) $pred, $Op1, $Op2, $Op3)>; - def : Pat<(outerop nxv2i64:$Op1, (mulop nxv2i1:$pred, nxv2i64:$Op2, nxv2i64:$Op3)), - (!cast<Instruction>(NAME # _D) $pred, $Op1, $Op2, $Op3)>; + + def : Pat<(outerop nxv16i8:$Op1, (mulop nxv16i1:$pred, nxv16i8:$Op2, nxv16i8:$Op3)), + (!cast<Instruction>(NAME # _B) $pred, $Op1, $Op2, $Op3)>; + def : Pat<(outerop nxv8i16:$Op1, (mulop nxv8i1:$pred, nxv8i16:$Op2, nxv8i16:$Op3)), + (!cast<Instruction>(NAME # _H) $pred, $Op1, $Op2, $Op3)>; + def : Pat<(outerop nxv4i32:$Op1, (mulop nxv4i1:$pred, nxv4i32:$Op2, nxv4i32:$Op3)), + (!cast<Instruction>(NAME # _S) $pred, $Op1, $Op2, $Op3)>; + def : Pat<(outerop nxv2i64:$Op1, (mulop nxv2i1:$pred, nxv2i64:$Op2, nxv2i64:$Op3)), + (!cast<Instruction>(NAME # _D) $pred, $Op1, $Op2, $Op3)>; } //===----------------------------------------------------------------------===// @@ -2711,8 +2711,8 @@ multiclass sve2_int_mla_by_indexed_elem<bits<2> opc, bit S, string asm, // SVE2 Integer Multiply-Add Long - Indexed Group //===----------------------------------------------------------------------===// -multiclass sve2_int_mla_long_by_indexed_elem<bits<4> opc, string asm, - SDPatternOperator op> { +multiclass sve2_int_mla_long_by_indexed_elem<bits<4> opc, string asm, + SDPatternOperator op> { def _S : sve2_int_mla_by_indexed_elem<0b10, { opc{3}, 0b0, opc{2-1}, ?, 
opc{0} }, asm, ZPR32, ZPR16, ZPR3b16, VectorIndexH32b> { bits<3> Zm; @@ -2962,8 +2962,8 @@ class sve2_int_mul<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty> let Inst{4-0} = Zd; } -multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op, - SDPatternOperator op_pred = null_frag> { +multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op, + SDPatternOperator op_pred = null_frag> { def _B : sve2_int_mul<0b00, opc, asm, ZPR8>; def _H : sve2_int_mul<0b01, opc, asm, ZPR16>; def _S : sve2_int_mul<0b10, opc, asm, ZPR32>; @@ -2973,11 +2973,11 @@ multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op, def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>; def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; - - def : SVE_2_Op_Pred_All_Active<nxv16i8, op_pred, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>; - def : SVE_2_Op_Pred_All_Active<nxv8i16, op_pred, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Pred_All_Active<nxv4i32, op_pred, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>; - def : SVE_2_Op_Pred_All_Active<nxv2i64, op_pred, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; + + def : SVE_2_Op_Pred_All_Active<nxv16i8, op_pred, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pred_All_Active<nxv8i16, op_pred, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pred_All_Active<nxv4i32, op_pred, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pred_All_Active<nxv2i64, op_pred, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; } multiclass sve2_int_mul_single<bits<3> opc, string asm, SDPatternOperator op> { @@ -3531,8 +3531,8 @@ multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm, def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>; } -multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm, - SDPatternOperator op> { +multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm, + SDPatternOperator op> { def _S : sve2_int_absdiff_accum<{ opc{1}, 0b0 }, { 0b010, opc{0} }, asm, ZPR32, ZPR32>; def _D : sve2_int_absdiff_accum<{ opc{1}, 0b1 }, { 0b010, opc{0} }, asm, @@ -3576,7 +3576,7 @@ multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm, let Inst{19} = imm{3}; } def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64, - tvecshiftR32> { + tvecshiftR32> { let Inst{20-19} = imm{4-3}; } def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; @@ -3616,7 +3616,7 @@ multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm, let Inst{19} = imm{3}; } def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64, - tvecshiftR32> { + tvecshiftR32> { let Inst{20-19} = imm{4-3}; } def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; @@ -3777,10 +3777,10 @@ multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm, def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; - def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; - def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, 
nxv4i32, !cast<Instruction>(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; + def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; } multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm, @@ -3789,9 +3789,9 @@ multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm, def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; - def : SVE_InReg_Extend<nxv8i16, op, nxv8i1, nxv8i8, !cast<Instruction>(NAME # _H)>; - def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i8, !cast<Instruction>(NAME # _S)>; - def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i8, !cast<Instruction>(NAME # _D)>; + def : SVE_InReg_Extend<nxv8i16, op, nxv8i1, nxv8i8, !cast<Instruction>(NAME # _H)>; + def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i8, !cast<Instruction>(NAME # _S)>; + def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i8, !cast<Instruction>(NAME # _D)>; } multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm, @@ -3799,15 +3799,15 @@ multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm, def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; - def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i16, !cast<Instruction>(NAME # _S)>; - def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i16, !cast<Instruction>(NAME # _D)>; + def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i16, !cast<Instruction>(NAME # _S)>; + def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i16, !cast<Instruction>(NAME # _D)>; } multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm, SDPatternOperator op> { def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>; - def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i32, !cast<Instruction>(NAME # _D)>; + def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i32, !cast<Instruction>(NAME # _D)>; } multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm, @@ -3817,23 +3817,23 @@ multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm, def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; - def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; - def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; + def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; } -multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator op> { +multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator op> { def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>; def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>; def _D : 
sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>; - def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; + def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -4002,10 +4002,10 @@ multiclass sve_int_arith_imm1_unsigned<bits<2> opc, string asm, SDPatternOperato def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>; def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>; - def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImm8Pat, !cast<Instruction>(NAME # _B)>; - def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImm16Pat, !cast<Instruction>(NAME # _H)>; - def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImm32Pat, !cast<Instruction>(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImm64Pat, !cast<Instruction>(NAME # _D)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImm8Pat, !cast<Instruction>(NAME # _B)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImm16Pat, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImm32Pat, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImm64Pat, !cast<Instruction>(NAME # _D)>; } multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> { @@ -4014,10 +4014,10 @@ multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> { def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8>; def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8>; - def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>; - def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>; - def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # 
_S)>; + def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -4130,7 +4130,7 @@ multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } - + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; @@ -4289,8 +4289,8 @@ class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm, let Inst{3-0} = Pd; let Defs = [NZCV]; - let ElementSize = pprty.ElementSize; - let isPTestLike = 1; + let ElementSize = pprty.ElementSize; + let isPTestLike = 1; } multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt, @@ -4363,7 +4363,7 @@ class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty, let Defs = [NZCV]; let ElementSize = pprty.ElementSize; - let isPTestLike = 1; + let isPTestLike = 1; } multiclass SVE_SETCC_Imm_Pat<CondCode cc, CondCode commuted_cc, @@ -4423,8 +4423,8 @@ class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty, let Inst{3-0} = Pd; let Defs = [NZCV]; - let ElementSize = pprty.ElementSize; - let isPTestLike = 1; + let ElementSize = pprty.ElementSize; + let isPTestLike = 1; } multiclass sve_int_ucmp_vi<bits<2> opc, string asm, CondCode cc, @@ -4469,7 +4469,7 @@ class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt> } class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm, - RegisterClass gprty, PPRRegOp pprty> + RegisterClass gprty, PPRRegOp pprty> : I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm), asm, "\t$Pd, $Rn, $Rm", "", []>, Sched<[]> { @@ -4487,32 +4487,32 @@ class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm, let Inst{3-0} = Pd; let Defs = [NZCV]; - let ElementSize = pprty.ElementSize; - let isWhile = 1; + let ElementSize = pprty.ElementSize; + let isWhile = 1; } multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op> { - def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>; - def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>; - def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>; - def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>; + def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>; + def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>; + def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>; + def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>; def : SVE_2_Op_Pat<nxv16i1, op, i32, i32, !cast<Instruction>(NAME # _B)>; - def : SVE_2_Op_Pat<nxv8i1, op, i32, i32, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Pat<nxv4i1, op, i32, i32, !cast<Instruction>(NAME # _S)>; - def : SVE_2_Op_Pat<nxv2i1, op, i32, i32, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Pat<nxv8i1, op, i32, i32, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4i1, op, i32, i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2i1, op, i32, i32, !cast<Instruction>(NAME # _D)>; } multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op> { - def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>; - def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>; - def _S : sve_int_while_rr<0b10, { 1, 
opc }, asm, GPR64, PPR32>; - def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>; + def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>; + def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>; + def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>; + def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>; def : SVE_2_Op_Pat<nxv16i1, op, i64, i64, !cast<Instruction>(NAME # _B)>; - def : SVE_2_Op_Pat<nxv8i1, op, i64, i64, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Pat<nxv4i1, op, i64, i64, !cast<Instruction>(NAME # _S)>; - def : SVE_2_Op_Pat<nxv2i1, op, i64, i64, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Pat<nxv8i1, op, i64, i64, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4i1, op, i64, i64, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2i1, op, i64, i64, !cast<Instruction>(NAME # _D)>; } class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm, @@ -4533,8 +4533,8 @@ class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm, let Inst{3-0} = Pd; let Defs = [NZCV]; - let ElementSize = pprty.ElementSize; - let isWhile = 1; + let ElementSize = pprty.ElementSize; + let isWhile = 1; } multiclass sve2_int_while_rr<bits<1> rw, string asm, string op> { @@ -4577,10 +4577,10 @@ multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> { def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32asZPR>; def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64asZPR>; - def : SVE_2_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>; def : SVE_2_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>; def : SVE_2_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; } @@ -4616,10 +4616,10 @@ multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> { def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32asZPR>; def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64asZPR>; - def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Instruction>(NAME # _H)>; def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _S)>; def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>; def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; } @@ -4840,11 +4840,11 @@ multiclass sve_int_index_rr<string asm, SDPatternOperator op> { def : SVE_2_Op_Pat<nxv4i32, op, i32, i32, !cast<Instruction>(NAME # _S)>; def : SVE_2_Op_Pat<nxv2i64, op, i64, i64, !cast<Instruction>(NAME # _D)>; } - + 
//===----------------------------------------------------------------------===// // SVE Bitwise Shift - Predicated Group //===----------------------------------------------------------------------===// - + class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm, ZPRRegOp zprty, Operand immtype> : I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm), @@ -4869,19 +4869,19 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm, let ElementSize = zprty.ElementSize; } -multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps, - SDPatternOperator op = null_frag> { - def _B : SVEPseudo2Instr<Ps # _B, 1>, +multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps, + SDPatternOperator op = null_frag> { + def _B : SVEPseudo2Instr<Ps # _B, 1>, sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : SVEPseudo2Instr<Ps # _H, 1>, + def _H : SVEPseudo2Instr<Ps # _H, 1>, sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { let Inst{8} = imm{3}; } - def _S : SVEPseudo2Instr<Ps # _S, 1>, + def _S : SVEPseudo2Instr<Ps # _S, 1>, sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { let Inst{9-8} = imm{4-3}; } - def _D : SVEPseudo2Instr<Ps # _D, 1>, + def _D : SVEPseudo2Instr<Ps # _D, 1>, sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; @@ -4893,16 +4893,16 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps, def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>; } -// As above but shift amount takes the form of a "vector immediate". -multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm, - string Ps, SDPatternOperator op> -: sve_int_bin_pred_shift_imm_left<opc, asm, Ps, null_frag> { - def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8, !cast<Instruction>(NAME # _B)>; - def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>; - def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>; - def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>; -} - +// As above but shift amount takes the form of a "vector immediate". +multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm, + string Ps, SDPatternOperator op> +: sve_int_bin_pred_shift_imm_left<opc, asm, Ps, null_frag> { + def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8, !cast<Instruction>(NAME # _B)>; + def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>; + def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>; + def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>; +} + multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> { def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>; def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>; @@ -4939,16 +4939,16 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps, def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; } -// As above but shift amount takes the form of a "vector immediate". 
-multiclass sve_int_bin_pred_shift_imm_right_dup<bits<4> opc, string asm, - string Ps, SDPatternOperator op> -: sve_int_bin_pred_shift_imm_right<opc, asm, Ps, null_frag> { - def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8, !cast<Instruction>(NAME # _B)>; - def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>; - def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>; - def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>; -} - +// As above but shift amount takes the form of a "vector immediate". +multiclass sve_int_bin_pred_shift_imm_right_dup<bits<4> opc, string asm, + string Ps, SDPatternOperator op> +: sve_int_bin_pred_shift_imm_right<opc, asm, Ps, null_frag> { + def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8, !cast<Instruction>(NAME # _B)>; + def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>; + def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>; + def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>; +} + multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op = null_frag> { def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftR8, FalseLanesZero>; def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftR16, FalseLanesZero>; @@ -5089,10 +5089,10 @@ multiclass sve_int_bin_cons_shift_imm_left<bits<2> opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8, !cast<Instruction>(NAME # _B)>; - def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>; - def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>; - def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>; + def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8, !cast<Instruction>(NAME # _B)>; + def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>; + def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>; + def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>; } multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm, @@ -5109,12 +5109,12 @@ multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8, !cast<Instruction>(NAME # _B)>; - def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>; - def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>; - def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>; + def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8, !cast<Instruction>(NAME # _B)>; + def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>; + def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmR32, 
!cast<Instruction>(NAME # _S)>; + def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>; } - + //===----------------------------------------------------------------------===// // SVE Memory - Store Group //===----------------------------------------------------------------------===// @@ -5623,7 +5623,7 @@ class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm, PPRRegOp pprty> : I<(outs pprty:$Pd), (ins pprty:$Pn, pprty:$Pm), asm, "\t$Pd, $Pn, $Pm", - "", []>, Sched<[]> { + "", []>, Sched<[]> { bits<4> Pd; bits<4> Pm; bits<4> Pn; @@ -5689,7 +5689,7 @@ class sve_int_rdffr_pred<bit s, string asm> let Inst{4} = 0; let Inst{3-0} = Pd; - let Defs = !if(s, [NZCV], []); + let Defs = !if(s, [NZCV], []); let Uses = [FFR]; } @@ -5816,11 +5816,11 @@ multiclass sve_int_perm_clast_vz<bit ab, string asm, SDPatternOperator op> { def _S : sve_int_perm_clast_vz<0b10, ab, asm, ZPR32, FPR32>; def _D : sve_int_perm_clast_vz<0b11, ab, asm, ZPR64, FPR64>; - def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>; - def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>; - def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>; - - def : SVE_3_Op_Pat<bf16, op, nxv8i1, bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>; + + def : SVE_3_Op_Pat<bf16, op, nxv8i1, bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>; } class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm, @@ -5860,8 +5860,8 @@ multiclass sve_int_perm_clast_zz<bit ab, string asm, SDPatternOperator op> { def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>; def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>; def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; - - def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>; + + def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>; } class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm, @@ -5924,8 +5924,8 @@ multiclass sve_int_perm_last_v<bit ab, string asm, SDPatternOperator op> { def : SVE_2_Op_Pat<f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; def : SVE_2_Op_Pat<f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>; def : SVE_2_Op_Pat<f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; - - def : SVE_2_Op_Pat<bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME # _H)>; + + def : SVE_2_Op_Pat<bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME # _H)>; } class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty> @@ -5962,8 +5962,8 @@ multiclass sve_int_perm_splice<string asm, SDPatternOperator op> { def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>; def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>; def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; - - def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>; + + def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>; } class 
sve2_int_perm_splice_cons<bits<2> sz8_64, string asm, @@ -6019,20 +6019,20 @@ multiclass sve_int_perm_rev_rbit<string asm, SDPatternOperator op> { def _S : sve_int_perm_rev<0b10, 0b11, asm, ZPR32>; def _D : sve_int_perm_rev<0b11, 0b11, asm, ZPR64>; - def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; - def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; + def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; } -multiclass sve_int_perm_rev_revb<string asm, SDPatternOperator op> { +multiclass sve_int_perm_rev_revb<string asm, SDPatternOperator op> { def _H : sve_int_perm_rev<0b01, 0b00, asm, ZPR16>; def _S : sve_int_perm_rev<0b10, 0b00, asm, ZPR32>; def _D : sve_int_perm_rev<0b11, 0b00, asm, ZPR64>; - def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; - def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; - def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; + def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; } multiclass sve_int_perm_rev_revh<string asm, SDPatternOperator op> { @@ -6139,9 +6139,9 @@ multiclass sve_int_perm_cpy_v<string asm, SDPatternOperator op> { (!cast<Instruction>(NAME # _S) $passthru, $pg, $splat)>; def : Pat<(nxv2f64 (op nxv2i1:$pg, f64:$splat, nxv2f64:$passthru)), (!cast<Instruction>(NAME # _D) $passthru, $pg, $splat)>; - - def : Pat<(nxv8bf16 (op nxv8i1:$pg, bf16:$splat, nxv8bf16:$passthru)), - (!cast<Instruction>(NAME # _H) $passthru, $pg, $splat)>; + + def : Pat<(nxv8bf16 (op nxv8i1:$pg, bf16:$splat, nxv8bf16:$passthru)), + (!cast<Instruction>(NAME # _H) $passthru, $pg, $splat)>; } class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty> @@ -6194,8 +6194,8 @@ class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm, let Inst{4-0} = Zt; let mayLoad = 1; - let Uses = !if(nf, [FFR], []); - let Defs = !if(nf, [FFR], []); + let Uses = !if(nf, [FFR], []); + let Defs = !if(nf, [FFR], []); } multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm, @@ -6397,8 +6397,8 @@ class sve_mem_cld_ss_base<bits<4> dtype, bit ff, dag iops, string asm, let Inst{4-0} = Zt; let mayLoad = 1; - let Uses = !if(ff, [FFR], []); - let Defs = !if(ff, [FFR], []); + let Uses = !if(ff, [FFR], []); + let Defs = !if(ff, [FFR], []); } multiclass sve_mem_cld_ss<bits<4> dtype, string asm, RegisterOperand listty, @@ -7227,8 +7227,8 @@ multiclass sve_int_bin_cons_misc_0_c_fexpa<string asm, SDPatternOperator op> { //===----------------------------------------------------------------------===// class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm, - ZPRRegOp zprty, FPRasZPROperand dstOpType> -: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn), + ZPRRegOp zprty, 
FPRasZPROperand dstOpType> +: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn), asm, "\t$Vd, $Pg, $Zn", "", []>, Sched<[]> { @@ -7246,54 +7246,54 @@ class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm, let Inst{4-0} = Vd; } -multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm, - SDPatternOperator op> { - def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>; - def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>; - def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>; +multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm, + SDPatternOperator op> { + def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>; + def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>; + def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>; - def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; - def : SVE_2_Op_Pat<nxv2i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Pat<nxv2i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; } -multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm, - SDPatternOperator op> { - def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>; - def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>; - def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>; - def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64asZPR>; +multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm, + SDPatternOperator op> { + def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>; + def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>; + def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>; + def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64asZPR>; - def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; - def : SVE_2_Op_Pat<nxv2i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Pat<nxv2i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; - def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; } -multiclass sve_int_reduce_1<bits<3> opc, string asm, - SDPatternOperator op> { - def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8asZPR>; - def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16asZPR>; - def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32asZPR>; - def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64asZPR>; +multiclass sve_int_reduce_1<bits<3> opc, string asm, + SDPatternOperator op> { + def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8asZPR>; + def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16asZPR>; + def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32asZPR>; + def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64asZPR>; - def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; - def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, 
!cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; - def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; } -multiclass sve_int_reduce_2<bits<3> opc, string asm, - SDPatternOperator op> { - def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8asZPR>; - def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16asZPR>; - def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32asZPR>; - def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64asZPR>; +multiclass sve_int_reduce_2<bits<3> opc, string asm, + SDPatternOperator op> { + def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8asZPR>; + def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16asZPR>; + def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32asZPR>; + def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64asZPR>; - def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; - def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; - def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; - def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; } class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm, @@ -7398,7 +7398,7 @@ class sve_int_brkn<bit S, string asm> let Inst{3-0} = Pdm; let Constraints = "$Pdm = $_Pdm"; - let Defs = !if(S, [NZCV], []); + let Defs = !if(S, [NZCV], []); } multiclass sve_int_brkn<bits<1> opc, string asm, SDPatternOperator op> { @@ -7900,8 +7900,8 @@ multiclass sve_mem_ldor_ss<bits<2> sz, string asm, RegisterOperand listty, def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]", (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; - def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), (AddrCP GPR64sp:$base, gprty:$offset))), - (!cast<Instruction>(NAME) PPR3bAny:$gp, GPR64sp:$base, gprty:$offset)>; + def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), (AddrCP GPR64sp:$base, gprty:$offset))), + (!cast<Instruction>(NAME) PPR3bAny:$gp, GPR64sp:$base, gprty:$offset)>; } //===----------------------------------------------------------------------===// @@ -7935,7 +7935,7 @@ multiclass sve_int_perm_bin_perm_128_zz<bits<2> opc, bit P, string asm, SDPatter def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME)>; def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>; def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME)>; - def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>; + def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>; } /// Addressing modes @@ -7954,10 +7954,10 @@ multiclass sve_fp_bin_pred_hfd<SDPatternOperator op> { def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>; def : SVE_3_Op_Pat<nxv8f16, 
op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>; - def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>; - def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>; + def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>; + def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>; def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>; - def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>; + def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>; def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>; } @@ -7982,19 +7982,19 @@ multiclass sve_int_bin_pred_sd<SDPatternOperator op> { def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>; def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>; } - -// Predicated pseudo integer two operand instructions. Second operand is an -// immediate specified by imm_[bhsd]. -multiclass sve_int_shift_pred_bhsd<SDPatternOperator op, - ComplexPattern imm_b, ComplexPattern imm_h, - ComplexPattern imm_s, ComplexPattern imm_d> { - def _UNDEF_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, Operand<i32>, FalseLanesUndef>; - def _UNDEF_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesUndef>; - def _UNDEF_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesUndef>; - def _UNDEF_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesUndef>; - - def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Instruction>(NAME # _UNDEF_B)>; - def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, imm_h, !cast<Instruction>(NAME # _UNDEF_H)>; - def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Instruction>(NAME # _UNDEF_S)>; - def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Instruction>(NAME # _UNDEF_D)>; -} + +// Predicated pseudo integer two operand instructions. Second operand is an +// immediate specified by imm_[bhsd]. 
+multiclass sve_int_shift_pred_bhsd<SDPatternOperator op, + ComplexPattern imm_b, ComplexPattern imm_h, + ComplexPattern imm_s, ComplexPattern imm_d> { + def _UNDEF_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, Operand<i32>, FalseLanesUndef>; + def _UNDEF_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesUndef>; + def _UNDEF_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesUndef>; + def _UNDEF_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesUndef>; + + def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Instruction>(NAME # _UNDEF_B)>; + def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, imm_h, !cast<Instruction>(NAME # _UNDEF_H)>; + def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Instruction>(NAME # _UNDEF_S)>; + def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Instruction>(NAME # _UNDEF_D)>; +} diff --git a/contrib/libs/llvm12/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/contrib/libs/llvm12/lib/Target/AArch64/SVEIntrinsicOpts.cpp index 9911f33371..e312d9d28b 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/SVEIntrinsicOpts.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/SVEIntrinsicOpts.cpp @@ -37,7 +37,7 @@ using namespace llvm; using namespace llvm::PatternMatch; -#define DEBUG_TYPE "aarch64-sve-intrinsic-opts" +#define DEBUG_TYPE "aarch64-sve-intrinsic-opts" namespace llvm { void initializeSVEIntrinsicOptsPass(PassRegistry &); @@ -177,50 +177,50 @@ bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) { if (isa<PHINode>(I->getArgOperand(0))) return processPhiNode(I); - SmallVector<Instruction *, 32> CandidatesForRemoval; - Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr; - - const auto *IVTy = cast<VectorType>(I->getType()); - - // Walk the chain of conversions. - while (Cursor) { - // If the type of the cursor has fewer lanes than the final result, zeroing - // must take place, which breaks the equivalence chain. - const auto *CursorVTy = cast<VectorType>(Cursor->getType()); - if (CursorVTy->getElementCount().getKnownMinValue() < - IVTy->getElementCount().getKnownMinValue()) - break; - - // If the cursor has the same type as I, it is a viable replacement. - if (Cursor->getType() == IVTy) - EarliestReplacement = Cursor; - - auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor); - - // If this is not an SVE conversion intrinsic, this is the end of the chain. - if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() == - Intrinsic::aarch64_sve_convert_to_svbool || - IntrinsicCursor->getIntrinsicID() == - Intrinsic::aarch64_sve_convert_from_svbool)) - break; - - CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor); - Cursor = IntrinsicCursor->getOperand(0); - } - - // If no viable replacement in the conversion chain was found, there is - // nothing to do. - if (!EarliestReplacement) + SmallVector<Instruction *, 32> CandidatesForRemoval; + Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr; + + const auto *IVTy = cast<VectorType>(I->getType()); + + // Walk the chain of conversions. + while (Cursor) { + // If the type of the cursor has fewer lanes than the final result, zeroing + // must take place, which breaks the equivalence chain. + const auto *CursorVTy = cast<VectorType>(Cursor->getType()); + if (CursorVTy->getElementCount().getKnownMinValue() < + IVTy->getElementCount().getKnownMinValue()) + break; + + // If the cursor has the same type as I, it is a viable replacement. 
+ if (Cursor->getType() == IVTy) + EarliestReplacement = Cursor; + + auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor); + + // If this is not an SVE conversion intrinsic, this is the end of the chain. + if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() == + Intrinsic::aarch64_sve_convert_to_svbool || + IntrinsicCursor->getIntrinsicID() == + Intrinsic::aarch64_sve_convert_from_svbool)) + break; + + CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor); + Cursor = IntrinsicCursor->getOperand(0); + } + + // If no viable replacement in the conversion chain was found, there is + // nothing to do. + if (!EarliestReplacement) return false; - I->replaceAllUsesWith(EarliestReplacement); + I->replaceAllUsesWith(EarliestReplacement); I->eraseFromParent(); - while (!CandidatesForRemoval.empty()) { - Instruction *Candidate = CandidatesForRemoval.pop_back_val(); - if (Candidate->use_empty()) - Candidate->eraseFromParent(); - } + while (!CandidatesForRemoval.empty()) { + Instruction *Candidate = CandidatesForRemoval.pop_back_val(); + if (Candidate->use_empty()) + Candidate->eraseFromParent(); + } return true; } @@ -276,8 +276,8 @@ bool SVEIntrinsicOpts::runOnModule(Module &M) { case Intrinsic::aarch64_sve_ptest_any: case Intrinsic::aarch64_sve_ptest_first: case Intrinsic::aarch64_sve_ptest_last: - for (User *U : F.users()) - Functions.insert(cast<Instruction>(U)->getFunction()); + for (User *U : F.users()) + Functions.insert(cast<Instruction>(U)->getFunction()); break; default: break; diff --git a/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make index bb7d4a2c89..cf2f9565d1 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make +++ b/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make @@ -12,13 +12,13 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/lib/Support + contrib/libs/llvm12 + contrib/libs/llvm12/lib/Support ) ADDINCL( - contrib/libs/llvm12/lib/Target/AArch64 - contrib/libs/llvm12/lib/Target/AArch64/TargetInfo + contrib/libs/llvm12/lib/Target/AArch64 + contrib/libs/llvm12/lib/Target/AArch64/TargetInfo ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index ac59d73fd9..8a90a74841 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -26,13 +26,13 @@ namespace llvm { namespace llvm { - namespace AArch64DBnXS { -#define GET_DBNXS_IMPL -#include "AArch64GenSystemOperands.inc" - } -} - -namespace llvm { + namespace AArch64DBnXS { +#define GET_DBNXS_IMPL +#include "AArch64GenSystemOperands.inc" + } +} + +namespace llvm { namespace AArch64DB { #define GET_DB_IMPL #include "AArch64GenSystemOperands.inc" @@ -165,7 +165,7 @@ std::string AArch64SysReg::genericRegisterString(uint32_t Bits) { namespace llvm { namespace AArch64TLBI { -#define GET_TLBITable_IMPL +#define GET_TLBITable_IMPL #include "AArch64GenSystemOperands.inc" } } diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 1b13c94389..6d737ac8e1 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -338,14 +338,14 @@ struct 
SysAliasReg : SysAlias { : SysAlias(N, E, F), NeedsReg(R) {} }; -struct SysAliasImm : SysAlias { - uint16_t ImmValue; - constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I) - : SysAlias(N, E), ImmValue(I) {} - constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I, FeatureBitset F) - : SysAlias(N, E, F), ImmValue(I) {} -}; - +struct SysAliasImm : SysAlias { + uint16_t ImmValue; + constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I) + : SysAlias(N, E), ImmValue(I) {} + constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I, FeatureBitset F) + : SysAlias(N, E, F), ImmValue(I) {} +}; + namespace AArch64AT{ struct AT : SysAlias { using SysAlias::SysAlias; @@ -362,14 +362,14 @@ namespace AArch64DB { #include "AArch64GenSystemOperands.inc" } -namespace AArch64DBnXS { - struct DBnXS : SysAliasImm { - using SysAliasImm::SysAliasImm; - }; - #define GET_DBNXS_DECL - #include "AArch64GenSystemOperands.inc" -} - +namespace AArch64DBnXS { + struct DBnXS : SysAliasImm { + using SysAliasImm::SysAliasImm; + }; + #define GET_DBNXS_DECL + #include "AArch64GenSystemOperands.inc" +} + namespace AArch64DC { struct DC : SysAlias { using SysAlias::SysAlias; @@ -568,7 +568,7 @@ namespace AArch64TLBI { struct TLBI : SysAliasReg { using SysAliasReg::SysAliasReg; }; - #define GET_TLBITable_DECL + #define GET_TLBITable_DECL #include "AArch64GenSystemOperands.inc" } @@ -622,7 +622,7 @@ namespace AArch64II { MO_HI12 = 7, /// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the - /// reference is actually to the ".refptr.FOO" symbol. This is used for + /// reference is actually to the ".refptr.FOO" symbol. This is used for /// stub symbols on windows. MO_COFFSTUB = 0x8, diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make index 3668c2a650..37d19feb17 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make +++ b/contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make @@ -12,15 +12,15 @@ LICENSE(Apache-2.0 WITH LLVM-exception) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/Support + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/Support ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64 - contrib/libs/llvm12/lib/Target/AArch64 - contrib/libs/llvm12/lib/Target/AArch64/Utils + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64 + contrib/libs/llvm12/lib/Target/AArch64 + contrib/libs/llvm12/lib/Target/AArch64/Utils ) NO_COMPILER_WARNINGS() diff --git a/contrib/libs/llvm12/lib/Target/AArch64/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/ya.make index 244cbc7f34..0c05f2840f 100644 --- a/contrib/libs/llvm12/lib/Target/AArch64/ya.make +++ b/contrib/libs/llvm12/lib/Target/AArch64/ya.make @@ -15,28 +15,28 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) PEERDIR( - contrib/libs/llvm12 - contrib/libs/llvm12/include - contrib/libs/llvm12/lib/Analysis - contrib/libs/llvm12/lib/CodeGen - contrib/libs/llvm12/lib/CodeGen/AsmPrinter - contrib/libs/llvm12/lib/CodeGen/GlobalISel - contrib/libs/llvm12/lib/CodeGen/SelectionDAG - contrib/libs/llvm12/lib/IR - contrib/libs/llvm12/lib/MC - contrib/libs/llvm12/lib/Support - contrib/libs/llvm12/lib/Target - contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc - contrib/libs/llvm12/lib/Target/AArch64/TargetInfo - contrib/libs/llvm12/lib/Target/AArch64/Utils - contrib/libs/llvm12/lib/Transforms/CFGuard - 
contrib/libs/llvm12/lib/Transforms/Scalar - contrib/libs/llvm12/lib/Transforms/Utils + contrib/libs/llvm12 + contrib/libs/llvm12/include + contrib/libs/llvm12/lib/Analysis + contrib/libs/llvm12/lib/CodeGen + contrib/libs/llvm12/lib/CodeGen/AsmPrinter + contrib/libs/llvm12/lib/CodeGen/GlobalISel + contrib/libs/llvm12/lib/CodeGen/SelectionDAG + contrib/libs/llvm12/lib/IR + contrib/libs/llvm12/lib/MC + contrib/libs/llvm12/lib/Support + contrib/libs/llvm12/lib/Target + contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc + contrib/libs/llvm12/lib/Target/AArch64/TargetInfo + contrib/libs/llvm12/lib/Target/AArch64/Utils + contrib/libs/llvm12/lib/Transforms/CFGuard + contrib/libs/llvm12/lib/Transforms/Scalar + contrib/libs/llvm12/lib/Transforms/Utils ) ADDINCL( - ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64 - contrib/libs/llvm12/lib/Target/AArch64 + ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64 + contrib/libs/llvm12/lib/Target/AArch64 ) NO_COMPILER_WARNINGS() @@ -88,8 +88,8 @@ SRCS( GISel/AArch64InstructionSelector.cpp GISel/AArch64LegalizerInfo.cpp GISel/AArch64PostLegalizerCombiner.cpp - GISel/AArch64PostLegalizerLowering.cpp - GISel/AArch64PostSelectOptimize.cpp + GISel/AArch64PostLegalizerLowering.cpp + GISel/AArch64PostSelectOptimize.cpp GISel/AArch64PreLegalizerCombiner.cpp GISel/AArch64RegisterBankInfo.cpp SVEIntrinsicOpts.cpp |
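
Note on the largest functional hunk above: SVEIntrinsicOpts::optimizeConvertFromSVBool removes redundant chains of aarch64_sve_convert_to_svbool / aarch64_sve_convert_from_svbool reinterprets. It walks up the operand chain, stops as soon as an intermediate predicate has fewer lanes than the final result (that widening/narrowing would zero lanes and break the equivalence), remembers the earliest value that already has the required type, rewrites all uses of the root to it, and then erases the now-dead intermediate conversions. The standalone C++ sketch below is a simplified model of that walk, not code from this commit: MockValue, laneCount and isConvert are hypothetical stand-ins for llvm::Value, scalable-vector element counts and the SVE conversion intrinsics.

#include <iostream>
#include <string>
#include <vector>

// Illustrative stand-in for llvm::Value / IntrinsicInst (hypothetical type).
struct MockValue {
  std::string name;
  unsigned laneCount;          // known-minimum number of predicate lanes
  bool isConvert = false;      // models convert_to_svbool / convert_from_svbool
  MockValue *operand = nullptr;
};

// Walks the conversion chain rooted at `root` and returns the earliest value
// that already has root's lane count, collecting the intermediate reinterprets
// that become removable once the root is rewritten.
MockValue *findEarliestReplacement(MockValue *root,
                                   std::vector<MockValue *> &removable) {
  const unsigned wantedLanes = root->laneCount;
  MockValue *earliest = nullptr;

  for (MockValue *cursor = root->operand; cursor; cursor = cursor->operand) {
    // A narrower intermediate implies zeroing of the extra lanes, which
    // breaks the equivalence chain.
    if (cursor->laneCount < wantedLanes)
      break;

    // Same type as the root: a viable replacement.
    if (cursor->laneCount == wantedLanes)
      earliest = cursor;

    // Only conversion reinterprets keep the chain going.
    if (!cursor->isConvert)
      break;

    removable.insert(removable.begin(), cursor);
  }
  return earliest;
}

int main() {
  MockValue pred{"%p", 4};                                        // original nxv4i1
  MockValue widen{"convert_to_svbool(%p)", 16, true, &pred};      // widened to nxv16i1
  MockValue narrow{"convert_from_svbool(...)", 4, true, &widen};  // back to nxv4i1

  std::vector<MockValue *> removable;
  if (MockValue *repl = findEarliestReplacement(&narrow, &removable))
    std::cout << "replace '" << narrow.name << "' with '" << repl->name
              << "', dropping " << removable.size() << " reinterpret(s)\n";
  return 0;
}

Running the sketch reports that convert_from_svbool(...) can be replaced by %p directly, with one reinterpret dropped. The real pass additionally handles PHI-node inputs and only erases candidates whose use lists are empty after the rewrite; the sketch keeps just the chain-walking invariant.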