path: root/contrib/libs/llvm12/lib/Target/AArch64
author     shadchin <shadchin@yandex-team.ru>  2022-02-10 16:44:30 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:44:30 +0300
commit     2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree       012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/AArch64
parent     6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download   ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/AArch64')
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64.h | 12
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64.td | 352
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp | 2
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64AsmPrinter.cpp | 510
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64BranchTargets.cpp | 6
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64CallingConvention.cpp | 100
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64Combine.td | 122
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64CompressJumpTables.cpp | 62
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 88
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp | 10
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64FastISel.cpp | 12
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp | 790
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.h | 40
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 126
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.cpp | 5278
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.h | 216
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrFormats.td | 202
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrGISel.td | 120
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp | 784
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.h | 96
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.td | 544
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 66
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64MCInstLower.cpp | 12
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp | 164
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.h | 108
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64MacroFusion.cpp | 26
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp | 10
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.cpp | 166
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.h | 30
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.td | 52
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp | 12
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64SVEInstrInfo.td | 842
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA55.td | 678
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57.td | 82
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57WriteRes.td | 38
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA64FX.td | 7780
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedTSV110.td | 1490
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 8
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTagging.cpp | 26
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp | 332
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.cpp | 36
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.h | 84
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64SystemOperands.td | 330
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.cpp | 70
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.h | 12
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 356
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.h | 66
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 1118
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make | 22
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 86
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make | 22
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp | 64
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h | 6
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h | 58
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 2062
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 454
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h | 4
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp | 374
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp | 1408
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp | 374
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp | 20
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp | 150
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h | 18
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h | 8
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 4
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 10
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 110
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 62
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h | 14
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 4
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 2
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 4
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 12
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp | 10
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp | 112
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h | 36
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp | 72
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make | 20
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/SVEInstrFormats.td | 726
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/SVEIntrinsicOpts.cpp | 86
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make | 8
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 16
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.h | 36
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make | 12
-rw-r--r--  contrib/libs/llvm12/lib/Target/AArch64/ya.make | 42
85 files changed, 14997 insertions, 14997 deletions
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64.h
index d2170a99e0..88d25e474e 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64.h
@@ -58,10 +58,10 @@ ModulePass *createSVEIntrinsicOptsPass();
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
AArch64Subtarget &, AArch64RegisterBankInfo &);
-FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone);
-FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone);
-FunctionPass *createAArch64PostLegalizerLowering();
-FunctionPass *createAArch64PostSelectOptimize();
+FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone);
+FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone);
+FunctionPass *createAArch64PostLegalizerLowering();
+FunctionPass *createAArch64PostSelectOptimize();
FunctionPass *createAArch64StackTaggingPass(bool IsOptNone);
FunctionPass *createAArch64StackTaggingPreRAPass();
@@ -82,8 +82,8 @@ void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);
-void initializeAArch64PostLegalizerLoweringPass(PassRegistry &);
-void initializeAArch64PostSelectOptimizePass(PassRegistry &);
+void initializeAArch64PostLegalizerLoweringPass(PassRegistry &);
+void initializeAArch64PostSelectOptimizePass(PassRegistry &);
void initializeAArch64PromoteConstantPass(PassRegistry&);
void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
void initializeAArch64StorePairSuppressPass(PassRegistry&);
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64.td
index 762855207d..385216a208 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64.td
@@ -61,9 +61,9 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
"Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
-def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
- "Enable out of line atomics to support LSE instructions">;
-
+def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
+ "Enable out of line atomics to support LSE instructions">;
+
def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
"Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
@@ -75,12 +75,12 @@ def FeatureLOR : SubtargetFeature<
"lor", "HasLOR", "true",
"Enables ARM v8.1 Limited Ordering Regions extension">;
-def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2",
- "true", "Enable RW operand CONTEXTIDR_EL2" >;
-
-def FeatureVH : SubtargetFeature<"vh", "HasVH", "true",
- "Enables ARM v8.1 Virtual Host extension", [FeatureCONTEXTIDREL2] >;
+def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2",
+ "true", "Enable RW operand CONTEXTIDR_EL2" >;
+def FeatureVH : SubtargetFeature<"vh", "HasVH", "true",
+ "Enables ARM v8.1 Virtual Host extension", [FeatureCONTEXTIDREL2] >;
+
def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable ARMv8 PMUv3 Performance Monitors extension">;
@@ -218,10 +218,10 @@ def FeatureArithmeticCbzFusion : SubtargetFeature<
"arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
"CPU fuses arithmetic + cbz/cbnz operations">;
-def FeatureCmpBccFusion : SubtargetFeature<
- "cmp-bcc-fusion", "HasCmpBccFusion", "true",
- "CPU fuses cmp+bcc operations">;
-
+def FeatureCmpBccFusion : SubtargetFeature<
+ "cmp-bcc-fusion", "HasCmpBccFusion", "true",
+ "CPU fuses cmp+bcc operations">;
+
def FeatureFuseAddress : SubtargetFeature<
"fuse-address", "HasFuseAddress", "true",
"CPU fuses address generation and memory operations">;
@@ -265,8 +265,8 @@ def FeatureDotProd : SubtargetFeature<
"dotprod", "HasDotProd", "true",
"Enable dot product support">;
-def FeaturePAuth : SubtargetFeature<
- "pauth", "HasPAuth", "true",
+def FeaturePAuth : SubtargetFeature<
+ "pauth", "HasPAuth", "true",
"Enable v8.3-A Pointer Authentication extension">;
def FeatureJS : SubtargetFeature<
@@ -320,8 +320,8 @@ def FeatureTLB_RMI : SubtargetFeature<
"tlb-rmi", "HasTLB_RMI", "true",
"Enable v8.4-A TLB Range and Maintenance Instructions">;
-def FeatureFlagM : SubtargetFeature<
- "flagm", "HasFlagM", "true",
+def FeatureFlagM : SubtargetFeature<
+ "flagm", "HasFlagM", "true",
"Enable v8.4-A Flag Manipulation Instructions">;
// 8.4 RCPC enchancements: LDAPR & STLR instructions with Immediate Offset
@@ -404,24 +404,24 @@ def FeatureMatMulFP32 : SubtargetFeature<"f32mm", "HasMatMulFP32",
def FeatureMatMulFP64 : SubtargetFeature<"f64mm", "HasMatMulFP64",
"true", "Enable Matrix Multiply FP64 Extension", [FeatureSVE]>;
-def FeatureXS : SubtargetFeature<"xs", "HasXS",
- "true", "Enable Armv8.7-A limited-TLB-maintenance instruction">;
-
-def FeatureWFxT : SubtargetFeature<"wfxt", "HasWFxT",
- "true", "Enable Armv8.7-A WFET and WFIT instruction">;
-
-def FeatureHCX : SubtargetFeature<
- "hcx", "HasHCX", "true", "Enable Armv8.7-A HCRX_EL2 system register">;
-
-def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64",
- "true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension">;
-
-def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE",
- "true", "Enable Branch Record Buffer Extension">;
-
-def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF",
- "true", "Enable extra register in the Statistical Profiling Extension">;
-
+def FeatureXS : SubtargetFeature<"xs", "HasXS",
+ "true", "Enable Armv8.7-A limited-TLB-maintenance instruction">;
+
+def FeatureWFxT : SubtargetFeature<"wfxt", "HasWFxT",
+ "true", "Enable Armv8.7-A WFET and WFIT instruction">;
+
+def FeatureHCX : SubtargetFeature<
+ "hcx", "HasHCX", "true", "Enable Armv8.7-A HCRX_EL2 system register">;
+
+def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64",
+ "true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension">;
+
+def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE",
+ "true", "Enable Branch Record Buffer Extension">;
+
+def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF",
+ "true", "Enable extra register in the Statistical Profiling Extension">;
+
def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps",
"true", "Enable fine grained virtualization traps extension">;
@@ -442,14 +442,14 @@ def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>;
def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true",
- "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePAuth,
+ "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePAuth,
FeatureJS, FeatureCCIDX, FeatureComplxNum]>;
def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
"Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd,
- FeatureNV, FeatureMPAM, FeatureDIT,
+ FeatureNV, FeatureMPAM, FeatureDIT,
FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeaturePMU, FeatureTLB_RMI,
- FeatureFlagM, FeatureRCPC_IMMO]>;
+ FeatureFlagM, FeatureRCPC_IMMO]>;
def HasV8_5aOps : SubtargetFeature<
"v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions",
@@ -462,26 +462,26 @@ def HasV8_6aOps : SubtargetFeature<
[HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps,
FeatureEnhancedCounterVirtualization, FeatureMatMulInt8]>;
-def HasV8_7aOps : SubtargetFeature<
- "v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions",
- [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>;
-
-def HasV8_0rOps : SubtargetFeature<
- "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions",
- [//v8.1
- FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2,
- //v8.2
- FeaturePerfMon, FeatureRAS, FeaturePsUAO, FeatureSM4,
- FeatureSHA3, FeatureCCPP, FeatureFullFP16, FeaturePAN_RWV,
- //v8.3
- FeatureComplxNum, FeatureCCIDX, FeatureJS,
- FeaturePAuth, FeatureRCPC,
- //v8.4
- FeatureDotProd, FeatureFP16FML, FeatureTRACEV8_4,
- FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO,
- //v8.5
- FeatureSSBS, FeaturePredRes, FeatureSB, FeatureSpecRestrict]>;
-
+def HasV8_7aOps : SubtargetFeature<
+ "v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions",
+ [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>;
+
+def HasV8_0rOps : SubtargetFeature<
+ "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions",
+ [//v8.1
+ FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2,
+ //v8.2
+ FeaturePerfMon, FeatureRAS, FeaturePsUAO, FeatureSM4,
+ FeatureSHA3, FeatureCCPP, FeatureFullFP16, FeaturePAN_RWV,
+ //v8.3
+ FeatureComplxNum, FeatureCCIDX, FeatureJS,
+ FeaturePAuth, FeatureRCPC,
+ //v8.4
+ FeatureDotProd, FeatureFP16FML, FeatureTRACEV8_4,
+ FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO,
+ //v8.5
+ FeatureSSBS, FeaturePredRes, FeatureSB, FeatureSpecRestrict]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -543,11 +543,11 @@ def SVEUnsupported : AArch64Unsupported {
}
def PAUnsupported : AArch64Unsupported {
- let F = [HasPAuth];
+ let F = [HasPAuth];
}
include "AArch64SchedA53.td"
-include "AArch64SchedA55.td"
+include "AArch64SchedA55.td"
include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
include "AArch64SchedFalkor.td"
@@ -557,9 +557,9 @@ include "AArch64SchedExynosM4.td"
include "AArch64SchedExynosM5.td"
include "AArch64SchedThunderX.td"
include "AArch64SchedThunderX2T99.td"
-include "AArch64SchedA64FX.td"
+include "AArch64SchedA64FX.td"
include "AArch64SchedThunderX3T110.td"
-include "AArch64SchedTSV110.td"
+include "AArch64SchedTSV110.td"
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors", [
@@ -619,9 +619,9 @@ def ProcA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
FeatureDotProd,
FeatureFPARMv8,
FeatureFullFP16,
- FeatureFuseAddress,
- FeatureFuseAES,
- FeatureFuseLiterals,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseLiterals,
FeatureNEON,
FeatureRAS,
FeatureRCPC,
@@ -634,7 +634,7 @@ def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
FeatureCrypto,
FeatureFPARMv8,
FeatureFuseAES,
- FeatureFuseLiterals,
+ FeatureFuseLiterals,
FeatureNEON,
FeaturePerfMon
]>;
@@ -666,7 +666,7 @@ def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
"Cortex-A76 ARM processors", [
HasV8_2aOps,
FeatureFPARMv8,
- FeatureFuseAES,
+ FeatureFuseAES,
FeatureNEON,
FeatureRCPC,
FeatureCrypto,
@@ -678,9 +678,9 @@ def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
"Cortex-A77 ARM processors", [
HasV8_2aOps,
- FeatureCmpBccFusion,
+ FeatureCmpBccFusion,
FeatureFPARMv8,
- FeatureFuseAES,
+ FeatureFuseAES,
FeatureNEON, FeatureRCPC,
FeatureCrypto,
FeatureFullFP16,
@@ -691,7 +691,7 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily",
"CortexA78",
"Cortex-A78 ARM processors", [
HasV8_2aOps,
- FeatureCmpBccFusion,
+ FeatureCmpBccFusion,
FeatureCrypto,
FeatureFPARMv8,
FeatureFuseAES,
@@ -704,39 +704,39 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily",
FeatureSSBS,
FeatureDotProd]>;
-def ProcA78C : SubtargetFeature<"cortex-a78c", "ARMProcFamily",
- "CortexA78C",
- "Cortex-A78C ARM processors", [
- HasV8_2aOps,
- FeatureCmpBccFusion,
- FeatureCrypto,
- FeatureDotProd,
- FeatureFlagM,
- FeatureFP16FML,
- FeatureFPARMv8,
- FeatureFullFP16,
- FeatureFuseAES,
- FeatureNEON,
- FeaturePAuth,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureRCPC,
- FeatureSPE,
- FeatureSSBS]>;
-
-def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
- "CortexR82",
- "Cortex-R82 ARM Processors", [
- FeaturePostRAScheduler,
- // TODO: crypto and FuseAES
- // All other features are implied by v8_0r ops:
- HasV8_0rOps,
- ]>;
-
+def ProcA78C : SubtargetFeature<"cortex-a78c", "ARMProcFamily",
+ "CortexA78C",
+ "Cortex-A78C ARM processors", [
+ HasV8_2aOps,
+ FeatureCmpBccFusion,
+ FeatureCrypto,
+ FeatureDotProd,
+ FeatureFlagM,
+ FeatureFP16FML,
+ FeatureFPARMv8,
+ FeatureFullFP16,
+ FeatureFuseAES,
+ FeatureNEON,
+ FeaturePAuth,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureRCPC,
+ FeatureSPE,
+ FeatureSSBS]>;
+
+def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
+ "CortexR82",
+ "Cortex-R82 ARM Processors", [
+ FeaturePostRAScheduler,
+ // TODO: crypto and FuseAES
+ // All other features are implied by v8_0r ops:
+ HasV8_0rOps,
+ ]>;
+
def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
"Cortex-X1 ARM processors", [
HasV8_2aOps,
- FeatureCmpBccFusion,
+ FeatureCmpBccFusion,
FeatureCrypto,
FeatureFPARMv8,
FeatureFuseAES,
@@ -758,10 +758,10 @@ def ProcA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
FeatureFullFP16,
FeatureSVE,
FeaturePostRAScheduler,
- FeatureComplxNum,
- FeatureAggressiveFMA,
- FeatureArithmeticBccFusion,
- FeaturePredictableSelectIsExpensive
+ FeatureComplxNum,
+ FeatureAggressiveFMA,
+ FeatureArithmeticBccFusion,
+ FeaturePredictableSelectIsExpensive
]>;
def ProcCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
@@ -868,38 +868,38 @@ def ProcAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
HasV8_4aOps
]>;
-def ProcAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
- "Apple A14", [
- FeatureAggressiveFMA,
- FeatureAlternateSExtLoadCVTF32Pattern,
- FeatureAltFPCmp,
- FeatureArithmeticBccFusion,
- FeatureArithmeticCbzFusion,
- FeatureCrypto,
- FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
- FeatureFRInt3264,
- FeatureFuseAddress,
- FeatureFuseAES,
- FeatureFuseArithmeticLogic,
- FeatureFuseCCSelect,
- FeatureFuseCryptoEOR,
- FeatureFuseLiterals,
- FeatureNEON,
- FeaturePerfMon,
- FeatureSpecRestrict,
- FeatureSSBS,
- FeatureSB,
- FeaturePredRes,
- FeatureCacheDeepPersist,
- FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureFullFP16,
- FeatureFP16FML,
- FeatureSHA3,
- HasV8_4aOps
- ]>;
-
+def ProcAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
+ "Apple A14", [
+ FeatureAggressiveFMA,
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureAltFPCmp,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureCrypto,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFPARMv8,
+ FeatureFRInt3264,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseArithmeticLogic,
+ FeatureFuseCCSelect,
+ FeatureFuseCryptoEOR,
+ FeatureFuseLiterals,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeatureSpecRestrict,
+ FeatureSSBS,
+ FeatureSB,
+ FeaturePredRes,
+ FeatureCacheDeepPersist,
+ FeatureZCRegMove,
+ FeatureZCZeroing,
+ FeatureFullFP16,
+ FeatureFP16FML,
+ FeatureSHA3,
+ HasV8_4aOps
+ ]>;
+
def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
[FeatureCRC,
@@ -993,38 +993,38 @@ def ProcNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily",
FeatureSSBS,
]>;
-def ProcNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily",
- "NeoverseN2",
- "Neoverse N2 ARM processors", [
- HasV8_5aOps,
- FeatureBF16,
- FeatureETE,
- FeatureMatMulInt8,
- FeatureMTE,
- FeatureSVE2,
- FeatureSVE2BitPerm,
- FeatureTRBE]>;
-
-def ProcNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily",
- "NeoverseV1",
- "Neoverse V1 ARM processors", [
- HasV8_4aOps,
- FeatureBF16,
- FeatureCacheDeepPersist,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureFP16FML,
- FeatureFullFP16,
- FeatureFuseAES,
- FeatureMatMulInt8,
- FeatureNEON,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureRandGen,
- FeatureSPE,
- FeatureSSBS,
- FeatureSVE]>;
-
+def ProcNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily",
+ "NeoverseN2",
+ "Neoverse N2 ARM processors", [
+ HasV8_5aOps,
+ FeatureBF16,
+ FeatureETE,
+ FeatureMatMulInt8,
+ FeatureMTE,
+ FeatureSVE2,
+ FeatureSVE2BitPerm,
+ FeatureTRBE]>;
+
+def ProcNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily",
+ "NeoverseV1",
+ "Neoverse V1 ARM processors", [
+ HasV8_4aOps,
+ FeatureBF16,
+ FeatureCacheDeepPersist,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureFP16FML,
+ FeatureFullFP16,
+ FeatureFuseAES,
+ FeatureMatMulInt8,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureRandGen,
+ FeatureSPE,
+ FeatureSSBS,
+ FeatureSVE]>;
+
def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
"Qualcomm Saphira processors", [
FeatureCrypto,
@@ -1065,7 +1065,7 @@ def ProcThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureLSE,
- FeaturePAuth,
+ FeaturePAuth,
FeatureUseAA,
FeatureBalanceFPOps,
FeaturePerfMon,
@@ -1147,7 +1147,7 @@ def : ProcessorModel<"generic", NoSchedModel, [
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a34", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
-def : ProcessorModel<"cortex-a55", CortexA55Model, [ProcA55]>;
+def : ProcessorModel<"cortex-a55", CortexA55Model, [ProcA55]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cortex-a65", CortexA53Model, [ProcA65]>;
def : ProcessorModel<"cortex-a65ae", CortexA53Model, [ProcA65]>;
@@ -1158,13 +1158,13 @@ def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>;
def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>;
def : ProcessorModel<"cortex-a77", CortexA57Model, [ProcA77]>;
def : ProcessorModel<"cortex-a78", CortexA57Model, [ProcA78]>;
-def : ProcessorModel<"cortex-a78c", CortexA57Model, [ProcA78C]>;
-def : ProcessorModel<"cortex-r82", CortexA55Model, [ProcR82]>;
+def : ProcessorModel<"cortex-a78c", CortexA57Model, [ProcA78C]>;
+def : ProcessorModel<"cortex-r82", CortexA55Model, [ProcR82]>;
def : ProcessorModel<"cortex-x1", CortexA57Model, [ProcX1]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>;
def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>;
-def : ProcessorModel<"neoverse-n2", CortexA57Model, [ProcNeoverseN2]>;
-def : ProcessorModel<"neoverse-v1", CortexA57Model, [ProcNeoverseV1]>;
+def : ProcessorModel<"neoverse-n2", CortexA57Model, [ProcNeoverseN2]>;
+def : ProcessorModel<"neoverse-v1", CortexA57Model, [ProcNeoverseV1]>;
def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>;
@@ -1180,7 +1180,7 @@ def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
// Marvell ThunderX3T110 Processors.
def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, [ProcThunderX3T110]>;
-def : ProcessorModel<"tsv110", TSV110Model, [ProcTSV110]>;
+def : ProcessorModel<"tsv110", TSV110Model, [ProcTSV110]>;
// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
def : ProcessorModel<"cyclone", CycloneModel, [ProcAppleA7]>;
@@ -1193,7 +1193,7 @@ def : ProcessorModel<"apple-a10", CycloneModel, [ProcAppleA10]>;
def : ProcessorModel<"apple-a11", CycloneModel, [ProcAppleA11]>;
def : ProcessorModel<"apple-a12", CycloneModel, [ProcAppleA12]>;
def : ProcessorModel<"apple-a13", CycloneModel, [ProcAppleA13]>;
-def : ProcessorModel<"apple-a14", CycloneModel, [ProcAppleA14]>;
+def : ProcessorModel<"apple-a14", CycloneModel, [ProcAppleA14]>;
// watch CPUs.
def : ProcessorModel<"apple-s4", CycloneModel, [ProcAppleA12]>;
@@ -1203,7 +1203,7 @@ def : ProcessorModel<"apple-s5", CycloneModel, [ProcAppleA12]>;
def : ProcessorModel<"apple-latest", CycloneModel, [ProcAppleA13]>;
// Fujitsu A64FX
-def : ProcessorModel<"a64fx", A64FXModel, [ProcA64FX]>;
+def : ProcessorModel<"a64fx", A64FXModel, [ProcA64FX]>;
// Nvidia Carmel
def : ProcessorModel<"carmel", NoSchedModel, [ProcCarmel]>;
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index c996d2df8c..74fd2411f4 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -123,7 +123,7 @@ static bool isFPR64(unsigned Reg, unsigned SubReg,
}
// getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64
-// copy instruction. Return nullptr if the instruction is not a copy.
+// copy instruction. Return nullptr if the instruction is not a copy.
static MachineOperand *getSrcFromCopy(MachineInstr *MI,
const MachineRegisterInfo *MRI,
unsigned &SubReg) {
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64AsmPrinter.cpp
index a0c5498ee6..419af6785c 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -32,7 +32,7 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/FaultMaps.h"
+#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -55,7 +55,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -71,13 +71,13 @@ namespace {
class AArch64AsmPrinter : public AsmPrinter {
AArch64MCInstLower MCInstLowering;
StackMaps SM;
- FaultMaps FM;
+ FaultMaps FM;
const AArch64Subtarget *STI;
public:
AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
- SM(*this), FM(*this) {}
+ SM(*this), FM(*this) {}
StringRef getPassName() const override { return "AArch64 Assembly Printer"; }
@@ -92,15 +92,15 @@ public:
void emitFunctionEntryLabel() override;
- void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI);
+ void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI);
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
- void LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI);
- void LowerFAULTING_OP(const MachineInstr &MI);
+ void LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI);
+ void LowerFAULTING_OP(const MachineInstr &MI);
void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
@@ -195,24 +195,24 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) {
return;
// Assemble feature flags that may require creation of a note section.
- unsigned Flags = 0;
- if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
- M.getModuleFlag("branch-target-enforcement")))
- if (BTE->getZExtValue())
- Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
+ unsigned Flags = 0;
+ if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("branch-target-enforcement")))
+ if (BTE->getZExtValue())
+ Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
- if (const auto *Sign = mdconst::extract_or_null<ConstantInt>(
- M.getModuleFlag("sign-return-address")))
- if (Sign->getZExtValue())
- Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
+ if (const auto *Sign = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("sign-return-address")))
+ if (Sign->getZExtValue())
+ Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
if (Flags == 0)
return;
// Emit a .note.gnu.property section with the flags.
- if (auto *TS = static_cast<AArch64TargetStreamer *>(
- OutStreamer->getTargetStreamer()))
- TS->emitNoteSection(Flags);
+ if (auto *TS = static_cast<AArch64TargetStreamer *>(
+ OutStreamer->getTargetStreamer()))
+ TS->emitNoteSection(Flags);
}
void AArch64AsmPrinter::emitFunctionHeaderComment() {
@@ -303,7 +303,7 @@ void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
std::string SymName = "__hwasan_check_x" + utostr(Reg - AArch64::X0) + "_" +
utostr(AccessInfo);
if (IsShort)
- SymName += "_short_v2";
+ SymName += "_short_v2";
Sym = OutContext.getOrCreateSymbol(SymName);
}
@@ -320,7 +320,7 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
assert(TT.isOSBinFormatELF());
std::unique_ptr<MCSubtargetInfo> STI(
TM.getTarget().createMCSubtargetInfo(TT.str(), "", ""));
- assert(STI && "Unable to create subtarget info");
+ assert(STI && "Unable to create subtarget info");
MCSymbol *HwasanTagMismatchV1Sym =
OutContext.getOrCreateSymbol("__hwasan_tag_mismatch");
@@ -340,15 +340,15 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
IsShort ? HwasanTagMismatchV2Ref : HwasanTagMismatchV1Ref;
MCSymbol *Sym = P.second;
- bool HasMatchAllTag =
- (AccessInfo >> HWASanAccessInfo::HasMatchAllShift) & 1;
- uint8_t MatchAllTag =
- (AccessInfo >> HWASanAccessInfo::MatchAllShift) & 0xff;
- unsigned Size =
- 1 << ((AccessInfo >> HWASanAccessInfo::AccessSizeShift) & 0xf);
- bool CompileKernel =
- (AccessInfo >> HWASanAccessInfo::CompileKernelShift) & 1;
-
+ bool HasMatchAllTag =
+ (AccessInfo >> HWASanAccessInfo::HasMatchAllShift) & 1;
+ uint8_t MatchAllTag =
+ (AccessInfo >> HWASanAccessInfo::MatchAllShift) & 0xff;
+ unsigned Size =
+ 1 << ((AccessInfo >> HWASanAccessInfo::AccessSizeShift) & 0xf);
+ bool CompileKernel =
+ (AccessInfo >> HWASanAccessInfo::CompileKernelShift) & 1;
+
OutStreamer->SwitchSection(OutContext.getELFSection(
".text.hot", ELF::SHT_PROGBITS,
ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
@@ -359,21 +359,21 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
OutStreamer->emitSymbolAttribute(Sym, MCSA_Hidden);
OutStreamer->emitLabel(Sym);
- OutStreamer->emitInstruction(MCInstBuilder(AArch64::SBFMXri)
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::SBFMXri)
.addReg(AArch64::X16)
.addReg(Reg)
.addImm(4)
.addImm(55),
*STI);
OutStreamer->emitInstruction(
- MCInstBuilder(AArch64::LDRBBroX)
- .addReg(AArch64::W16)
- .addReg(IsShort ? AArch64::X20 : AArch64::X9)
- .addReg(AArch64::X16)
- .addImm(0)
- .addImm(0),
- *STI);
- OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::LDRBBroX)
+ .addReg(AArch64::W16)
+ .addReg(IsShort ? AArch64::X20 : AArch64::X9)
+ .addReg(AArch64::X16)
+ .addImm(0)
+ .addImm(0),
+ *STI);
+ OutStreamer->emitInstruction(
MCInstBuilder(AArch64::SUBSXrs)
.addReg(AArch64::XZR)
.addReg(AArch64::X16)
@@ -393,26 +393,26 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
MCInstBuilder(AArch64::RET).addReg(AArch64::LR), *STI);
OutStreamer->emitLabel(HandleMismatchOrPartialSym);
- if (HasMatchAllTag) {
- OutStreamer->emitInstruction(MCInstBuilder(AArch64::UBFMXri)
- .addReg(AArch64::X16)
- .addReg(Reg)
- .addImm(56)
- .addImm(63),
- *STI);
- OutStreamer->emitInstruction(MCInstBuilder(AArch64::SUBSXri)
- .addReg(AArch64::XZR)
- .addReg(AArch64::X16)
- .addImm(MatchAllTag)
- .addImm(0),
- *STI);
- OutStreamer->emitInstruction(
- MCInstBuilder(AArch64::Bcc)
- .addImm(AArch64CC::EQ)
- .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)),
- *STI);
- }
-
+ if (HasMatchAllTag) {
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::UBFMXri)
+ .addReg(AArch64::X16)
+ .addReg(Reg)
+ .addImm(56)
+ .addImm(63),
+ *STI);
+ OutStreamer->emitInstruction(MCInstBuilder(AArch64::SUBSXri)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::X16)
+ .addImm(MatchAllTag)
+ .addImm(0),
+ *STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::Bcc)
+ .addImm(AArch64CC::EQ)
+ .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)),
+ *STI);
+ }
+
if (IsShort) {
OutStreamer->emitInstruction(MCInstBuilder(AArch64::SUBSWri)
.addReg(AArch64::WZR)
@@ -501,40 +501,40 @@ void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
.addImm(0),
*STI);
OutStreamer->emitInstruction(
- MCInstBuilder(AArch64::MOVZXi)
- .addReg(AArch64::X1)
- .addImm(AccessInfo & HWASanAccessInfo::RuntimeMask)
- .addImm(0),
+ MCInstBuilder(AArch64::MOVZXi)
+ .addReg(AArch64::X1)
+ .addImm(AccessInfo & HWASanAccessInfo::RuntimeMask)
+ .addImm(0),
*STI);
-
- if (CompileKernel) {
- // The Linux kernel's dynamic loader doesn't support GOT relative
- // relocations, but it doesn't support late binding either, so just call
- // the function directly.
- OutStreamer->emitInstruction(
- MCInstBuilder(AArch64::B).addExpr(HwasanTagMismatchRef), *STI);
- } else {
- // Intentionally load the GOT entry and branch to it, rather than possibly
- // late binding the function, which may clobber the registers before we
- // have a chance to save them.
- OutStreamer->emitInstruction(
- MCInstBuilder(AArch64::ADRP)
- .addReg(AArch64::X16)
- .addExpr(AArch64MCExpr::create(
- HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_PAGE,
- OutContext)),
- *STI);
- OutStreamer->emitInstruction(
- MCInstBuilder(AArch64::LDRXui)
- .addReg(AArch64::X16)
- .addReg(AArch64::X16)
- .addExpr(AArch64MCExpr::create(
- HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_LO12,
- OutContext)),
- *STI);
- OutStreamer->emitInstruction(
- MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI);
- }
+
+ if (CompileKernel) {
+ // The Linux kernel's dynamic loader doesn't support GOT relative
+ // relocations, but it doesn't support late binding either, so just call
+ // the function directly.
+ OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::B).addExpr(HwasanTagMismatchRef), *STI);
+ } else {
+ // Intentionally load the GOT entry and branch to it, rather than possibly
+ // late binding the function, which may clobber the registers before we
+ // have a chance to save them.
+ OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::ADRP)
+ .addReg(AArch64::X16)
+ .addExpr(AArch64MCExpr::create(
+ HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_PAGE,
+ OutContext)),
+ *STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::LDRXui)
+ .addReg(AArch64::X16)
+ .addReg(AArch64::X16)
+ .addExpr(AArch64MCExpr::create(
+ HwasanTagMismatchRef, AArch64MCExpr::VariantKind::VK_GOT_LO12,
+ OutContext)),
+ *STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI);
+ }
}
}
@@ -550,11 +550,11 @@ void AArch64AsmPrinter::emitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer->emitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
-
- // Emit stack and fault map information.
+
+ // Emit stack and fault map information.
emitStackMaps(SM);
- FM.serializeToFaultMapSection();
-
+ FM.serializeToFaultMapSection();
+
}
void AArch64AsmPrinter::EmitLOHs() {
@@ -647,8 +647,8 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
const TargetRegisterInfo *RI = STI->getRegisterInfo();
Register Reg = MO.getReg();
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
- if (!RI->regsOverlap(RegToPrint, Reg))
- return true;
+ if (!RI->regsOverlap(RegToPrint, Reg))
+ return true;
O << AArch64InstPrinter::getRegisterName(RegToPrint, AltName);
return false;
}
@@ -809,24 +809,24 @@ void AArch64AsmPrinter::emitJumpTableInfo() {
emitAlignment(Align(Size));
OutStreamer->emitLabel(GetJTISymbol(JTI));
- const MCSymbol *BaseSym = AArch64FI->getJumpTableEntryPCRelSymbol(JTI);
- const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
-
- for (auto *JTBB : JTBBs) {
- const MCExpr *Value =
- MCSymbolRefExpr::create(JTBB->getSymbol(), OutContext);
-
- // Each entry is:
- // .byte/.hword (LBB - Lbase)>>2
- // or plain:
- // .word LBB - Lbase
- Value = MCBinaryExpr::createSub(Value, Base, OutContext);
- if (Size != 4)
- Value = MCBinaryExpr::createLShr(
- Value, MCConstantExpr::create(2, OutContext), OutContext);
-
- OutStreamer->emitValue(Value, Size);
- }
+ const MCSymbol *BaseSym = AArch64FI->getJumpTableEntryPCRelSymbol(JTI);
+ const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
+
+ for (auto *JTBB : JTBBs) {
+ const MCExpr *Value =
+ MCSymbolRefExpr::create(JTBB->getSymbol(), OutContext);
+
+ // Each entry is:
+ // .byte/.hword (LBB - Lbase)>>2
+ // or plain:
+ // .word LBB - Lbase
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+ if (Size != 4)
+ Value = MCBinaryExpr::createLShr(
+ Value, MCConstantExpr::create(2, OutContext), OutContext);
+
+ OutStreamer->emitValue(Value, Size);
+ }
}
}
@@ -851,9 +851,9 @@ void AArch64AsmPrinter::emitFunctionEntryLabel() {
///
/// adr xDest, .LBB0_0
/// ldrb wScratch, [xTable, xEntry] (with "lsl #1" for ldrh).
-/// add xDest, xDest, xScratch (with "lsl #2" for smaller entries)
-void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
- const llvm::MachineInstr &MI) {
+/// add xDest, xDest, xScratch (with "lsl #2" for smaller entries)
+void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
+ const llvm::MachineInstr &MI) {
Register DestReg = MI.getOperand(0).getReg();
Register ScratchReg = MI.getOperand(1).getReg();
Register ScratchRegW =
@@ -861,50 +861,50 @@ void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
Register TableReg = MI.getOperand(2).getReg();
Register EntryReg = MI.getOperand(3).getReg();
int JTIdx = MI.getOperand(4).getIndex();
- int Size = AArch64FI->getJumpTableEntrySize(JTIdx);
+ int Size = AArch64FI->getJumpTableEntrySize(JTIdx);
// This has to be first because the compression pass based its reachability
// calculations on the start of the JumpTableDest instruction.
auto Label =
MF->getInfo<AArch64FunctionInfo>()->getJumpTableEntryPCRelSymbol(JTIdx);
-
- // If we don't already have a symbol to use as the base, use the ADR
- // instruction itself.
- if (!Label) {
- Label = MF->getContext().createTempSymbol();
- AArch64FI->setJumpTableEntryInfo(JTIdx, Size, Label);
- OutStreamer.emitLabel(Label);
- }
-
- auto LabelExpr = MCSymbolRefExpr::create(Label, MF->getContext());
+
+ // If we don't already have a symbol to use as the base, use the ADR
+ // instruction itself.
+ if (!Label) {
+ Label = MF->getContext().createTempSymbol();
+ AArch64FI->setJumpTableEntryInfo(JTIdx, Size, Label);
+ OutStreamer.emitLabel(Label);
+ }
+
+ auto LabelExpr = MCSymbolRefExpr::create(Label, MF->getContext());
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR)
.addReg(DestReg)
- .addExpr(LabelExpr));
+ .addExpr(LabelExpr));
// Load the number of instruction-steps to offset from the label.
- unsigned LdrOpcode;
- switch (Size) {
- case 1: LdrOpcode = AArch64::LDRBBroX; break;
- case 2: LdrOpcode = AArch64::LDRHHroX; break;
- case 4: LdrOpcode = AArch64::LDRSWroX; break;
- default:
- llvm_unreachable("Unknown jump table size");
- }
-
+ unsigned LdrOpcode;
+ switch (Size) {
+ case 1: LdrOpcode = AArch64::LDRBBroX; break;
+ case 2: LdrOpcode = AArch64::LDRHHroX; break;
+ case 4: LdrOpcode = AArch64::LDRSWroX; break;
+ default:
+ llvm_unreachable("Unknown jump table size");
+ }
+
EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode)
- .addReg(Size == 4 ? ScratchReg : ScratchRegW)
+ .addReg(Size == 4 ? ScratchReg : ScratchRegW)
.addReg(TableReg)
.addReg(EntryReg)
.addImm(0)
- .addImm(Size == 1 ? 0 : 1));
+ .addImm(Size == 1 ? 0 : 1));
- // Add to the already materialized base label address, multiplying by 4 if
- // compressed.
+ // Add to the already materialized base label address, multiplying by 4 if
+ // compressed.
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs)
.addReg(DestReg)
.addReg(DestReg)
.addReg(ScratchReg)
- .addImm(Size == 4 ? 0 : 2));
+ .addImm(Size == 4 ? 0 : 2));
}
void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
@@ -982,83 +982,83 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
}
-void AArch64AsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI) {
- StatepointOpers SOpers(&MI);
- if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
- assert(PatchBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
- for (unsigned i = 0; i < PatchBytes; i += 4)
- EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
- } else {
- // Lower call target and choose correct opcode
- const MachineOperand &CallTarget = SOpers.getCallTarget();
- MCOperand CallTargetMCOp;
- unsigned CallOpcode;
- switch (CallTarget.getType()) {
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ExternalSymbol:
- MCInstLowering.lowerOperand(CallTarget, CallTargetMCOp);
- CallOpcode = AArch64::BL;
- break;
- case MachineOperand::MO_Immediate:
- CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
- CallOpcode = AArch64::BL;
- break;
- case MachineOperand::MO_Register:
- CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
- CallOpcode = AArch64::BLR;
- break;
- default:
- llvm_unreachable("Unsupported operand type in statepoint call target");
- break;
- }
-
- EmitToStreamer(OutStreamer,
- MCInstBuilder(CallOpcode).addOperand(CallTargetMCOp));
- }
-
- auto &Ctx = OutStreamer.getContext();
- MCSymbol *MILabel = Ctx.createTempSymbol();
- OutStreamer.emitLabel(MILabel);
- SM.recordStatepoint(*MILabel, MI);
-}
-
-void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) {
- // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
- // <opcode>, <operands>
-
- Register DefRegister = FaultingMI.getOperand(0).getReg();
- FaultMaps::FaultKind FK =
- static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
- MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
- unsigned Opcode = FaultingMI.getOperand(3).getImm();
- unsigned OperandsBeginIdx = 4;
-
- auto &Ctx = OutStreamer->getContext();
- MCSymbol *FaultingLabel = Ctx.createTempSymbol();
- OutStreamer->emitLabel(FaultingLabel);
-
- assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
- FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
-
- MCInst MI;
- MI.setOpcode(Opcode);
-
- if (DefRegister != (Register)0)
- MI.addOperand(MCOperand::createReg(DefRegister));
-
- for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
- E = FaultingMI.operands_end();
- I != E; ++I) {
- MCOperand Dest;
- lowerOperand(*I, Dest);
- MI.addOperand(Dest);
- }
-
- OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
- OutStreamer->emitInstruction(MI, getSubtargetInfo());
-}
-
+void AArch64AsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI) {
+ StatepointOpers SOpers(&MI);
+ if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
+ assert(PatchBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+ for (unsigned i = 0; i < PatchBytes; i += 4)
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
+ } else {
+ // Lower call target and choose correct opcode
+ const MachineOperand &CallTarget = SOpers.getCallTarget();
+ MCOperand CallTargetMCOp;
+ unsigned CallOpcode;
+ switch (CallTarget.getType()) {
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCInstLowering.lowerOperand(CallTarget, CallTargetMCOp);
+ CallOpcode = AArch64::BL;
+ break;
+ case MachineOperand::MO_Immediate:
+ CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
+ CallOpcode = AArch64::BL;
+ break;
+ case MachineOperand::MO_Register:
+ CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
+ CallOpcode = AArch64::BLR;
+ break;
+ default:
+ llvm_unreachable("Unsupported operand type in statepoint call target");
+ break;
+ }
+
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(CallOpcode).addOperand(CallTargetMCOp));
+ }
+
+ auto &Ctx = OutStreamer.getContext();
+ MCSymbol *MILabel = Ctx.createTempSymbol();
+ OutStreamer.emitLabel(MILabel);
+ SM.recordStatepoint(*MILabel, MI);
+}
+
+void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) {
+ // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
+ // <opcode>, <operands>
+
+ Register DefRegister = FaultingMI.getOperand(0).getReg();
+ FaultMaps::FaultKind FK =
+ static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
+ MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
+ unsigned Opcode = FaultingMI.getOperand(3).getImm();
+ unsigned OperandsBeginIdx = 4;
+
+ auto &Ctx = OutStreamer->getContext();
+ MCSymbol *FaultingLabel = Ctx.createTempSymbol();
+ OutStreamer->emitLabel(FaultingLabel);
+
+ assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
+ FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
+
+ MCInst MI;
+ MI.setOpcode(Opcode);
+
+ if (DefRegister != (Register)0)
+ MI.addOperand(MCOperand::createReg(DefRegister));
+
+ for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
+ E = FaultingMI.operands_end();
+ I != E; ++I) {
+ MCOperand Dest;
+ lowerOperand(*I, Dest);
+ MI.addOperand(Dest);
+ }
+
+ OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
+ OutStreamer->emitInstruction(MI, getSubtargetInfo());
+}
+
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
Register DestReg = MI.getOperand(0).getReg();
if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
@@ -1272,28 +1272,28 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, Adrp);
MCInst Ldr;
- if (STI->isTargetILP32()) {
- Ldr.setOpcode(AArch64::LDRWui);
- Ldr.addOperand(MCOperand::createReg(AArch64::W1));
- } else {
- Ldr.setOpcode(AArch64::LDRXui);
- Ldr.addOperand(MCOperand::createReg(AArch64::X1));
- }
+ if (STI->isTargetILP32()) {
+ Ldr.setOpcode(AArch64::LDRWui);
+ Ldr.addOperand(MCOperand::createReg(AArch64::W1));
+ } else {
+ Ldr.setOpcode(AArch64::LDRXui);
+ Ldr.addOperand(MCOperand::createReg(AArch64::X1));
+ }
Ldr.addOperand(MCOperand::createReg(AArch64::X0));
Ldr.addOperand(SymTLSDescLo12);
Ldr.addOperand(MCOperand::createImm(0));
EmitToStreamer(*OutStreamer, Ldr);
MCInst Add;
- if (STI->isTargetILP32()) {
- Add.setOpcode(AArch64::ADDWri);
- Add.addOperand(MCOperand::createReg(AArch64::W0));
- Add.addOperand(MCOperand::createReg(AArch64::W0));
- } else {
- Add.setOpcode(AArch64::ADDXri);
- Add.addOperand(MCOperand::createReg(AArch64::X0));
- Add.addOperand(MCOperand::createReg(AArch64::X0));
- }
+ if (STI->isTargetILP32()) {
+ Add.setOpcode(AArch64::ADDWri);
+ Add.addOperand(MCOperand::createReg(AArch64::W0));
+ Add.addOperand(MCOperand::createReg(AArch64::W0));
+ } else {
+ Add.setOpcode(AArch64::ADDXri);
+ Add.addOperand(MCOperand::createReg(AArch64::X0));
+ Add.addOperand(MCOperand::createReg(AArch64::X0));
+ }
Add.addOperand(SymTLSDescLo12);
Add.addOperand(MCOperand::createImm(AArch64_AM::getShiftValue(0)));
EmitToStreamer(*OutStreamer, Add);
@@ -1313,10 +1313,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
- case AArch64::JumpTableDest32:
+ case AArch64::JumpTableDest32:
case AArch64::JumpTableDest16:
case AArch64::JumpTableDest8:
- LowerJumpTableDest(*OutStreamer, *MI);
+ LowerJumpTableDest(*OutStreamer, *MI);
return;
case AArch64::FMOVH0:
@@ -1331,12 +1331,12 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
case TargetOpcode::PATCHPOINT:
return LowerPATCHPOINT(*OutStreamer, SM, *MI);
- case TargetOpcode::STATEPOINT:
- return LowerSTATEPOINT(*OutStreamer, SM, *MI);
-
- case TargetOpcode::FAULTING_OP:
- return LowerFAULTING_OP(*MI);
-
+ case TargetOpcode::STATEPOINT:
+ return LowerSTATEPOINT(*OutStreamer, SM, *MI);
+
+ case TargetOpcode::FAULTING_OP:
+ return LowerFAULTING_OP(*MI);
+
case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
LowerPATCHABLE_FUNCTION_ENTER(*MI);
return;
@@ -1381,14 +1381,14 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
case AArch64::SEH_SaveRegP:
- if (MI->getOperand(1).getImm() == 30 && MI->getOperand(0).getImm() >= 19 &&
- MI->getOperand(0).getImm() <= 28) {
- assert((MI->getOperand(0).getImm() - 19) % 2 == 0 &&
- "Register paired with LR must be odd");
- TS->EmitARM64WinCFISaveLRPair(MI->getOperand(0).getImm(),
- MI->getOperand(2).getImm());
- return;
- }
+ if (MI->getOperand(1).getImm() == 30 && MI->getOperand(0).getImm() >= 19 &&
+ MI->getOperand(0).getImm() <= 28) {
+ assert((MI->getOperand(0).getImm() - 19) % 2 == 0 &&
+ "Register paired with LR must be odd");
+ TS->EmitARM64WinCFISaveLRPair(MI->getOperand(0).getImm(),
+ MI->getOperand(2).getImm());
+ return;
+ }
assert((MI->getOperand(1).getImm() - MI->getOperand(0).getImm() == 1) &&
"Non-consecutive registers not allowed for save_regp");
TS->EmitARM64WinCFISaveRegP(MI->getOperand(0).getImm(),
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64BranchTargets.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64BranchTargets.cpp
index d3b5166585..12a4c8ce9d 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64BranchTargets.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64BranchTargets.cpp
@@ -16,7 +16,7 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64MachineFunctionInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -58,13 +58,13 @@ FunctionPass *llvm::createAArch64BranchTargetsPass() {
}
bool AArch64BranchTargets::runOnMachineFunction(MachineFunction &MF) {
- if (!MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
+ if (!MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
return false;
LLVM_DEBUG(
dbgs() << "********** AArch64 Branch Targets **********\n"
<< "********** Function: " << MF.getName() << '\n');
- const Function &F = MF.getFunction();
+ const Function &F = MF.getFunction();
// LLVM does not consider basic blocks which are the targets of jump tables
// to be address-taken (the address can't escape anywhere else), but they are
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64CallingConvention.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64CallingConvention.cpp
index c51dd48cab..ab1a31e1e7 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64CallingConvention.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -42,51 +42,51 @@ static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
CCState &State, Align SlotAlign) {
- if (LocVT.isScalableVector()) {
- const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
- State.getMachineFunction().getSubtarget());
- const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
-
- // We are about to reinvoke the CCAssignFn auto-generated handler. If we
- // don't unset these flags we will get stuck in an infinite loop forever
- // invoking the custom handler.
- ArgFlags.setInConsecutiveRegs(false);
- ArgFlags.setInConsecutiveRegsLast(false);
-
- // The calling convention for passing SVE tuples states that in the event
- // we cannot allocate enough registers for the tuple we should still leave
- // any remaining registers unallocated. However, when we call the
- // CCAssignFn again we want it to behave as if all remaining registers are
- // allocated. This will force the code to pass the tuple indirectly in
- // accordance with the PCS.
- bool RegsAllocated[8];
- for (int I = 0; I < 8; I++) {
- RegsAllocated[I] = State.isAllocated(ZRegList[I]);
- State.AllocateReg(ZRegList[I]);
- }
-
- auto &It = PendingMembers[0];
- CCAssignFn *AssignFn =
- TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false);
- if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full,
- ArgFlags, State))
- llvm_unreachable("Call operand has unhandled type");
-
- // Return the flags to how they were before.
- ArgFlags.setInConsecutiveRegs(true);
- ArgFlags.setInConsecutiveRegsLast(true);
-
- // Return the register state back to how it was before, leaving any
- // unallocated registers available for other smaller types.
- for (int I = 0; I < 8; I++)
- if (!RegsAllocated[I])
- State.DeallocateReg(ZRegList[I]);
-
- // All pending members have now been allocated
- PendingMembers.clear();
- return true;
- }
-
+ if (LocVT.isScalableVector()) {
+ const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
+ State.getMachineFunction().getSubtarget());
+ const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
+
+ // We are about to reinvoke the CCAssignFn auto-generated handler. If we
+ // don't unset these flags we will get stuck in an infinite loop forever
+ // invoking the custom handler.
+ ArgFlags.setInConsecutiveRegs(false);
+ ArgFlags.setInConsecutiveRegsLast(false);
+
+ // The calling convention for passing SVE tuples states that in the event
+ // we cannot allocate enough registers for the tuple we should still leave
+ // any remaining registers unallocated. However, when we call the
+ // CCAssignFn again we want it to behave as if all remaining registers are
+ // allocated. This will force the code to pass the tuple indirectly in
+ // accordance with the PCS.
+ bool RegsAllocated[8];
+ for (int I = 0; I < 8; I++) {
+ RegsAllocated[I] = State.isAllocated(ZRegList[I]);
+ State.AllocateReg(ZRegList[I]);
+ }
+
+ auto &It = PendingMembers[0];
+ CCAssignFn *AssignFn =
+ TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false);
+ if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full,
+ ArgFlags, State))
+ llvm_unreachable("Call operand has unhandled type");
+
+ // Return the flags to how they were before.
+ ArgFlags.setInConsecutiveRegs(true);
+ ArgFlags.setInConsecutiveRegsLast(true);
+
+ // Return the register state back to how it was before, leaving any
+ // unallocated registers available for other smaller types.
+ for (int I = 0; I < 8; I++)
+ if (!RegsAllocated[I])
+ State.DeallocateReg(ZRegList[I]);
+
+ // All pending members have now been allocated
+ PendingMembers.clear();
+ return true;
+ }
+
unsigned Size = LocVT.getSizeInBits() / 8;
const Align StackAlign =
State.getMachineFunction().getDataLayout().getStackAlignment();
@@ -191,11 +191,11 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
- if (!LocVT.isScalableVector()) {
- // Mark all regs in the class as unavailable
- for (auto Reg : RegList)
- State.AllocateReg(Reg);
- }
+ if (!LocVT.isScalableVector()) {
+ // Mark all regs in the class as unavailable
+ for (auto Reg : RegList)
+ State.AllocateReg(Reg);
+ }
const Align SlotAlign = Subtarget.isTargetDarwin() ? Align(1) : Align(8);
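The finishStackBlock hunk above handles SVE tuple arguments: before re-invoking the auto-generated CCAssignFn, every Z register is temporarily marked allocated so the handler is forced onto the indirect (stack) path, and afterwards only the registers that were genuinely free beforehand are released again. A minimal standalone sketch of that save/override/rollback pattern, using a toy bitset in place of CCState (the register count and the initial allocation are assumptions, not taken from the patch):

#include <array>
#include <bitset>
#include <cstdio>

int main() {
  std::bitset<8> allocated("00000011");   // pretend Z0 and Z1 are already taken
  std::array<bool, 8> wasAllocated{};

  // Remember the current state, then mark every Z register as allocated so a
  // re-invoked assignment routine would see no free registers and go indirect.
  for (int i = 0; i < 8; ++i) {
    wasAllocated[i] = allocated.test(i);
    allocated.set(i);
  }

  // ... the assignment routine would run here ...

  // Roll back: only registers that were free before become free again.
  for (int i = 0; i < 8; ++i)
    if (!wasAllocated[i])
      allocated.reset(i);

  std::printf("allocation mask after rollback: %s\n", allocated.to_string().c_str());
}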
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64Combine.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64Combine.td
index b1e714653f..03d92b8d50 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64Combine.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64Combine.td
@@ -75,68 +75,68 @@ def ext: GICombineRule <
// instruction.
def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>;
-def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;
-def vashr_vlshr_imm : GICombineRule<
- (defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo),
- (match (wip_match_opcode G_ASHR, G_LSHR):$root,
- [{ return matchVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]),
- (apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }])
->;
-
-def form_duplane_matchdata :
- GIDefMatchData<"std::pair<unsigned, int>">;
-def form_duplane : GICombineRule <
- (defs root:$root, form_duplane_matchdata:$matchinfo),
- (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
- [{ return matchDupLane(*${root}, MRI, ${matchinfo}); }]),
- (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
->;
-
-def adjust_icmp_imm_matchdata :
- GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
-def adjust_icmp_imm : GICombineRule <
- (defs root:$root, adjust_icmp_imm_matchdata:$matchinfo),
- (match (wip_match_opcode G_ICMP):$root,
- [{ return matchAdjustICmpImmAndPred(*${root}, MRI, ${matchinfo}); }]),
- (apply [{ applyAdjustICmpImmAndPred(*${root}, ${matchinfo}, B, Observer); }])
->;
-
-def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>;
-
-def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
-def extractvecelt_pairwise_add : GICombineRule<
- (defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo),
- (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
- [{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]),
- (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
->;
-
-def mul_const_matchdata : GIDefMatchData<"std::function<void(MachineIRBuilder&, Register)>">;
-def mul_const : GICombineRule<
- (defs root:$root, mul_const_matchdata:$matchinfo),
- (match (wip_match_opcode G_MUL):$root,
- [{ return matchAArch64MulConstCombine(*${root}, MRI, ${matchinfo}); }]),
- (apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
->;
-
-// Post-legalization combines which should happen at all optimization levels.
-// (E.g. ones that facilitate matching for the selector) For example, matching
-// pseudos.
-def AArch64PostLegalizerLoweringHelper
- : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
- [shuffle_vector_pseudos, vashr_vlshr_imm,
- icmp_lowering, form_duplane]> {
- let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
-}
-
-// Post-legalization combines which are primarily optimizations.
+def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;
+def vashr_vlshr_imm : GICombineRule<
+ (defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo),
+ (match (wip_match_opcode G_ASHR, G_LSHR):$root,
+ [{ return matchVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }])
+>;
+
+def form_duplane_matchdata :
+ GIDefMatchData<"std::pair<unsigned, int>">;
+def form_duplane : GICombineRule <
+ (defs root:$root, form_duplane_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+ [{ return matchDupLane(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
+def adjust_icmp_imm_matchdata :
+ GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
+def adjust_icmp_imm : GICombineRule <
+ (defs root:$root, adjust_icmp_imm_matchdata:$matchinfo),
+ (match (wip_match_opcode G_ICMP):$root,
+ [{ return matchAdjustICmpImmAndPred(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyAdjustICmpImmAndPred(*${root}, ${matchinfo}, B, Observer); }])
+>;
+
+def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>;
+
+def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
+def extractvecelt_pairwise_add : GICombineRule<
+ (defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo),
+ (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
+ [{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
+def mul_const_matchdata : GIDefMatchData<"std::function<void(MachineIRBuilder&, Register)>">;
+def mul_const : GICombineRule<
+ (defs root:$root, mul_const_matchdata:$matchinfo),
+ (match (wip_match_opcode G_MUL):$root,
+ [{ return matchAArch64MulConstCombine(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
+// Post-legalization combines which should happen at all optimization levels.
+// (E.g. ones that facilitate matching for the selector) For example, matching
+// pseudos.
+def AArch64PostLegalizerLoweringHelper
+ : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
+ [shuffle_vector_pseudos, vashr_vlshr_imm,
+ icmp_lowering, form_duplane]> {
+ let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
+}
+
+// Post-legalization combines which are primarily optimizations.
def AArch64PostLegalizerCombinerHelper
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
- [copy_prop, erase_undef_store, combines_for_extload,
- sext_trunc_sextload,
- hoist_logic_op_with_same_opcode_hands,
- redundant_and, xor_of_and_with_same_reg,
- extractvecelt_pairwise_add, redundant_or,
- mul_const]> {
+ [copy_prop, erase_undef_store, combines_for_extload,
+ sext_trunc_sextload,
+ hoist_logic_op_with_same_opcode_hands,
+ redundant_and, xor_of_and_with_same_reg,
+ extractvecelt_pairwise_add, redundant_or,
+ mul_const]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
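The TableGen rules above pair a GIDefMatchData type with a match routine that fills it and an apply routine that consumes it. The sketch below mirrors that split in plain C++ with toy types; the names and the shift-immediate check are illustrative only and do not reproduce the real matchVAshrLshrImm/applyVAshrLshrImm helpers:

#include <cstdint>
#include <cstdio>

struct ToyInstr { int opcode; int64_t imm; };   // stand-in for a G_ASHR/G_LSHR
constexpr int G_ASHR = 1;

// "match": decide whether the combine fires and record match data.
static bool matchShiftImm(const ToyInstr &I, int64_t &MatchInfo) {
  if (I.opcode != G_ASHR || I.imm <= 0 || I.imm > 63)
    return false;
  MatchInfo = I.imm;                            // matchdata, like GIDefMatchData<"int64_t">
  return true;
}

// "apply": rewrite using the recorded match data.
static void applyShiftImm(ToyInstr &I, int64_t MatchInfo) {
  std::printf("rewriting shift with immediate %lld\n", (long long)MatchInfo);
}

int main() {
  ToyInstr I{G_ASHR, 7};
  int64_t Info = 0;
  if (matchShiftImm(I, Info))
    applyShiftImm(I, Info);
}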
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64CompressJumpTables.cpp
index 2328a8b4de..d419598aaa 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64CompressJumpTables.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -37,14 +37,14 @@ class AArch64CompressJumpTables : public MachineFunctionPass {
MachineFunction *MF;
SmallVector<int, 8> BlockInfo;
- /// Returns the size in instructions of the block \p MBB, or None if we
- /// couldn't get a safe upper bound.
- Optional<int> computeBlockSize(MachineBasicBlock &MBB);
-
- /// Gather information about the function, returns false if we can't perform
- /// this optimization for some reason.
- bool scanFunction();
-
+ /// Returns the size in instructions of the block \p MBB, or None if we
+ /// couldn't get a safe upper bound.
+ Optional<int> computeBlockSize(MachineBasicBlock &MBB);
+
+ /// Gather information about the function, returns false if we can't perform
+ /// this optimization for some reason.
+ bool scanFunction();
+
bool compressJumpTable(MachineInstr &MI, int Offset);
public:
@@ -64,27 +64,27 @@ public:
}
};
char AArch64CompressJumpTables::ID = 0;
-} // namespace
+} // namespace
INITIALIZE_PASS(AArch64CompressJumpTables, DEBUG_TYPE,
"AArch64 compress jump tables pass", false, false)
-Optional<int>
-AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
+Optional<int>
+AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
int Size = 0;
- for (const MachineInstr &MI : MBB) {
- // Inline asm may contain some directives like .bytes which we don't
- // currently have the ability to parse accurately. To be safe, just avoid
- // computing a size and bail out.
- if (MI.getOpcode() == AArch64::INLINEASM ||
- MI.getOpcode() == AArch64::INLINEASM_BR)
- return None;
+ for (const MachineInstr &MI : MBB) {
+ // Inline asm may contain some directives like .bytes which we don't
+ // currently have the ability to parse accurately. To be safe, just avoid
+ // computing a size and bail out.
+ if (MI.getOpcode() == AArch64::INLINEASM ||
+ MI.getOpcode() == AArch64::INLINEASM_BR)
+ return None;
Size += TII->getInstSizeInBytes(MI);
- }
+ }
return Size;
}
-bool AArch64CompressJumpTables::scanFunction() {
+bool AArch64CompressJumpTables::scanFunction() {
BlockInfo.clear();
BlockInfo.resize(MF->getNumBlockIDs());
@@ -97,12 +97,12 @@ bool AArch64CompressJumpTables::scanFunction() {
else
AlignedOffset = alignTo(Offset, Alignment);
BlockInfo[MBB.getNumber()] = AlignedOffset;
- auto BlockSize = computeBlockSize(MBB);
- if (!BlockSize)
- return false;
- Offset = AlignedOffset + *BlockSize;
+ auto BlockSize = computeBlockSize(MBB);
+ if (!BlockSize)
+ return false;
+ Offset = AlignedOffset + *BlockSize;
}
- return true;
+ return true;
}
bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
@@ -121,7 +121,7 @@ bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
int MaxOffset = std::numeric_limits<int>::min(),
MinOffset = std::numeric_limits<int>::max();
MachineBasicBlock *MinBlock = nullptr;
- for (auto *Block : JT.MBBs) {
+ for (auto *Block : JT.MBBs) {
int BlockOffset = BlockInfo[Block->getNumber()];
assert(BlockOffset % 4 == 0 && "misaligned basic block");
@@ -141,14 +141,14 @@ bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
}
int Span = MaxOffset - MinOffset;
- auto *AFI = MF->getInfo<AArch64FunctionInfo>();
+ auto *AFI = MF->getInfo<AArch64FunctionInfo>();
if (isUInt<8>(Span / 4)) {
AFI->setJumpTableEntryInfo(JTIdx, 1, MinBlock->getSymbol());
MI.setDesc(TII->get(AArch64::JumpTableDest8));
++NumJT8;
return true;
- }
- if (isUInt<16>(Span / 4)) {
+ }
+ if (isUInt<16>(Span / 4)) {
AFI->setJumpTableEntryInfo(JTIdx, 2, MinBlock->getSymbol());
MI.setDesc(TII->get(AArch64::JumpTableDest16));
++NumJT16;
@@ -169,8 +169,8 @@ bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) {
if (ST.force32BitJumpTables() && !MF->getFunction().hasMinSize())
return false;
- if (!scanFunction())
- return false;
+ if (!scanFunction())
+ return false;
for (MachineBasicBlock &MBB : *MF) {
int Offset = BlockInfo[MBB.getNumber()];
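compressJumpTable, shown above, first needs block offsets that are safe upper bounds (hence the bail-out on inline asm), then shrinks the jump-table entry width based on the byte span between the lowest and highest target. A standalone sketch of that span test with made-up offsets; the 0xFF/0xFFFF limits correspond to the isUInt<8>/isUInt<16> checks:

#include <algorithm>
#include <cstdio>
#include <limits>
#include <vector>

int main() {
  std::vector<int> blockOffsets = {0, 64, 512, 96};   // hypothetical, 4-byte aligned
  int minOff = std::numeric_limits<int>::max();
  int maxOff = std::numeric_limits<int>::min();
  for (int off : blockOffsets) {
    minOff = std::min(minOff, off);
    maxOff = std::max(maxOff, off);
  }
  int span = maxOff - minOff;
  // Entries store (offset - minOff) / 4, so the width test is on span / 4.
  const char *kind = (span / 4) <= 0xFF     ? "1-byte entries"
                     : (span / 4) <= 0xFFFF ? "2-byte entries"
                                            : "4-byte entries";
  std::printf("span = %d bytes -> %s\n", span, kind);
}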
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index e57650ae60..1a8731883f 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -83,8 +83,8 @@ private:
bool expandSVESpillFill(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned Opc,
unsigned N);
- bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI);
+ bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
};
} // end anonymous namespace
@@ -629,46 +629,46 @@ bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
return true;
}
-bool AArch64ExpandPseudo::expandCALL_RVMARKER(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
- // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29,
- // x29` marker. Mark the sequence as bundle, to avoid passes moving other code
- // in between.
- MachineInstr &MI = *MBBI;
-
- MachineInstr *OriginalCall;
- MachineOperand &CallTarget = MI.getOperand(0);
- assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
- "invalid operand for regular call");
- unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
- OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
- OriginalCall->addOperand(CallTarget);
-
- unsigned RegMaskStartIdx = 1;
- // Skip register arguments. Those are added during ISel, but are not
- // needed for the concrete branch.
- while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
- assert(MI.getOperand(RegMaskStartIdx).isReg() &&
- "should only skip register operands");
- RegMaskStartIdx++;
- }
- for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx)
- OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx));
-
- auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
- .addReg(AArch64::FP, RegState::Define)
- .addReg(AArch64::XZR)
- .addReg(AArch64::FP)
- .addImm(0)
- .getInstr();
- if (MI.shouldUpdateCallSiteInfo())
- MBB.getParent()->moveCallSiteInfo(&MI, Marker);
- MI.eraseFromParent();
- finalizeBundle(MBB, OriginalCall->getIterator(),
- std::next(Marker->getIterator()));
- return true;
-}
-
+bool AArch64ExpandPseudo::expandCALL_RVMARKER(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+ // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29,
+ // x29` marker. Mark the sequence as bundle, to avoid passes moving other code
+ // in between.
+ MachineInstr &MI = *MBBI;
+
+ MachineInstr *OriginalCall;
+ MachineOperand &CallTarget = MI.getOperand(0);
+ assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
+ "invalid operand for regular call");
+ unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
+ OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
+ OriginalCall->addOperand(CallTarget);
+
+ unsigned RegMaskStartIdx = 1;
+ // Skip register arguments. Those are added during ISel, but are not
+ // needed for the concrete branch.
+ while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
+ assert(MI.getOperand(RegMaskStartIdx).isReg() &&
+ "should only skip register operands");
+ RegMaskStartIdx++;
+ }
+ for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx)
+ OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx));
+
+ auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
+ .addReg(AArch64::FP, RegState::Define)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::FP)
+ .addImm(0)
+ .getInstr();
+ if (MI.shouldUpdateCallSiteInfo())
+ MBB.getParent()->moveCallSiteInfo(&MI, Marker);
+ MI.eraseFromParent();
+ finalizeBundle(MBB, OriginalCall->getIterator(),
+ std::next(Marker->getIterator()));
+ return true;
+}
+
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -1056,8 +1056,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
case AArch64::LDR_ZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
- case AArch64::BLR_RVMARKER:
- return expandCALL_RVMARKER(MBB, MBBI);
+ case AArch64::BLR_RVMARKER:
+ return expandCALL_RVMARKER(MBB, MBBI);
}
return false;
}
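expandCALL_RVMARKER above turns the pseudo into a real BL/BLR followed by the `mov x29, x29` marker and bundles the pair. The operand handling — keep the call target, skip the plain register arguments that ISel added, copy everything from the register mask onward — is sketched below with a toy operand type; the operand layout in main is an assumed example, not the pseudo's actual operand list:

#include <cstdio>
#include <vector>

struct ToyOp { enum Kind { Global, Reg, RegMask } kind; };

int main() {
  // Pseudo: target, two register args, regmask, one trailing operand (assumed layout).
  std::vector<ToyOp> pseudoOps = {
      {ToyOp::Global}, {ToyOp::Reg}, {ToyOp::Reg}, {ToyOp::RegMask}, {ToyOp::Reg}};

  std::vector<ToyOp> call;
  call.push_back(pseudoOps[0]);                 // call target -> operand 0 of BL/BLR

  size_t i = 1;
  while (pseudoOps[i].kind != ToyOp::RegMask)   // skip plain register arguments
    ++i;
  for (; i < pseudoOps.size(); ++i)             // keep regmask + trailing operands
    call.push_back(pseudoOps[i]);

  std::printf("call carries %zu operands; the 'mov x29, x29' marker follows in the bundle\n",
              call.size());
}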
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index 209f9f7255..afd8765f45 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -54,7 +54,7 @@
using namespace llvm;
-#define DEBUG_TYPE "aarch64-falkor-hwpf-fix"
+#define DEBUG_TYPE "aarch64-falkor-hwpf-fix"
STATISTIC(NumStridedLoadsMarked, "Number of strided loads marked");
STATISTIC(NumCollisionsAvoided,
@@ -146,7 +146,7 @@ bool FalkorMarkStridedAccesses::run() {
bool FalkorMarkStridedAccesses::runOnLoop(Loop &L) {
// Only mark strided loads in the inner-most loop
- if (!L.isInnermost())
+ if (!L.isInnermost())
return false;
bool MadeChange = false;
@@ -224,10 +224,10 @@ struct LoadInfo {
char FalkorHWPFFix::ID = 0;
-INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late",
+INITIALIZE_PASS_BEGIN(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late",
"Falkor HW Prefetch Fix Late Phase", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late",
+INITIALIZE_PASS_END(FalkorHWPFFix, "aarch64-falkor-hwpf-fix-late",
"Falkor HW Prefetch Fix Late Phase", false, false)
static unsigned makeTag(unsigned Dest, unsigned Base, unsigned Offset) {
@@ -830,7 +830,7 @@ bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) {
for (MachineLoop *I : LI)
for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L)
// Only process inner-loops
- if (L->isInnermost())
+ if (L->isInnermost())
runOnLoop(**L, Fn);
return Modified;
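The Falkor fix above only touches innermost loops (the isInnermost() checks). A tiny standalone sketch of that filter over a toy loop tree; the loop names are invented:

#include <cstdio>
#include <vector>

struct ToyLoop {
  const char *name;
  std::vector<ToyLoop> subLoops;
  bool isInnermost() const { return subLoops.empty(); }
};

static void visit(const ToyLoop &L) {
  if (L.isInnermost())
    std::printf("processing innermost loop %s\n", L.name);
  for (const ToyLoop &Sub : L.subLoops)
    visit(Sub);
}

int main() {
  ToyLoop outer{"outer", {ToyLoop{"inner", {}}}};
  visit(outer);   // only "inner" is processed
}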
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FastISel.cpp
index 9801036653..b4e4233448 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3409,7 +3409,7 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
const Value *RHS = II->getArgOperand(1);
// Canonicalize immediate to the RHS.
- if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
std::swap(LHS, RHS);
// Simplify multiplies.
@@ -3651,10 +3651,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
.addImm(1);
return true;
- case Intrinsic::debugtrap:
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
- .addImm(0xF000);
- return true;
+ case Intrinsic::debugtrap:
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
+ .addImm(0xF000);
+ return true;
case Intrinsic::sqrt: {
Type *RetTy = II->getCalledFunction()->getReturnType();
@@ -3696,7 +3696,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
const Value *LHS = II->getArgOperand(0);
const Value *RHS = II->getArgOperand(1);
// Canonicalize immediate to the RHS.
- if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
std::swap(LHS, RHS);
// Simplify multiplies.
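Both FastISel hunks above apply the same canonicalisation: if a commutative operation has its constant operand on the left, swap it to the right so later code only has to look for immediates on the RHS. A minimal sketch with a toy value type, where std::optional stands in for the isa<ConstantInt> test:

#include <cstdio>
#include <optional>
#include <utility>

struct ToyValue { std::optional<long> constant; };   // stand-in for llvm::Value

int main() {
  ToyValue lhs{42}, rhs{};        // constant on the left
  bool commutative = true;
  if (lhs.constant && !rhs.constant && commutative)
    std::swap(lhs, rhs);          // canonical form: immediate on the RHS
  std::printf("rhs is %s\n", rhs.constant ? "the constant operand" : "not constant");
}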
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp
index 65ee501604..9aa8f7a804 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -175,10 +175,10 @@ static cl::opt<bool> StackTaggingMergeSetTag(
cl::desc("merge settag instruction in function epilog"), cl::init(true),
cl::Hidden);
-static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
- cl::desc("sort stack allocations"),
- cl::init(true), cl::Hidden);
-
+static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
+ cl::desc("sort stack allocations"),
+ cl::init(true), cl::Hidden);
+
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
/// Returns the argument pop size.
@@ -249,7 +249,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
TargetStackID::Value
AArch64FrameLowering::getStackIDForScalableVectors() const {
- return TargetStackID::ScalableVector;
+ return TargetStackID::ScalableVector;
}
/// Returns the size of the fixed object area (allocated next to sp on entry)
@@ -273,7 +273,7 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
/// Returns the size of the entire SVE stackframe (calleesaves + spills).
static StackOffset getSVEStackSize(const MachineFunction &MF) {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
+ return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
}
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
@@ -365,15 +365,15 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
// Most call frames will be allocated at the start of a function so
// this is OK, but it is a limitation that needs dealing with.
assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
- emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(Amount), TII);
+ emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(Amount), TII);
}
} else if (CalleePopAmount != 0) {
// If the calling convention demands that the callee pops arguments from the
// stack, we want to add it back if we have a reserved call frame.
assert(CalleePopAmount < 0xffffff && "call frame too large");
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(-(int64_t)CalleePopAmount), TII);
+ StackOffset::getFixed(-(int64_t)CalleePopAmount), TII);
}
return MBB.erase(I);
}
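Throughout this file the hunks switch emitFrameOffset callers over to StackOffset::getFixed / StackOffset::getScalable, i.e. an offset that carries a plain byte part and a vscale-scaled part. The toy class below illustrates that shape; it is a simplified stand-in for illustration, not llvm::StackOffset:

#include <cstdint>
#include <cstdio>

class ToyStackOffset {
  int64_t Fixed = 0;
  int64_t Scalable = 0;
  ToyStackOffset(int64_t F, int64_t S) : Fixed(F), Scalable(S) {}
public:
  ToyStackOffset() = default;
  static ToyStackOffset getFixed(int64_t F) { return {F, 0}; }
  static ToyStackOffset getScalable(int64_t S) { return {0, S}; }
  int64_t getFixed() const { return Fixed; }
  int64_t getScalable() const { return Scalable; }
  ToyStackOffset operator+(ToyStackOffset O) const {
    return {Fixed + O.Fixed, Scalable + O.Scalable};
  }
};

int main() {
  // e.g. a frame with 32 fixed bytes plus 16 * vscale bytes of SVE area.
  ToyStackOffset Off = ToyStackOffset::getFixed(32) + ToyStackOffset::getScalable(16);
  std::printf("offset = %lld + %lld * vscale bytes\n",
              (long long)Off.getFixed(), (long long)Off.getScalable());
}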
@@ -413,8 +413,8 @@ static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr,
MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP(
const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const {
int64_t NumBytes, NumVGScaledBytes;
- AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(OffsetFromSP, NumBytes,
- NumVGScaledBytes);
+ AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(OffsetFromSP, NumBytes,
+ NumVGScaledBytes);
std::string CommentBuffer = "sp";
llvm::raw_string_ostream Comment(CommentBuffer);
@@ -441,8 +441,8 @@ MCCFIInstruction AArch64FrameLowering::createCfaOffset(
const TargetRegisterInfo &TRI, unsigned Reg,
const StackOffset &OffsetFromDefCFA) const {
int64_t NumBytes, NumVGScaledBytes;
- AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
- OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
+ AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
+ OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
@@ -496,14 +496,14 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
continue;
StackOffset Offset;
- if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector) {
+ if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector) {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- Offset =
- StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
- StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
+ Offset =
+ StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
+ StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
} else {
- Offset = StackOffset::getFixed(MFI.getObjectOffset(Info.getFrameIdx()) -
- getOffsetOfLocalArea());
+ Offset = StackOffset::getFixed(MFI.getObjectOffset(Info.getFrameIdx()) -
+ getOffsetOfLocalArea());
}
unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
@@ -584,12 +584,12 @@ static bool windowsRequiresStackProbe(MachineFunction &MF,
!F.hasFnAttribute("no-stack-arg-probe");
}
-static bool needsWinCFI(const MachineFunction &MF) {
- const Function &F = MF.getFunction();
- return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
- F.needsUnwindTableEntry();
-}
-
+static bool needsWinCFI(const MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ F.needsUnwindTableEntry();
+}
+
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
MachineFunction &MF, uint64_t StackBumpBytes) const {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -600,18 +600,18 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
if (AFI->getLocalStackSize() == 0)
return false;
- // For WinCFI, if optimizing for size, prefer to not combine the stack bump
- // (to force a stp with predecrement) to match the packed unwind format,
- // provided that there actually are any callee saved registers to merge the
- // decrement with.
- // This is potentially marginally slower, but allows using the packed
- // unwind format for functions that both have a local area and callee saved
- // registers. Using the packed unwind format notably reduces the size of
- // the unwind info.
- if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
- MF.getFunction().hasOptSize())
- return false;
-
+ // For WinCFI, if optimizing for size, prefer to not combine the stack bump
+ // (to force a stp with predecrement) to match the packed unwind format,
+ // provided that there actually are any callee saved registers to merge the
+ // decrement with.
+ // This is potentially marginally slower, but allows using the packed
+ // unwind format for functions that both have a local area and callee saved
+ // registers. Using the packed unwind format notably reduces the size of
+ // the unwind info.
+ if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
+ MF.getFunction().hasOptSize())
+ return false;
+
// 512 is the maximum immediate for stp/ldp that will be used for
// callee-save save/restores
if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
@@ -1051,16 +1051,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc DL;
- const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
- if (MFnI.shouldSignReturnAddress()) {
- if (MFnI.shouldSignWithBKey()) {
+ const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
+ if (MFnI.shouldSignReturnAddress()) {
+ if (MFnI.shouldSignWithBKey()) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
.setMIFlag(MachineInstr::FrameSetup);
- } else {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
- .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
+ .setMIFlag(MachineInstr::FrameSetup);
}
unsigned CFIIndex =
@@ -1075,13 +1075,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
- // Set tagged base pointer to the requested stack slot.
+ // Set tagged base pointer to the requested stack slot.
// Ideally it should match SP value after prologue.
- Optional<int> TBPI = AFI->getTaggedBasePointerIndex();
- if (TBPI)
- AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
- else
- AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+ Optional<int> TBPI = AFI->getTaggedBasePointerIndex();
+ if (TBPI)
+ AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
+ else
+ AFI->setTaggedBasePointerOffset(MFI.getStackSize());
const StackOffset &SVEStackSize = getSVEStackSize(MF);
@@ -1108,8 +1108,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++NumRedZoneFunctions;
} else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ StackOffset::getFixed(-NumBytes), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (!NeedsWinCFI && needsFrameMoves) {
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
@@ -1142,8 +1142,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ StackOffset::getFixed(-NumBytes), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
NumBytes = 0;
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
@@ -1167,7 +1167,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// For funclets the FP belongs to the containing function.
if (!IsFunclet && HasFP) {
// Only set up FP if we actually need to.
- int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
+ int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
@@ -1177,8 +1177,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Note: All stores of callee-saved registers are marked as "FrameSetup".
// This code marks the instruction(s) that set the FP also.
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
- StackOffset::getFixed(FPOffset), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ StackOffset::getFixed(FPOffset), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
@@ -1288,7 +1288,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
CalleeSavesEnd = MBBI;
- AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
+ AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
AllocateAfter = SVEStackSize - AllocateBefore;
}
@@ -1320,8 +1320,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ StackOffset::getFixed(-NumBytes), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (NeedsRealignment) {
const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
@@ -1458,15 +1458,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// .cfi_offset w28, -32
if (HasFP) {
- const int OffsetToFirstCalleeSaveFromFP =
- AFI->getCalleeSaveBaseToFrameRecordOffset() -
- AFI->getCalleeSavedStackSize();
- Register FramePtr = RegInfo->getFrameRegister(MF);
-
+ const int OffsetToFirstCalleeSaveFromFP =
+ AFI->getCalleeSaveBaseToFrameRecordOffset() -
+ AFI->getCalleeSavedStackSize();
+ Register FramePtr = RegInfo->getFrameRegister(MF);
+
// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
+ MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -1476,7 +1476,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
StackOffset TotalSize =
- SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+ SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize));
} else {
// Encode the stack size of the leaf function.
@@ -1496,8 +1496,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
static void InsertReturnAddressAuth(MachineFunction &MF,
MachineBasicBlock &MBB) {
- const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();
- if (!MFI.shouldSignReturnAddress())
+ const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();
+ if (!MFI.shouldSignReturnAddress())
return;
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -1511,16 +1511,16 @@ static void InsertReturnAddressAuth(MachineFunction &MF,
// this instruction can safely used for any v8a architecture.
// From v8.3a onwards there are optimised authenticate LR and return
// instructions, namely RETA{A,B}, that can be used instead.
- if (Subtarget.hasPAuth() && MBBI != MBB.end() &&
+ if (Subtarget.hasPAuth() && MBBI != MBB.end() &&
MBBI->getOpcode() == AArch64::RET_ReallyLR) {
BuildMI(MBB, MBBI, DL,
- TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA))
+ TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA))
.copyImplicitOps(*MBBI);
MBB.erase(MBBI);
} else {
BuildMI(
MBB, MBBI, DL,
- TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
+ TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
.setMIFlag(MachineInstr::FrameDestroy);
}
}
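InsertReturnAddressAuth above picks between the fused authenticate-and-return forms (RETAA/RETAB, usable with PAuth when the next instruction is a plain RET) and a separate AUTIASP/AUTIBSP in front of the existing return. The selection is condensed below with assumed flag values; it is an illustration of the decision only:

#include <cstdio>

int main() {
  bool hasPAuth = true;          // assumed v8.3a+ core
  bool nextIsPlainRet = true;    // iterator points at RET_ReallyLR
  bool signWithBKey = false;     // A-key by default

  const char *emitted;
  if (hasPAuth && nextIsPlainRet)
    emitted = signWithBKey ? "retab" : "retaa";                  // replaces the RET
  else
    emitted = signWithBKey ? "autibsp (then ret)" : "autiasp (then ret)";
  std::printf("epilogue authentication: %s\n", emitted);
}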
@@ -1545,7 +1545,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
bool NeedsWinCFI = needsWinCFI(MF);
bool HasWinCFI = false;
bool IsFunclet = false;
- auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); });
+ auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); });
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
@@ -1645,13 +1645,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
NeedsWinCFI, &HasWinCFI);
}
- if (MF.hasWinCFI()) {
- // If the prologue didn't contain any SEH opcodes and didn't set the
- // MF.hasWinCFI() flag, assume the epilogue won't either, and skip the
- // EpilogStart - to avoid generating CFI for functions that don't need it.
- // (And as we didn't generate any prologue at all, it would be asymmetrical
- // to the epilogue.) By the end of the function, we assert that
- // HasWinCFI is equal to MF.hasWinCFI(), to verify this assumption.
+ if (MF.hasWinCFI()) {
+ // If the prologue didn't contain any SEH opcodes and didn't set the
+ // MF.hasWinCFI() flag, assume the epilogue won't either, and skip the
+ // EpilogStart - to avoid generating CFI for functions that don't need it.
+ // (And as we didn't generate any prologue at all, it would be asymmetrical
+ // to the epilogue.) By the end of the function, we assert that
+ // HasWinCFI is equal to MF.hasWinCFI(), to verify this assumption.
HasWinCFI = true;
BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
.setMIFlag(MachineInstr::FrameDestroy);
@@ -1663,10 +1663,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
- TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
- &HasWinCFI);
- if (HasWinCFI)
+ StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
+ TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI);
+ if (HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
@@ -1689,8 +1689,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
assert(IsSVECalleeSave(RestoreBegin) &&
IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
- StackOffset CalleeSavedSizeAsOffset =
- StackOffset::getScalable(CalleeSavedSize);
+ StackOffset CalleeSavedSizeAsOffset =
+ StackOffset::getScalable(CalleeSavedSize);
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
DeallocateAfter = CalleeSavedSizeAsOffset;
}
@@ -1703,15 +1703,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// be reloaded. The code below will deallocate the stack space
// space by moving FP -> SP.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
- StackOffset::getScalable(-CalleeSavedSize), TII,
+ StackOffset::getScalable(-CalleeSavedSize), TII,
MachineInstr::FrameDestroy);
} else {
if (AFI->getSVECalleeSavedStackSize()) {
// Deallocate the non-SVE locals first before we can deallocate (and
// restore callee saves) from the SVE area.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(NumBytes), TII,
- MachineInstr::FrameDestroy);
+ StackOffset::getFixed(NumBytes), TII,
+ MachineInstr::FrameDestroy);
NumBytes = 0;
}
@@ -1744,10 +1744,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(StackRestoreBytes), TII,
+ StackOffset::getFixed(StackRestoreBytes), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
if (Done) {
- if (HasWinCFI) {
+ if (HasWinCFI) {
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
@@ -1763,14 +1763,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
- emitFrameOffset(
- MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
- StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
- TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
+ emitFrameOffset(
+ MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
+ StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
+ TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
} else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(NumBytes), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI);
+ StackOffset::getFixed(NumBytes), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI);
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save
@@ -1791,10 +1791,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed((int64_t)AfterCSRPopSize), TII,
+ StackOffset::getFixed((int64_t)AfterCSRPopSize), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
}
- if (HasWinCFI)
+ if (HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
}
@@ -1803,51 +1803,51 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
-StackOffset
-AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const {
+StackOffset
+AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
return resolveFrameIndexReference(
- MF, FI, FrameReg,
- /*PreferFP=*/
- MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
- /*ForSimm=*/false);
+ MF, FI, FrameReg,
+ /*PreferFP=*/
+ MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
+ /*ForSimm=*/false);
}
-StackOffset
-AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
- int FI) const {
- return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI));
+StackOffset
+AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
+ int FI) const {
+ return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI));
}
-static StackOffset getFPOffset(const MachineFunction &MF,
- int64_t ObjectOffset) {
+static StackOffset getFPOffset(const MachineFunction &MF,
+ int64_t ObjectOffset) {
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject =
getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
- int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
- int64_t FPAdjust =
- CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
- return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust);
+ int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
+ int64_t FPAdjust =
+ CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
+ return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust);
}
-static StackOffset getStackOffset(const MachineFunction &MF,
- int64_t ObjectOffset) {
+static StackOffset getStackOffset(const MachineFunction &MF,
+ int64_t ObjectOffset) {
const auto &MFI = MF.getFrameInfo();
- return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize());
+ return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize());
}
- // TODO: This function currently does not work for scalable vectors.
+ // TODO: This function currently does not work for scalable vectors.
int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
int FI) const {
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
- ? getFPOffset(MF, ObjectOffset).getFixed()
- : getStackOffset(MF, ObjectOffset).getFixed();
+ ? getFPOffset(MF, ObjectOffset).getFixed()
+ : getStackOffset(MF, ObjectOffset).getFixed();
}
StackOffset AArch64FrameLowering::resolveFrameIndexReference(
@@ -1856,7 +1856,7 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
- bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector;
+ bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector;
return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
PreferFP, ForSimm);
}
@@ -1870,8 +1870,8 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
- int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
+ int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
+ int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
@@ -1946,16 +1946,16 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
"non-argument/CSR objects cannot be accessed through the frame pointer");
if (isSVE) {
- StackOffset FPOffset =
- StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
- StackOffset SPOffset =
- SVEStackSize +
- StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
- ObjectOffset);
+ StackOffset FPOffset =
+ StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
+ StackOffset SPOffset =
+ SVEStackSize +
+ StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
+ ObjectOffset);
// Always use the FP for SVE spills if available and beneficial.
if (hasFP(MF) &&
- (SPOffset.getFixed() ||
- FPOffset.getScalable() < SPOffset.getScalable() ||
+ (SPOffset.getFixed() ||
+ FPOffset.getScalable() < SPOffset.getScalable() ||
RegInfo->needsStackRealignment(MF))) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
@@ -1974,7 +1974,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
- return StackOffset::getFixed(FPOffset) + ScalableOffset;
+ return StackOffset::getFixed(FPOffset) + ScalableOffset;
}
// Use the base pointer if we have one.
@@ -1991,7 +1991,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
Offset -= AFI->getLocalStackSize();
}
- return StackOffset::getFixed(Offset) + ScalableOffset;
+ return StackOffset::getFixed(Offset) + ScalableOffset;
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
@@ -2013,12 +2013,12 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
}
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
- bool NeedsWinCFI, bool IsFirst) {
+ bool NeedsWinCFI, bool IsFirst) {
// If we are generating register pairs for a Windows function that requires
// EH support, then pair consecutive registers only. There are no unwind
// opcodes for saves/restores of non-consectuve register pairs.
- // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x,
- // save_lrpair.
+ // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x,
+ // save_lrpair.
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
if (Reg2 == AArch64::FP)
@@ -2027,14 +2027,14 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
return false;
if (Reg2 == Reg1 + 1)
return false;
- // If pairing a GPR with LR, the pair can be described by the save_lrpair
- // opcode. If this is the first register pair, it would end up with a
- // predecrement, but there's no save_lrpair_x opcode, so we can only do this
- // if LR is paired with something else than the first register.
- // The save_lrpair opcode requires the first register to be an odd one.
- if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
- (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
- return false;
+ // If pairing a GPR with LR, the pair can be described by the save_lrpair
+ // opcode. If this is the first register pair, it would end up with a
+ // predecrement, but there's no save_lrpair_x opcode, so we can only do this
+ // if LR is paired with something else than the first register.
+ // The save_lrpair opcode requires the first register to be an odd one.
+ if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
+ (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
+ return false;
return true;
}
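invalidateWindowsRegisterPairing above encodes which callee-save pairs the Windows unwind opcodes can describe: consecutive registers (save_regp/save_fregp), plus an odd-numbered register in x19..x27 paired with LR via save_lrpair provided it is not the first, pre-decrementing pair. A simplified model using architectural register numbers (the real code works on LLVM register enums and also consults NeedsWinCFI):

#include <cstdio>

static bool invalidWindowsPairing(int reg1, int reg2, bool isFirstPair) {
  const int LR = 30;
  if (reg2 == reg1 + 1)
    return false;   // consecutive registers: save_regp / save_fregp exist
  if (reg1 >= 19 && reg1 <= 27 && (reg1 - 19) % 2 == 0 && reg2 == LR && !isFirstPair)
    return false;   // save_lrpair, but there is no pre-decrementing save_lrpair_x
  return true;      // anything else has no Windows unwind opcode
}

int main() {
  std::printf("x19/x20:            %s\n", invalidWindowsPairing(19, 20, true) ? "split" : "pair");
  std::printf("x21/lr (not first): %s\n", invalidWindowsPairing(21, 30, false) ? "split" : "pair");
  std::printf("x19/lr (first):     %s\n", invalidWindowsPairing(19, 30, true) ? "split" : "pair");
}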
@@ -2043,10 +2043,10 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
/// LR and FP need to be allocated together when the frame needs to save
/// the frame-record. This means any other register pairing with LR is invalid.
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
- bool UsesWinAAPCS, bool NeedsWinCFI,
- bool NeedsFrameRecord, bool IsFirst) {
+ bool UsesWinAAPCS, bool NeedsWinCFI,
+ bool NeedsFrameRecord, bool IsFirst) {
if (UsesWinAAPCS)
- return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst);
+ return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst);
// If we need to store the frame record, don't pair any register
// with LR other than FP.
@@ -2110,22 +2110,22 @@ static void computeCalleeSaveRegisterPairs(
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int ByteOffset = AFI->getCalleeSavedStackSize();
- int StackFillDir = -1;
- int RegInc = 1;
- unsigned FirstReg = 0;
- if (NeedsWinCFI) {
- // For WinCFI, fill the stack from the bottom up.
- ByteOffset = 0;
- StackFillDir = 1;
- // As the CSI array is reversed to match PrologEpilogInserter, iterate
- // backwards, to pair up registers starting from lower numbered registers.
- RegInc = -1;
- FirstReg = Count - 1;
- }
+ int StackFillDir = -1;
+ int RegInc = 1;
+ unsigned FirstReg = 0;
+ if (NeedsWinCFI) {
+ // For WinCFI, fill the stack from the bottom up.
+ ByteOffset = 0;
+ StackFillDir = 1;
+ // As the CSI array is reversed to match PrologEpilogInserter, iterate
+ // backwards, to pair up registers starting from lower numbered registers.
+ RegInc = -1;
+ FirstReg = Count - 1;
+ }
int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
- // When iterating backwards, the loop condition relies on unsigned wraparound.
- for (unsigned i = FirstReg; i < Count; i += RegInc) {
+ // When iterating backwards, the loop condition relies on unsigned wraparound.
+ for (unsigned i = FirstReg; i < Count; i += RegInc) {
RegPairInfo RPI;
RPI.Reg1 = CSI[i].getReg();
@@ -2143,20 +2143,20 @@ static void computeCalleeSaveRegisterPairs(
llvm_unreachable("Unsupported register class.");
// Add the next reg to the pair if it is in the same register class.
- if (unsigned(i + RegInc) < Count) {
- unsigned NextReg = CSI[i + RegInc].getReg();
- bool IsFirst = i == FirstReg;
+ if (unsigned(i + RegInc) < Count) {
+ unsigned NextReg = CSI[i + RegInc].getReg();
+ bool IsFirst = i == FirstReg;
switch (RPI.Type) {
case RegPairInfo::GPR:
if (AArch64::GPR64RegClass.contains(NextReg) &&
- !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
- NeedsWinCFI, NeedsFrameRecord, IsFirst))
+ !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
+ NeedsWinCFI, NeedsFrameRecord, IsFirst))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR64:
if (AArch64::FPR64RegClass.contains(NextReg) &&
- !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
- IsFirst))
+ !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
+ IsFirst))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR128:
@@ -2185,7 +2185,7 @@ static void computeCalleeSaveRegisterPairs(
// The order of the registers in the list is controlled by
// getCalleeSavedRegs(), so they will always be in-order, as well.
assert((!RPI.isPaired() ||
- (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
+ (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
"Out of order callee saved regs!");
assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
@@ -2207,73 +2207,73 @@ static void computeCalleeSaveRegisterPairs(
"Callee-save registers not saved as adjacent register pair!");
RPI.FrameIdx = CSI[i].getFrameIdx();
- if (NeedsWinCFI &&
- RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
- RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
+ if (NeedsWinCFI &&
+ RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
+ RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
int Scale = RPI.getScale();
-
- int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
- assert(OffsetPre % Scale == 0);
-
+
+ int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
+ assert(OffsetPre % Scale == 0);
+
if (RPI.isScalable())
- ScalableByteOffset += StackFillDir * Scale;
+ ScalableByteOffset += StackFillDir * Scale;
else
- ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
+ ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
assert(!(RPI.isScalable() && RPI.isPaired()) &&
"Paired spill/fill instructions don't exist for SVE vectors");
// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
- if (AFI->hasCalleeSaveStackFreeSpace() && !NeedsWinCFI &&
+ if (AFI->hasCalleeSaveStackFreeSpace() && !NeedsWinCFI &&
!RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 &&
!RPI.isPaired()) {
- ByteOffset += 8 * StackFillDir;
+ ByteOffset += 8 * StackFillDir;
assert(ByteOffset % 16 == 0);
assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
- // A stack frame with a gap looks like this, bottom up:
- // d9, d8. x21, gap, x20, x19.
- // Set extra alignment on the x21 object (the only unpaired register)
- // to create the gap above it.
+ // A stack frame with a gap looks like this, bottom up:
+ // d9, d8. x21, gap, x20, x19.
+ // Set extra alignment on the x21 object (the only unpaired register)
+ // to create the gap above it.
MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
}
- int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
- assert(OffsetPost % Scale == 0);
- // If filling top down (default), we want the offset after incrementing it.
- // If fillibg bootom up (WinCFI) we need the original offset.
- int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
+ int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
+ assert(OffsetPost % Scale == 0);
+ // If filling top down (default), we want the offset after incrementing it.
+ // If fillibg bootom up (WinCFI) we need the original offset.
+ int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
RPI.Offset = Offset / Scale;
assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
(RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
"Offset out of bounds for LDP/STP immediate");
- // Save the offset to frame record so that the FP register can point to the
- // innermost frame record (spilled FP and LR registers).
- if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
- RPI.Reg2 == AArch64::FP) ||
- (IsWindows && RPI.Reg1 == AArch64::FP &&
- RPI.Reg2 == AArch64::LR)))
- AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
-
+ // Save the offset to frame record so that the FP register can point to the
+ // innermost frame record (spilled FP and LR registers).
+ if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
+ RPI.Reg2 == AArch64::FP) ||
+ (IsWindows && RPI.Reg1 == AArch64::FP &&
+ RPI.Reg2 == AArch64::LR)))
+ AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
+
RegPairs.push_back(RPI);
if (RPI.isPaired())
- i += RegInc;
- }
- if (NeedsWinCFI) {
- // If we need an alignment gap in the stack, align the topmost stack
- // object. A stack frame with a gap looks like this, bottom up:
- // x19, d8. d9, gap.
- // Set extra alignment on the topmost stack object (the first element in
- // CSI, which goes top down), to create the gap above it.
- if (AFI->hasCalleeSaveStackFreeSpace())
- MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));
- // We iterated bottom up over the registers; flip RegPairs back to top
- // down order.
- std::reverse(RegPairs.begin(), RegPairs.end());
- }
+ i += RegInc;
+ }
+ if (NeedsWinCFI) {
+ // If we need an alignment gap in the stack, align the topmost stack
+ // object. A stack frame with a gap looks like this, bottom up:
+ // x19, d8. d9, gap.
+ // Set extra alignment on the topmost stack object (the first element in
+ // CSI, which goes top down), to create the gap above it.
+ if (AFI->hasCalleeSaveStackFreeSpace())
+ MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));
+ // We iterated bottom up over the registers; flip RegPairs back to top
+ // down order.
+ std::reverse(RegPairs.begin(), RegPairs.end());
+ }
}
bool AArch64FrameLowering::spillCalleeSavedRegisters(
@@ -2412,7 +2412,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
// Update the StackIDs of the SVE stack slots.
MachineFrameInfo &MFI = MF.getFrameInfo();
if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR)
- MFI.setStackID(RPI.FrameIdx, TargetStackID::ScalableVector);
+ MFI.setStackID(RPI.FrameIdx, TargetStackID::ScalableVector);
}
return true;
@@ -2704,21 +2704,21 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
}
-bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
- MachineFunction &MF, const TargetRegisterInfo *TRI,
- std::vector<CalleeSavedInfo> &CSI) const {
- bool NeedsWinCFI = needsWinCFI(MF);
- // To match the canonical windows frame layout, reverse the list of
- // callee saved registers to get them laid out by PrologEpilogInserter
- // in the right order. (PrologEpilogInserter allocates stack objects top
- // down. Windows canonical prologs store higher numbered registers at
- // the top, thus have the CSI array start from the highest registers.)
- if (NeedsWinCFI)
- std::reverse(CSI.begin(), CSI.end());
- // Let the generic code do the rest of the setup.
- return false;
-}
-
+bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ bool NeedsWinCFI = needsWinCFI(MF);
+ // To match the canonical windows frame layout, reverse the list of
+ // callee saved registers to get them laid out by PrologEpilogInserter
+ // in the right order. (PrologEpilogInserter allocates stack objects top
+ // down. Windows canonical prologs store higher numbered registers at
+ // the top, thus have the CSI array start from the highest registers.)
+ if (NeedsWinCFI)
+ std::reverse(CSI.begin(), CSI.end());
+ // Let the generic code do the rest of the setup.
+ return false;
+}
+
bool AArch64FrameLowering::enableStackSlotScavenging(
const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -2761,7 +2761,7 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
#ifndef NDEBUG
// First process all fixed stack objects.
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
- assert(MFI.getStackID(I) != TargetStackID::ScalableVector &&
+ assert(MFI.getStackID(I) != TargetStackID::ScalableVector &&
"SVE vectors should never be passed on the stack by value, only by "
"reference.");
#endif
@@ -2791,7 +2791,7 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
SmallVector<int, 8> ObjectsToAllocate;
for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
unsigned StackID = MFI.getStackID(I);
- if (StackID != TargetStackID::ScalableVector)
+ if (StackID != TargetStackID::ScalableVector)
continue;
if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
continue;
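The loop above, in determineSVEStackObjectOffsets, keeps only objects whose stack ID is ScalableVector and which fall outside the callee-save frame-index range. A simplified sketch of that filter, assuming a flat vector of objects in place of the MachineFrameInfo API:

#include <vector>

enum class StackID { Default, ScalableVector, NoAlloc };

struct ObjectInfo {
  StackID ID;
};

// Collect the indices of SVE objects that still need an offset, skipping
// anything in [MinCSFrameIndex, MaxCSFrameIndex] because those callee-save
// slots were laid out earlier.
std::vector<int> collectSVEObjects(const std::vector<ObjectInfo> &Objects,
                                   int MinCSFrameIndex, int MaxCSFrameIndex) {
  std::vector<int> ToAllocate;
  for (int I = 0, E = static_cast<int>(Objects.size()); I != E; ++I) {
    if (Objects[I].ID != StackID::ScalableVector)
      continue;
    if (I >= MinCSFrameIndex && I <= MaxCSFrameIndex)
      continue;
    ToAllocate.push_back(I);
  }
  return ToAllocate;
}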
@@ -2945,12 +2945,12 @@ void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
const int64_t kMaxOffset = 255 * 16;
Register BaseReg = FrameReg;
- int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
+ int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
if (BaseRegOffsetBytes < kMinOffset ||
BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {
Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
- StackOffset::getFixed(BaseRegOffsetBytes), TII);
+ StackOffset::getFixed(BaseRegOffsetBytes), TII);
BaseReg = ScratchReg;
BaseRegOffsetBytes = 0;
}
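In the emitUnrolled hunk above, the scratch base register is only materialised when the fixed frame offset cannot be encoded by the tag-store immediates. A hedged sketch of that decision: kMaxOffset matches the 255 * 16 constant visible in the hunk, while the -256 * 16 lower bound is assumed for illustration since kMinOffset is defined outside the shown context.

#include <cstdint>

// Mirrors the bounds check in emitUnrolled above: the frame register can be
// used directly only if both the starting offset and the largest offset the
// unrolled sequence will form stay within the encodable range; otherwise a
// scratch base register is set up and the offset restarts at 0.
bool canUseFrameRegDirectly(int64_t BaseRegOffsetBytes, int64_t Size) {
  const int64_t kMaxOffset = 255 * 16;  // constant shown in the hunk above
  const int64_t kMinOffset = -256 * 16; // assumed value, not shown in the hunk
  return BaseRegOffsetBytes >= kMinOffset &&
         BaseRegOffsetBytes + (Size - Size % 32) <= kMaxOffset;
}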
@@ -3007,7 +3007,7 @@ void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
LoopI->setFlags(FrameRegUpdateFlags);
int64_t ExtraBaseRegUpdate =
- FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
+ FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
if (LoopSize < Size) {
assert(FrameRegUpdate);
assert(Size - LoopSize == 16);
@@ -3111,7 +3111,7 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
// realistically happens in function epilogue. Also, STGloop is expanded
// before that pass.
if (InsertI != MBB->end() &&
- canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
+ canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
&TotalOffset)) {
UpdateInstr = &*InsertI++;
LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "
@@ -3274,7 +3274,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
/// before the update. This is easily retrieved as it is exactly the offset
/// that is set in processFunctionBeforeFrameFinalized.
-StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
+StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
const MachineFunction &MF, int FI, Register &FrameReg,
bool IgnoreSPUpdates) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -3282,7 +3282,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
<< MFI.getObjectOffset(FI) << "\n");
FrameReg = AArch64::SP;
- return StackOffset::getFixed(MFI.getObjectOffset(FI));
+ return StackOffset::getFixed(MFI.getObjectOffset(FI));
}
return getFrameIndexReference(MF, FI, FrameReg);
@@ -3306,162 +3306,162 @@ unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
getStackAlign());
}
-
-namespace {
-struct FrameObject {
- bool IsValid = false;
- // Index of the object in MFI.
- int ObjectIndex = 0;
- // Group ID this object belongs to.
- int GroupIndex = -1;
- // This object should be placed first (closest to SP).
- bool ObjectFirst = false;
- // This object's group (which always contains the object with
- // ObjectFirst==true) should be placed first.
- bool GroupFirst = false;
-};
-
-class GroupBuilder {
- SmallVector<int, 8> CurrentMembers;
- int NextGroupIndex = 0;
- std::vector<FrameObject> &Objects;
-
-public:
- GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
- void AddMember(int Index) { CurrentMembers.push_back(Index); }
- void EndCurrentGroup() {
- if (CurrentMembers.size() > 1) {
- // Create a new group with the current member list. This might remove them
- // from their pre-existing groups. That's OK, dealing with overlapping
- // groups is too hard and unlikely to make a difference.
- LLVM_DEBUG(dbgs() << "group:");
- for (int Index : CurrentMembers) {
- Objects[Index].GroupIndex = NextGroupIndex;
- LLVM_DEBUG(dbgs() << " " << Index);
- }
- LLVM_DEBUG(dbgs() << "\n");
- NextGroupIndex++;
- }
- CurrentMembers.clear();
- }
-};
-
-bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
- // Objects at a lower index are closer to FP; objects at a higher index are
- // closer to SP.
- //
- // For consistency in our comparison, all invalid objects are placed
- // at the end. This also allows us to stop walking when we hit the
- // first invalid item after it's all sorted.
- //
- // The "first" object goes first (closest to SP), followed by the members of
- // the "first" group.
- //
- // The rest are sorted by the group index to keep the groups together.
- // Higher numbered groups are more likely to be around longer (i.e. untagged
- // in the function epilogue and not at some earlier point). Place them closer
- // to SP.
- //
- // If all else equal, sort by the object index to keep the objects in the
- // original order.
- return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex,
- A.ObjectIndex) <
- std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex,
- B.ObjectIndex);
-}
-} // namespace
-
-void AArch64FrameLowering::orderFrameObjects(
- const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
- if (!OrderFrameObjects || ObjectsToAllocate.empty())
- return;
-
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
- for (auto &Obj : ObjectsToAllocate) {
- FrameObjects[Obj].IsValid = true;
- FrameObjects[Obj].ObjectIndex = Obj;
- }
-
- // Identify stack slots that are tagged at the same time.
- GroupBuilder GB(FrameObjects);
- for (auto &MBB : MF) {
- for (auto &MI : MBB) {
- if (MI.isDebugInstr())
- continue;
- int OpIndex;
- switch (MI.getOpcode()) {
- case AArch64::STGloop:
- case AArch64::STZGloop:
- OpIndex = 3;
- break;
- case AArch64::STGOffset:
- case AArch64::STZGOffset:
- case AArch64::ST2GOffset:
- case AArch64::STZ2GOffset:
- OpIndex = 1;
- break;
- default:
- OpIndex = -1;
- }
-
- int TaggedFI = -1;
- if (OpIndex >= 0) {
- const MachineOperand &MO = MI.getOperand(OpIndex);
- if (MO.isFI()) {
- int FI = MO.getIndex();
- if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
- FrameObjects[FI].IsValid)
- TaggedFI = FI;
- }
- }
-
- // If this is a stack tagging instruction for a slot that is not part of a
- // group yet, either start a new group or add it to the current one.
- if (TaggedFI >= 0)
- GB.AddMember(TaggedFI);
- else
- GB.EndCurrentGroup();
- }
- // Groups should never span multiple basic blocks.
- GB.EndCurrentGroup();
- }
-
- // If the function's tagged base pointer is pinned to a stack slot, we want to
- // put that slot first when possible. This will likely place it at SP + 0,
- // and save one instruction when generating the base pointer because IRG does
- // not allow an immediate offset.
- const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
- Optional<int> TBPI = AFI.getTaggedBasePointerIndex();
- if (TBPI) {
- FrameObjects[*TBPI].ObjectFirst = true;
- FrameObjects[*TBPI].GroupFirst = true;
- int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
- if (FirstGroupIndex >= 0)
- for (FrameObject &Object : FrameObjects)
- if (Object.GroupIndex == FirstGroupIndex)
- Object.GroupFirst = true;
- }
-
- llvm::stable_sort(FrameObjects, FrameObjectCompare);
-
- int i = 0;
- for (auto &Obj : FrameObjects) {
- // All invalid items are sorted at the end, so it's safe to stop.
- if (!Obj.IsValid)
- break;
- ObjectsToAllocate[i++] = Obj.ObjectIndex;
- }
-
- LLVM_DEBUG(dbgs() << "Final frame order:\n"; for (auto &Obj
- : FrameObjects) {
- if (!Obj.IsValid)
- break;
- dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
- if (Obj.ObjectFirst)
- dbgs() << ", first";
- if (Obj.GroupFirst)
- dbgs() << ", group-first";
- dbgs() << "\n";
- });
-}
+
+namespace {
+struct FrameObject {
+ bool IsValid = false;
+ // Index of the object in MFI.
+ int ObjectIndex = 0;
+ // Group ID this object belongs to.
+ int GroupIndex = -1;
+ // This object should be placed first (closest to SP).
+ bool ObjectFirst = false;
+ // This object's group (which always contains the object with
+ // ObjectFirst==true) should be placed first.
+ bool GroupFirst = false;
+};
+
+class GroupBuilder {
+ SmallVector<int, 8> CurrentMembers;
+ int NextGroupIndex = 0;
+ std::vector<FrameObject> &Objects;
+
+public:
+ GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
+ void AddMember(int Index) { CurrentMembers.push_back(Index); }
+ void EndCurrentGroup() {
+ if (CurrentMembers.size() > 1) {
+ // Create a new group with the current member list. This might remove them
+ // from their pre-existing groups. That's OK, dealing with overlapping
+ // groups is too hard and unlikely to make a difference.
+ LLVM_DEBUG(dbgs() << "group:");
+ for (int Index : CurrentMembers) {
+ Objects[Index].GroupIndex = NextGroupIndex;
+ LLVM_DEBUG(dbgs() << " " << Index);
+ }
+ LLVM_DEBUG(dbgs() << "\n");
+ NextGroupIndex++;
+ }
+ CurrentMembers.clear();
+ }
+};
+
+bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
+ // Objects at a lower index are closer to FP; objects at a higher index are
+ // closer to SP.
+ //
+ // For consistency in our comparison, all invalid objects are placed
+ // at the end. This also allows us to stop walking when we hit the
+ // first invalid item after it's all sorted.
+ //
+ // The "first" object goes first (closest to SP), followed by the members of
+ // the "first" group.
+ //
+ // The rest are sorted by the group index to keep the groups together.
+ // Higher numbered groups are more likely to be around longer (i.e. untagged
+ // in the function epilogue and not at some earlier point). Place them closer
+ // to SP.
+ //
+ // If all else equal, sort by the object index to keep the objects in the
+ // original order.
+ return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex,
+ A.ObjectIndex) <
+ std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex,
+ B.ObjectIndex);
+}
+} // namespace
+
+void AArch64FrameLowering::orderFrameObjects(
+ const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
+ if (!OrderFrameObjects || ObjectsToAllocate.empty())
+ return;
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
+ for (auto &Obj : ObjectsToAllocate) {
+ FrameObjects[Obj].IsValid = true;
+ FrameObjects[Obj].ObjectIndex = Obj;
+ }
+
+ // Identify stack slots that are tagged at the same time.
+ GroupBuilder GB(FrameObjects);
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (MI.isDebugInstr())
+ continue;
+ int OpIndex;
+ switch (MI.getOpcode()) {
+ case AArch64::STGloop:
+ case AArch64::STZGloop:
+ OpIndex = 3;
+ break;
+ case AArch64::STGOffset:
+ case AArch64::STZGOffset:
+ case AArch64::ST2GOffset:
+ case AArch64::STZ2GOffset:
+ OpIndex = 1;
+ break;
+ default:
+ OpIndex = -1;
+ }
+
+ int TaggedFI = -1;
+ if (OpIndex >= 0) {
+ const MachineOperand &MO = MI.getOperand(OpIndex);
+ if (MO.isFI()) {
+ int FI = MO.getIndex();
+ if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
+ FrameObjects[FI].IsValid)
+ TaggedFI = FI;
+ }
+ }
+
+ // If this is a stack tagging instruction for a slot that is not part of a
+ // group yet, either start a new group or add it to the current one.
+ if (TaggedFI >= 0)
+ GB.AddMember(TaggedFI);
+ else
+ GB.EndCurrentGroup();
+ }
+ // Groups should never span multiple basic blocks.
+ GB.EndCurrentGroup();
+ }
+
+ // If the function's tagged base pointer is pinned to a stack slot, we want to
+ // put that slot first when possible. This will likely place it at SP + 0,
+ // and save one instruction when generating the base pointer because IRG does
+ // not allow an immediate offset.
+ const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+ Optional<int> TBPI = AFI.getTaggedBasePointerIndex();
+ if (TBPI) {
+ FrameObjects[*TBPI].ObjectFirst = true;
+ FrameObjects[*TBPI].GroupFirst = true;
+ int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
+ if (FirstGroupIndex >= 0)
+ for (FrameObject &Object : FrameObjects)
+ if (Object.GroupIndex == FirstGroupIndex)
+ Object.GroupFirst = true;
+ }
+
+ llvm::stable_sort(FrameObjects, FrameObjectCompare);
+
+ int i = 0;
+ for (auto &Obj : FrameObjects) {
+ // All invalid items are sorted at the end, so it's safe to stop.
+ if (!Obj.IsValid)
+ break;
+ ObjectsToAllocate[i++] = Obj.ObjectIndex;
+ }
+
+ LLVM_DEBUG(dbgs() << "Final frame order:\n"; for (auto &Obj
+ : FrameObjects) {
+ if (!Obj.IsValid)
+ break;
+ dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
+ if (Obj.ObjectFirst)
+ dbgs() << ", first";
+ if (Obj.GroupFirst)
+ dbgs() << ", group-first";
+ dbgs() << "\n";
+ });
+}
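The frame-object ordering restored above sorts by a tuple: invalid objects last, the pinned first object and its group ahead of the rest, then by group index to keep groups together, and finally by the original object index as a tiebreaker. A compact standalone demonstration of the grouping behaviour of that comparator, not part of the patch; FrameObject here is a local copy for illustration and the ObjectFirst/GroupFirst flags are left false to keep the example small.

#include <algorithm>
#include <iostream>
#include <tuple>
#include <vector>

struct FrameObject {
  bool IsValid = false;
  int ObjectIndex = 0;
  int GroupIndex = -1;
  bool ObjectFirst = false;
  bool GroupFirst = false;
};

static bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
  // Same tuple ordering as the comparator above.
  return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex,
                         A.ObjectIndex) <
         std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex,
                         B.ObjectIndex);
}

int main() {
  std::vector<FrameObject> Objects = {
      {true, 0, 1},  // member of group 1
      {true, 1, -1}, // ungrouped slot
      {true, 2, 0},  // member of group 0
      {true, 3, 1},  // second member of group 1
      {false, 4, -1} // not allocated this round
  };
  std::stable_sort(Objects.begin(), Objects.end(), FrameObjectCompare);
  for (const FrameObject &O : Objects)
    std::cout << O.ObjectIndex << " ";
  std::cout << "\n"; // prints: 1 2 0 3 4 (ungrouped, group 0, group 1, invalid)
  return 0;
}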
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.h
index 80079a9d98..b3a402de03 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64FrameLowering.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
-#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
@@ -41,8 +41,8 @@ public:
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
- StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg, bool PreferFP,
bool ForSimm) const;
@@ -67,11 +67,11 @@ public:
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- bool
- assignCalleeSavedSpillSlots(MachineFunction &MF,
- const TargetRegisterInfo *TRI,
- std::vector<CalleeSavedInfo> &CSI) const override;
-
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
+
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
@@ -94,12 +94,12 @@ public:
unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const;
- StackOffset
- getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
- Register &FrameReg,
- bool IgnoreSPUpdates) const override;
- StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF,
- int FI) const override;
+ StackOffset
+ getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
+ Register &FrameReg,
+ bool IgnoreSPUpdates) const override;
+ StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF,
+ int FI) const override;
int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const;
bool isSupportedStackID(TargetStackID::Value ID) const override {
@@ -107,7 +107,7 @@ public:
default:
return false;
case TargetStackID::Default:
- case TargetStackID::ScalableVector:
+ case TargetStackID::ScalableVector:
case TargetStackID::NoAlloc:
return true;
}
@@ -116,13 +116,13 @@ public:
bool isStackIdSafeForLocalArea(unsigned StackId) const override {
// We don't support putting SVE objects into the pre-allocated local
// frame block at the moment.
- return StackId != TargetStackID::ScalableVector;
+ return StackId != TargetStackID::ScalableVector;
}
- void
- orderFrameObjects(const MachineFunction &MF,
- SmallVectorImpl<int> &ObjectsToAllocate) const override;
-
+ void
+ orderFrameObjects(const MachineFunction &MF,
+ SmallVectorImpl<int> &ObjectsToAllocate) const override;
+
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
uint64_t StackBumpBytes) const;
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 94b5d7718d..a570f2d3b0 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64MachineFunctionInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
@@ -191,16 +191,16 @@ public:
return SelectSVELogicalImm(N, VT, Imm);
}
- template <MVT::SimpleValueType VT>
- bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
- return SelectSVEArithImm(N, VT, Imm);
- }
-
- template <unsigned Low, unsigned High, bool AllowSaturation = false>
- bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
- return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
+ template <MVT::SimpleValueType VT>
+ bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
+ return SelectSVEArithImm(N, VT, Imm);
}
+ template <unsigned Low, unsigned High, bool AllowSaturation = false>
+ bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
+ return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
+ }
+
// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template<signed Min, signed Max, signed Scale, bool Shift>
bool SelectCntImm(SDValue N, SDValue &Imm) {
@@ -329,10 +329,10 @@ private:
bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
- bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
- bool AllowSaturation, SDValue &Imm);
+ bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
+ bool AllowSaturation, SDValue &Imm);
- bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
+ bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
SDValue &Offset);
};
@@ -1377,12 +1377,12 @@ void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
- // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
- // because it's too simple to have needed special treatment during lowering.
- if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
- MachineMemOperand *MemOp = MemIntr->getMemOperand();
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
- }
+ // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
+ // because it's too simple to have needed special treatment during lowering.
+ if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
+ MachineMemOperand *MemOp = MemIntr->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
+ }
CurDAG->RemoveDeadNode(N);
}
@@ -3136,28 +3136,28 @@ bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
return false;
}
-bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
+bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
- uint64_t ImmVal = CNode->getZExtValue();
-
- switch (VT.SimpleTy) {
- case MVT::i8:
- ImmVal &= 0xFF;
- break;
- case MVT::i16:
- ImmVal &= 0xFFFF;
- break;
- case MVT::i32:
- ImmVal &= 0xFFFFFFFF;
- break;
- case MVT::i64:
- break;
- default:
- llvm_unreachable("Unexpected type");
- }
-
+ uint64_t ImmVal = CNode->getZExtValue();
+
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ ImmVal &= 0xFF;
+ break;
+ case MVT::i16:
+ ImmVal &= 0xFFFF;
+ break;
+ case MVT::i32:
+ ImmVal &= 0xFFFFFFFF;
+ break;
+ case MVT::i64:
+ break;
+ default:
+ llvm_unreachable("Unexpected type");
+ }
+
if (ImmVal < 256) {
- Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
+ Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
return true;
}
}
@@ -3201,30 +3201,30 @@ bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) {
return false;
}
-// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
-// Rather than attempt to normalise everything we can sometimes saturate the
-// shift amount during selection. This function also allows for consistent
-// isel patterns by ensuring the resulting "Imm" node is of the i32 type
-// required by the instructions.
-bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
- uint64_t High, bool AllowSaturation,
- SDValue &Imm) {
+// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
+// Rather than attempt to normalise everything we can sometimes saturate the
+// shift amount during selection. This function also allows for consistent
+// isel patterns by ensuring the resulting "Imm" node is of the i32 type
+// required by the instructions.
+bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
+ uint64_t High, bool AllowSaturation,
+ SDValue &Imm) {
if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
uint64_t ImmVal = CN->getZExtValue();
- // Reject shift amounts that are too small.
- if (ImmVal < Low)
- return false;
-
- // Reject or saturate shift amounts that are too big.
- if (ImmVal > High) {
- if (!AllowSaturation)
- return false;
- ImmVal = High;
+ // Reject shift amounts that are too small.
+ if (ImmVal < Low)
+ return false;
+
+ // Reject or saturate shift amounts that are too big.
+ if (ImmVal > High) {
+ if (!AllowSaturation)
+ return false;
+ ImmVal = High;
}
-
- Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
- return true;
+
+ Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
+ return true;
}
return false;
@@ -3833,9 +3833,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
}
break;
- case Intrinsic::aarch64_ld64b:
- SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
- return;
+ case Intrinsic::aarch64_ld64b:
+ SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
+ return;
}
} break;
case ISD::INTRINSIC_WO_CHAIN: {
@@ -4854,8 +4854,8 @@ static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
return EVT();
ElementCount EC = PredVT.getVectorElementCount();
- EVT ScalarVT =
- EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
+ EVT ScalarVT =
+ EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
return MemVT;
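The restored SelectSVEArithImm and SelectSVEShiftImm bodies above boil down to two scalar rules: mask an arithmetic immediate to the element width and accept it only if it fits in 8 bits, and reject or saturate a shift amount against the legal [Low, High] range. A self-contained sketch of both, with plain integers and illustrative helper names standing in for the SDValue/ConstantSDNode plumbing:

#include <cstdint>
#include <optional>

// Mask an immediate to the element width, then accept it only if it fits
// the 8-bit unsigned arithmetic-immediate field.
std::optional<uint32_t> selectArithImm(uint64_t Imm, unsigned ElementBits) {
  if (ElementBits < 64)
    Imm &= (uint64_t(1) << ElementBits) - 1;
  if (Imm < 256)
    return static_cast<uint32_t>(Imm);
  return std::nullopt;
}

// Reject shift amounts below Low; amounts above High are either rejected or
// saturated to High, mirroring the AllowSaturation template parameter.
std::optional<uint32_t> selectShiftImm(uint64_t Imm, uint64_t Low,
                                       uint64_t High, bool AllowSaturation) {
  if (Imm < Low)
    return std::nullopt;
  if (Imm > High) {
    if (!AllowSaturation)
      return std::nullopt;
    Imm = High;
  }
  return static_cast<uint32_t>(Imm);
}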
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.cpp
index c522ee7662..513c8932b3 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -112,76 +112,76 @@ EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
"optimization"),
cl::init(true));
-// Temporary option added for the purpose of testing functionality added
-// to DAGCombiner.cpp in D92230. It is expected that this can be removed
-// in the future once both implementations are based on MGATHER rather
-// than the GLD1 nodes added for the SVE gather load intrinsics.
-static cl::opt<bool>
-EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
- cl::desc("Combine extends of AArch64 masked "
- "gather intrinsics"),
- cl::init(true));
-
+// Temporary option added for the purpose of testing functionality added
+// to DAGCombiner.cpp in D92230. It is expected that this can be removed
+// in the future once both implementations are based on MGATHER rather
+// than the GLD1 nodes added for the SVE gather load intrinsics.
+static cl::opt<bool>
+EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
+ cl::desc("Combine extends of AArch64 masked "
+ "gather intrinsics"),
+ cl::init(true));
+
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
-static inline EVT getPackedSVEVectorVT(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- llvm_unreachable("unexpected element type for vector");
- case MVT::i8:
- return MVT::nxv16i8;
- case MVT::i16:
- return MVT::nxv8i16;
- case MVT::i32:
- return MVT::nxv4i32;
- case MVT::i64:
- return MVT::nxv2i64;
- case MVT::f16:
- return MVT::nxv8f16;
- case MVT::f32:
- return MVT::nxv4f32;
- case MVT::f64:
- return MVT::nxv2f64;
- case MVT::bf16:
- return MVT::nxv8bf16;
- }
-}
-
-// NOTE: Currently there's only a need to return integer vector types. If this
-// changes then just add an extra "type" parameter.
-static inline EVT getPackedSVEVectorVT(ElementCount EC) {
- switch (EC.getKnownMinValue()) {
- default:
- llvm_unreachable("unexpected element count for vector");
- case 16:
- return MVT::nxv16i8;
- case 8:
- return MVT::nxv8i16;
- case 4:
- return MVT::nxv4i32;
- case 2:
- return MVT::nxv2i64;
- }
-}
-
-static inline EVT getPromotedVTForPredicate(EVT VT) {
- assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
- "Expected scalable predicate vector type!");
- switch (VT.getVectorMinNumElements()) {
- default:
- llvm_unreachable("unexpected element count for vector");
- case 2:
- return MVT::nxv2i64;
- case 4:
- return MVT::nxv4i32;
- case 8:
- return MVT::nxv8i16;
- case 16:
- return MVT::nxv16i8;
- }
-}
-
+static inline EVT getPackedSVEVectorVT(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("unexpected element type for vector");
+ case MVT::i8:
+ return MVT::nxv16i8;
+ case MVT::i16:
+ return MVT::nxv8i16;
+ case MVT::i32:
+ return MVT::nxv4i32;
+ case MVT::i64:
+ return MVT::nxv2i64;
+ case MVT::f16:
+ return MVT::nxv8f16;
+ case MVT::f32:
+ return MVT::nxv4f32;
+ case MVT::f64:
+ return MVT::nxv2f64;
+ case MVT::bf16:
+ return MVT::nxv8bf16;
+ }
+}
+
+// NOTE: Currently there's only a need to return integer vector types. If this
+// changes then just add an extra "type" parameter.
+static inline EVT getPackedSVEVectorVT(ElementCount EC) {
+ switch (EC.getKnownMinValue()) {
+ default:
+ llvm_unreachable("unexpected element count for vector");
+ case 16:
+ return MVT::nxv16i8;
+ case 8:
+ return MVT::nxv8i16;
+ case 4:
+ return MVT::nxv4i32;
+ case 2:
+ return MVT::nxv2i64;
+ }
+}
+
+static inline EVT getPromotedVTForPredicate(EVT VT) {
+ assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
+ "Expected scalable predicate vector type!");
+ switch (VT.getVectorMinNumElements()) {
+ default:
+ llvm_unreachable("unexpected element count for vector");
+ case 2:
+ return MVT::nxv2i64;
+ case 4:
+ return MVT::nxv4i32;
+ case 8:
+ return MVT::nxv8i16;
+ case 16:
+ return MVT::nxv16i8;
+ }
+}
+
/// Returns true if VT's elements occupy the lowest bit positions of its
/// associated register class without any intervening space.
///
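A short aside on getPackedSVEVectorVT above: every packed scalable type fills one 128-bit SVE granule, so the lane count is simply SVEBitsPerBlock divided by the element width; the switch spells that mapping out per MVT, and getPromotedVTForPredicate walks it in reverse from a predicate's lane count. A constexpr sketch of the relationship, assuming the architectural 128-bit minimum vector length:

constexpr unsigned SVEBitsPerBlock = 128;

// Minimum lane count of a packed scalable vector for a given element width.
constexpr unsigned packedLaneCount(unsigned ElementBits) {
  return SVEBitsPerBlock / ElementBits;
}

static_assert(packedLaneCount(8) == 16, "i8 -> nxv16i8");
static_assert(packedLaneCount(16) == 8, "i16/f16/bf16 -> nxv8i16 etc.");
static_assert(packedLaneCount(32) == 4, "i32/f32 -> nxv4i32 / nxv4f32");
static_assert(packedLaneCount(64) == 2, "i64/f64 -> nxv2i64 / nxv2f64");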
@@ -194,42 +194,42 @@ static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
}
-// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
-// predicate and end with a passthru value matching the result type.
-static bool isMergePassthruOpcode(unsigned Opc) {
- switch (Opc) {
- default:
- return false;
- case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
- case AArch64ISD::BSWAP_MERGE_PASSTHRU:
- case AArch64ISD::CTLZ_MERGE_PASSTHRU:
- case AArch64ISD::CTPOP_MERGE_PASSTHRU:
- case AArch64ISD::DUP_MERGE_PASSTHRU:
- case AArch64ISD::ABS_MERGE_PASSTHRU:
- case AArch64ISD::NEG_MERGE_PASSTHRU:
- case AArch64ISD::FNEG_MERGE_PASSTHRU:
- case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
- case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
- case AArch64ISD::FCEIL_MERGE_PASSTHRU:
- case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
- case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
- case AArch64ISD::FRINT_MERGE_PASSTHRU:
- case AArch64ISD::FROUND_MERGE_PASSTHRU:
- case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
- case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
- case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
- case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
- case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
- case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
- case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
- case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
- case AArch64ISD::FSQRT_MERGE_PASSTHRU:
- case AArch64ISD::FRECPX_MERGE_PASSTHRU:
- case AArch64ISD::FABS_MERGE_PASSTHRU:
- return true;
- }
-}
-
+// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
+// predicate and end with a passthru value matching the result type.
+static bool isMergePassthruOpcode(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
+ case AArch64ISD::BSWAP_MERGE_PASSTHRU:
+ case AArch64ISD::CTLZ_MERGE_PASSTHRU:
+ case AArch64ISD::CTPOP_MERGE_PASSTHRU:
+ case AArch64ISD::DUP_MERGE_PASSTHRU:
+ case AArch64ISD::ABS_MERGE_PASSTHRU:
+ case AArch64ISD::NEG_MERGE_PASSTHRU:
+ case AArch64ISD::FNEG_MERGE_PASSTHRU:
+ case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
+ case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
+ case AArch64ISD::FCEIL_MERGE_PASSTHRU:
+ case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
+ case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
+ case AArch64ISD::FRINT_MERGE_PASSTHRU:
+ case AArch64ISD::FROUND_MERGE_PASSTHRU:
+ case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
+ case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
+ case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
+ case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
+ case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
+ case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
+ case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
+ case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
+ case AArch64ISD::FSQRT_MERGE_PASSTHRU:
+ case AArch64ISD::FRECPX_MERGE_PASSTHRU:
+ case AArch64ISD::FABS_MERGE_PASSTHRU:
+ return true;
+ }
+}
+
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -263,8 +263,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
addDRTypeForNEON(MVT::v4f16);
- if (Subtarget->hasBF16())
- addDRTypeForNEON(MVT::v4bf16);
+ if (Subtarget->hasBF16())
+ addDRTypeForNEON(MVT::v4bf16);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
@@ -273,8 +273,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);
- if (Subtarget->hasBF16())
- addQRTypeForNEON(MVT::v8bf16);
+ if (Subtarget->hasBF16())
+ addQRTypeForNEON(MVT::v8bf16);
}
if (Subtarget->hasSVE()) {
@@ -303,7 +303,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
}
- if (Subtarget->useSVEForFixedLengthVectors()) {
+ if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
@@ -334,9 +334,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
MVT::nxv2f64 }) {
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
- setCondCodeAction(ISD::SETLT, VT, Expand);
+ setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
- setCondCodeAction(ISD::SETLE, VT, Expand);
+ setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
@@ -402,12 +402,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
- setOperationAction(ISD::FADD, MVT::f128, LibCall);
+ setOperationAction(ISD::FADD, MVT::f128, LibCall);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOS, MVT::f128, Expand);
- setOperationAction(ISD::FDIV, MVT::f128, LibCall);
+ setOperationAction(ISD::FDIV, MVT::f128, LibCall);
setOperationAction(ISD::FMA, MVT::f128, Expand);
- setOperationAction(ISD::FMUL, MVT::f128, LibCall);
+ setOperationAction(ISD::FMUL, MVT::f128, LibCall);
setOperationAction(ISD::FNEG, MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
@@ -415,7 +415,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSIN, MVT::f128, Expand);
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
- setOperationAction(ISD::FSUB, MVT::f128, LibCall);
+ setOperationAction(ISD::FSUB, MVT::f128, LibCall);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
@@ -451,10 +451,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
@@ -509,9 +509,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::CTPOP, MVT::i128, Custom);
- setOperationAction(ISD::ABS, MVT::i32, Custom);
- setOperationAction(ISD::ABS, MVT::i64, Custom);
-
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
+ setOperationAction(ISD::ABS, MVT::i64, Custom);
+
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -699,57 +699,57 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
- // Generate outline atomics library calls only if LSE was not specified for
- // subtarget
- if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
-#define LCALLNAMES(A, B, N) \
- setLibcallName(A##N##_RELAX, #B #N "_relax"); \
- setLibcallName(A##N##_ACQ, #B #N "_acq"); \
- setLibcallName(A##N##_REL, #B #N "_rel"); \
- setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
-#define LCALLNAME4(A, B) \
- LCALLNAMES(A, B, 1) \
- LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
-#define LCALLNAME5(A, B) \
- LCALLNAMES(A, B, 1) \
- LCALLNAMES(A, B, 2) \
- LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
- LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
- LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
- LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
- LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
- LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
- LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
-#undef LCALLNAMES
-#undef LCALLNAME4
-#undef LCALLNAME5
- }
-
+ // Generate outline atomics library calls only if LSE was not specified for
+ // subtarget
+ if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
+#define LCALLNAMES(A, B, N) \
+ setLibcallName(A##N##_RELAX, #B #N "_relax"); \
+ setLibcallName(A##N##_ACQ, #B #N "_acq"); \
+ setLibcallName(A##N##_REL, #B #N "_rel"); \
+ setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
+#define LCALLNAME4(A, B) \
+ LCALLNAMES(A, B, 1) \
+ LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
+#define LCALLNAME5(A, B) \
+ LCALLNAMES(A, B, 1) \
+ LCALLNAMES(A, B, 2) \
+ LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
+ LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
+ LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
+ LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
+ LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
+ LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
+ LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
+#undef LCALLNAMES
+#undef LCALLNAME4
+#undef LCALLNAME5
+ }
+
// 128-bit loads and stores can be done without expanding
setOperationAction(ISD::LOAD, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
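The LCALLNAMES/LCALLNAME4/LCALLNAME5 macros restored further up register one outlined-atomics libcall per (operation, size, memory ordering) combination, producing names such as __aarch64_cas4_acq or __aarch64_ldadd8_rel. A sketch that enumerates the same name set without the RTLIB enum wiring, which the patch itself provides:

#include <iostream>
#include <string>
#include <vector>

int main() {
  const std::vector<std::string> Orderings = {"_relax", "_acq", "_rel",
                                              "_acq_rel"};
  // CAS additionally has a 16-byte variant (LCALLNAME5); the remaining
  // operations stop at 8 bytes (LCALLNAME4).
  struct Family { const char *Base; std::vector<int> Sizes; };
  const std::vector<Family> Families = {
      {"__aarch64_cas", {1, 2, 4, 8, 16}},
      {"__aarch64_swp", {1, 2, 4, 8}},
      {"__aarch64_ldadd", {1, 2, 4, 8}},
      {"__aarch64_ldset", {1, 2, 4, 8}},
      {"__aarch64_ldclr", {1, 2, 4, 8}},
      {"__aarch64_ldeor", {1, 2, 4, 8}},
  };

  for (const Family &F : Families)
    for (int Size : F.Sizes)
      for (const std::string &Ord : Orderings)
        std::cout << F.Base << Size << Ord << "\n"; // e.g. __aarch64_cas4_acq
  return 0;
}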
@@ -839,8 +839,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Trap.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
- setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
+ setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// We combine OR nodes for bitfield operations.
setTargetDAGCombine(ISD::OR);
@@ -850,7 +850,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV..
setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::ABS);
+ setTargetDAGCombine(ISD::ABS);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::XOR);
@@ -867,15 +867,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
- setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
setTargetDAGCombine(ISD::STORE);
if (Subtarget->supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
- setTargetDAGCombine(ISD::MGATHER);
- setTargetDAGCombine(ISD::MSCATTER);
-
+ setTargetDAGCombine(ISD::MGATHER);
+ setTargetDAGCombine(ISD::MSCATTER);
+
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT);
@@ -884,8 +884,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
- setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::VECREDUCE_ADD);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::VECREDUCE_ADD);
setTargetDAGCombine(ISD::GlobalAddress);
@@ -1005,34 +1005,34 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- // Saturates
+ // Saturates
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
- }
+ }
- // Vector reductions
+ // Vector reductions
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
- if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
- setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
-
- setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
- }
- }
- for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
- MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
- setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+ if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
+ }
}
- setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
+ for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
+ MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
+ setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+ }
+ setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
@@ -1093,112 +1093,112 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
// splat of 0 or undef) once vector selects supported in SVE codegen. See
// D68877 for more details.
- for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
- setOperationAction(ISD::BITREVERSE, VT, Custom);
- setOperationAction(ISD::BSWAP, VT, Custom);
- setOperationAction(ISD::CTLZ, VT, Custom);
- setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::UINT_TO_FP, VT, Custom);
- setOperationAction(ISD::SINT_TO_FP, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
- setOperationAction(ISD::MUL, VT, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::SDIV, VT, Custom);
- setOperationAction(ISD::UDIV, VT, Custom);
- setOperationAction(ISD::SMIN, VT, Custom);
- setOperationAction(ISD::UMIN, VT, Custom);
- setOperationAction(ISD::SMAX, VT, Custom);
- setOperationAction(ISD::UMAX, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::ABS, VT, Custom);
- setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+ for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
+ setOperationAction(ISD::BSWAP, VT, Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ setOperationAction(ISD::CTTZ, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
+ setOperationAction(ISD::MUL, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ setOperationAction(ISD::SMIN, VT, Custom);
+ setOperationAction(ISD::UMIN, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, Custom);
+ setOperationAction(ISD::UMAX, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::ABS, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
}
- // Illegal unpacked integer vector types.
- for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
+ // Illegal unpacked integer vector types.
+ for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- }
-
- for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
- setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
-
- // There are no legal MVT::nxv16f## based types.
- if (VT != MVT::nxv16i1) {
- setOperationAction(ISD::SINT_TO_FP, VT, Custom);
- setOperationAction(ISD::UINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ }
+
+ for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+
+ // There are no legal MVT::nxv16f## based types.
+ if (VT != MVT::nxv16i1) {
+ setOperationAction(ISD::SINT_TO_FP, VT, Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT, Custom);
}
}
- for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
- MVT::nxv4f32, MVT::nxv2f64}) {
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::FADD, VT, Custom);
- setOperationAction(ISD::FDIV, VT, Custom);
- setOperationAction(ISD::FMA, VT, Custom);
- setOperationAction(ISD::FMAXNUM, VT, Custom);
- setOperationAction(ISD::FMINNUM, VT, Custom);
- setOperationAction(ISD::FMUL, VT, Custom);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FSUB, VT, Custom);
- setOperationAction(ISD::FCEIL, VT, Custom);
- setOperationAction(ISD::FFLOOR, VT, Custom);
- setOperationAction(ISD::FNEARBYINT, VT, Custom);
- setOperationAction(ISD::FRINT, VT, Custom);
- setOperationAction(ISD::FROUND, VT, Custom);
- setOperationAction(ISD::FROUNDEVEN, VT, Custom);
- setOperationAction(ISD::FTRUNC, VT, Custom);
- setOperationAction(ISD::FSQRT, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FP_EXTEND, VT, Custom);
- setOperationAction(ISD::FP_ROUND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
- }
-
- for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
- }
-
- setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
-
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
-
+ for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
+ MVT::nxv4f32, MVT::nxv2f64}) {
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::FADD, VT, Custom);
+ setOperationAction(ISD::FDIV, VT, Custom);
+ setOperationAction(ISD::FMA, VT, Custom);
+ setOperationAction(ISD::FMAXNUM, VT, Custom);
+ setOperationAction(ISD::FMINNUM, VT, Custom);
+ setOperationAction(ISD::FMUL, VT, Custom);
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FSUB, VT, Custom);
+ setOperationAction(ISD::FCEIL, VT, Custom);
+ setOperationAction(ISD::FFLOOR, VT, Custom);
+ setOperationAction(ISD::FNEARBYINT, VT, Custom);
+ setOperationAction(ISD::FRINT, VT, Custom);
+ setOperationAction(ISD::FROUND, VT, Custom);
+ setOperationAction(ISD::FROUNDEVEN, VT, Custom);
+ setOperationAction(ISD::FTRUNC, VT, Custom);
+ setOperationAction(ISD::FSQRT, VT, Custom);
+ setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FP_EXTEND, VT, Custom);
+ setOperationAction(ISD::FP_ROUND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+ }
+
+ for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
+ }
+
+ setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
+
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
+
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
- if (Subtarget->useSVEForFixedLengthVectors()) {
+ if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
@@ -1216,61 +1216,61 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, VT, Custom);
for (auto VT : {MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::FP_ROUND, VT, Expand);
-
- // These operations are not supported on NEON but SVE can do them.
- setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
- setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
- setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
- setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
- setOperationAction(ISD::MUL, MVT::v1i64, Custom);
- setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
- setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
- setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
- setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
- setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
- setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
- setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
- setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
- setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
- setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
- setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
- setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
- setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
- setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
- setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
- setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
- setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
- setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
- setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
- setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
- setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
- setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
- setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
- setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
-
- // Int operations with no NEON support.
- for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
- MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
- setOperationAction(ISD::BITREVERSE, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
- setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
- }
-
- // FP operations with no NEON support.
- for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
- MVT::v1f64, MVT::v2f64})
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
-
- // Use SVE for vectors with more than 2 elements.
- for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
- setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
+
+ // These operations are not supported on NEON but SVE can do them.
+ setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v1i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
+ setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
+ setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
+ setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
+ setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
+ setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
+ setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
+ setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
+ setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
+ setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
+ setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
+ setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
+ setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
+ setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
+
+ // Int operations with no NEON support.
+ for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
+ MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
+ setOperationAction(ISD::CTTZ, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ }
+
+ // FP operations with no NEON support.
+ for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
+ MVT::v1f64, MVT::v2f64})
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+
+ // Use SVE for vectors with more than 2 elements.
+ for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
}
}
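
The block above registers Custom actions for 64-bit-element integer operations that NEON lacks (MUL, SDIV/UDIV, CTTZ, min/max and the v2i64 reductions) so that LowerOperation can re-express them as predicated SVE nodes such as AArch64ISD::MUL_PRED (see LowerMUL further down in this diff). A minimal standalone model of that routing decision, with illustrative names only, not the LLVM API:

#include <cstdio>

struct SubtargetModel { bool HasSVE; }; // stand-in for AArch64Subtarget

static const char *lowerV2i64Mul(const SubtargetModel &ST) {
  if (ST.HasSVE)
    return "AArch64ISD::MUL_PRED on an SVE container"; // LowerToPredicatedOp path
  return "NEON fallback (VMULL detection or expansion)";
}

int main() {
  std::printf("%s\n", lowerV2i64Mul({/*HasSVE=*/true}));
  std::printf("%s\n", lowerV2i64Mul({/*HasSVE=*/false}));
}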
@@ -1342,7 +1342,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
if (VT.isFloatingPoint() &&
- VT.getVectorElementType() != MVT::bf16 &&
+ VT.getVectorElementType() != MVT::bf16 &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
for (unsigned Opcode :
{ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
@@ -1368,64 +1368,64 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Lower fixed length vector operations to scalable equivalents.
- setOperationAction(ISD::ABS, VT, Custom);
+ setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
- setOperationAction(ISD::AND, VT, Custom);
- setOperationAction(ISD::ANY_EXTEND, VT, Custom);
- setOperationAction(ISD::BITREVERSE, VT, Custom);
- setOperationAction(ISD::BSWAP, VT, Custom);
- setOperationAction(ISD::CTLZ, VT, Custom);
- setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
+ setOperationAction(ISD::AND, VT, Custom);
+ setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+ setOperationAction(ISD::BITREVERSE, VT, Custom);
+ setOperationAction(ISD::BSWAP, VT, Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
- setOperationAction(ISD::FCEIL, VT, Custom);
- setOperationAction(ISD::FDIV, VT, Custom);
- setOperationAction(ISD::FFLOOR, VT, Custom);
- setOperationAction(ISD::FMA, VT, Custom);
- setOperationAction(ISD::FMAXNUM, VT, Custom);
- setOperationAction(ISD::FMINNUM, VT, Custom);
- setOperationAction(ISD::FMUL, VT, Custom);
- setOperationAction(ISD::FNEARBYINT, VT, Custom);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FRINT, VT, Custom);
- setOperationAction(ISD::FROUND, VT, Custom);
- setOperationAction(ISD::FSQRT, VT, Custom);
- setOperationAction(ISD::FSUB, VT, Custom);
- setOperationAction(ISD::FTRUNC, VT, Custom);
+ setOperationAction(ISD::FCEIL, VT, Custom);
+ setOperationAction(ISD::FDIV, VT, Custom);
+ setOperationAction(ISD::FFLOOR, VT, Custom);
+ setOperationAction(ISD::FMA, VT, Custom);
+ setOperationAction(ISD::FMAXNUM, VT, Custom);
+ setOperationAction(ISD::FMINNUM, VT, Custom);
+ setOperationAction(ISD::FMUL, VT, Custom);
+ setOperationAction(ISD::FNEARBYINT, VT, Custom);
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FRINT, VT, Custom);
+ setOperationAction(ISD::FROUND, VT, Custom);
+ setOperationAction(ISD::FSQRT, VT, Custom);
+ setOperationAction(ISD::FSUB, VT, Custom);
+ setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::MUL, VT, Custom);
- setOperationAction(ISD::OR, VT, Custom);
- setOperationAction(ISD::SDIV, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
- setOperationAction(ISD::SMAX, VT, Custom);
- setOperationAction(ISD::SMIN, VT, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::MUL, VT, Custom);
+ setOperationAction(ISD::OR, VT, Custom);
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, Custom);
+ setOperationAction(ISD::SMIN, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
- setOperationAction(ISD::SUB, VT, Custom);
+ setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::UDIV, VT, Custom);
- setOperationAction(ISD::UMAX, VT, Custom);
- setOperationAction(ISD::UMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Custom);
- setOperationAction(ISD::XOR, VT, Custom);
- setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ setOperationAction(ISD::UMAX, VT, Custom);
+ setOperationAction(ISD::UMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Custom);
+ setOperationAction(ISD::XOR, VT, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
}
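
addTypeForFixedLengthSVE flips essentially every common operation on a fixed-length vector type to Custom; nothing is lowered at this point, the work happens later when LowerOperation sees the Custom action and rebuilds the node on a scalable container. A rough standalone model of that register-then-dispatch pattern (names are illustrative, not the LLVM API):

#include <cstdio>
#include <map>
#include <string>

enum class Action { Legal, Custom, Expand };

int main() {
  std::map<std::string, Action> Actions;
  // addTypeForFixedLengthSVE(v8i32)-style registration: mark the ops Custom.
  for (const char *Op : {"ADD", "MUL", "SDIV", "SETCC", "LOAD", "STORE"})
    Actions[std::string(Op) + ":v8i32"] = Action::Custom;
  // LowerOperation-style dispatch: Custom ops get an SVE re-expression.
  if (Actions["SDIV:v8i32"] == Action::Custom)
    std::printf("SDIV:v8i32 -> predicated SVE divide (SDIV_PRED)\n");
}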
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
@@ -1597,7 +1597,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = KnownBits::commonBits(Known, Known2);
break;
}
case AArch64ISD::LOADgot:
@@ -1737,38 +1737,38 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::THREAD_POINTER)
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
MAKE_CASE(AArch64ISD::ADD_PRED)
- MAKE_CASE(AArch64ISD::MUL_PRED)
+ MAKE_CASE(AArch64ISD::MUL_PRED)
MAKE_CASE(AArch64ISD::SDIV_PRED)
- MAKE_CASE(AArch64ISD::SHL_PRED)
- MAKE_CASE(AArch64ISD::SMAX_PRED)
- MAKE_CASE(AArch64ISD::SMIN_PRED)
- MAKE_CASE(AArch64ISD::SRA_PRED)
- MAKE_CASE(AArch64ISD::SRL_PRED)
- MAKE_CASE(AArch64ISD::SUB_PRED)
+ MAKE_CASE(AArch64ISD::SHL_PRED)
+ MAKE_CASE(AArch64ISD::SMAX_PRED)
+ MAKE_CASE(AArch64ISD::SMIN_PRED)
+ MAKE_CASE(AArch64ISD::SRA_PRED)
+ MAKE_CASE(AArch64ISD::SRL_PRED)
+ MAKE_CASE(AArch64ISD::SUB_PRED)
MAKE_CASE(AArch64ISD::UDIV_PRED)
- MAKE_CASE(AArch64ISD::UMAX_PRED)
- MAKE_CASE(AArch64ISD::UMIN_PRED)
- MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::UMAX_PRED)
+ MAKE_CASE(AArch64ISD::UMIN_PRED)
+ MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ADC)
MAKE_CASE(AArch64ISD::SBC)
@@ -1837,14 +1837,14 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::UADDV)
MAKE_CASE(AArch64ISD::SRHADD)
MAKE_CASE(AArch64ISD::URHADD)
- MAKE_CASE(AArch64ISD::SHADD)
- MAKE_CASE(AArch64ISD::UHADD)
+ MAKE_CASE(AArch64ISD::SHADD)
+ MAKE_CASE(AArch64ISD::UHADD)
MAKE_CASE(AArch64ISD::SMINV)
MAKE_CASE(AArch64ISD::UMINV)
MAKE_CASE(AArch64ISD::SMAXV)
MAKE_CASE(AArch64ISD::UMAXV)
- MAKE_CASE(AArch64ISD::SADDV_PRED)
- MAKE_CASE(AArch64ISD::UADDV_PRED)
+ MAKE_CASE(AArch64ISD::SADDV_PRED)
+ MAKE_CASE(AArch64ISD::UADDV_PRED)
MAKE_CASE(AArch64ISD::SMAXV_PRED)
MAKE_CASE(AArch64ISD::UMAXV_PRED)
MAKE_CASE(AArch64ISD::SMINV_PRED)
@@ -1862,16 +1862,16 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FADD_PRED)
MAKE_CASE(AArch64ISD::FADDA_PRED)
MAKE_CASE(AArch64ISD::FADDV_PRED)
- MAKE_CASE(AArch64ISD::FDIV_PRED)
+ MAKE_CASE(AArch64ISD::FDIV_PRED)
MAKE_CASE(AArch64ISD::FMA_PRED)
MAKE_CASE(AArch64ISD::FMAXV_PRED)
- MAKE_CASE(AArch64ISD::FMAXNM_PRED)
+ MAKE_CASE(AArch64ISD::FMAXNM_PRED)
MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
MAKE_CASE(AArch64ISD::FMINV_PRED)
- MAKE_CASE(AArch64ISD::FMINNM_PRED)
+ MAKE_CASE(AArch64ISD::FMINNM_PRED)
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
- MAKE_CASE(AArch64ISD::FMUL_PRED)
- MAKE_CASE(AArch64ISD::FSUB_PRED)
+ MAKE_CASE(AArch64ISD::FMUL_PRED)
+ MAKE_CASE(AArch64ISD::FSUB_PRED)
MAKE_CASE(AArch64ISD::BIT)
MAKE_CASE(AArch64ISD::CBZ)
MAKE_CASE(AArch64ISD::CBNZ)
@@ -1983,15 +1983,15 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::LDP)
MAKE_CASE(AArch64ISD::STP)
MAKE_CASE(AArch64ISD::STNP)
- MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
- MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::INDEX_VECTOR)
- MAKE_CASE(AArch64ISD::UABD)
- MAKE_CASE(AArch64ISD::SABD)
- MAKE_CASE(AArch64ISD::CALL_RVMARKER)
+ MAKE_CASE(AArch64ISD::UABD)
+ MAKE_CASE(AArch64ISD::SABD)
+ MAKE_CASE(AArch64ISD::CALL_RVMARKER)
}
#undef MAKE_CASE
return nullptr;
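
The cases above come from the MAKE_CASE helper, which stringises the node enumerator so getTargetNodeName can return a printable name. A small self-contained sketch of that pattern; the exact macro body in this file is assumed to follow this shape:

#include <cstdio>

enum class NodeKind { ADD_PRED, MUL_PRED, SDIV_PRED };

#define MAKE_CASE(V)                                                           \
  case NodeKind::V:                                                            \
    return #V;

static const char *nodeName(NodeKind K) {
  switch (K) {
    MAKE_CASE(ADD_PRED)
    MAKE_CASE(MUL_PRED)
    MAKE_CASE(SDIV_PRED)
  }
  return nullptr;
}
#undef MAKE_CASE

int main() { std::printf("%s\n", nodeName(NodeKind::MUL_PRED)); } // prints MUL_PRED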
@@ -2079,7 +2079,7 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
- case TargetOpcode::STATEPOINT:
+ case TargetOpcode::STATEPOINT:
return emitPatchPoint(MI, BB);
case AArch64::CATCHRET:
@@ -2905,9 +2905,9 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
return std::make_pair(Value, Overflow);
}
-SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
- if (useSVEForFixedLengthVectorVT(Op.getValueType()))
- return LowerToScalableOp(Op, DAG);
+SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
+ if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+ return LowerToScalableOp(Op, DAG);
SDValue Sel = Op.getOperand(0);
SDValue Other = Op.getOperand(1);
@@ -3083,18 +3083,18 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isScalableVector())
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
-
+ if (Op.getValueType().isScalableVector())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
+
assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
- return SDValue();
+ return SDValue();
}
SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isScalableVector())
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
-
+ if (Op.getValueType().isScalableVector())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
+
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = SrcVal.getValueType();
@@ -3108,7 +3108,7 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
return Op;
}
- return SDValue();
+ return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
@@ -3118,14 +3118,14 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
// in the cost tables.
EVT InVT = Op.getOperand(0).getValueType();
EVT VT = Op.getValueType();
-
- if (VT.isScalableVector()) {
- unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
- ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
- : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
- return LowerToPredicatedOp(Op, DAG, Opcode);
- }
-
+
+ if (VT.isScalableVector()) {
+ unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
+ ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
+ : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
+ return LowerToPredicatedOp(Op, DAG, Opcode);
+ }
+
unsigned NumElts = InVT.getVectorNumElements();
// f16 conversions are promoted to f32 when full fp16 is not supported.
@@ -3138,9 +3138,9 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
}
- uint64_t VTSize = VT.getFixedSizeInBits();
- uint64_t InVTSize = InVT.getFixedSizeInBits();
- if (VTSize < InVTSize) {
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ uint64_t InVTSize = InVT.getFixedSizeInBits();
+ if (VTSize < InVTSize) {
SDLoc dl(Op);
SDValue Cv =
DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
@@ -3148,7 +3148,7 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
}
- if (VTSize > InVTSize) {
+ if (VTSize > InVTSize) {
SDLoc dl(Op);
MVT ExtVT =
MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
@@ -3183,11 +3183,11 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
return Op;
}
- return SDValue();
+ return SDValue();
}
-SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
@@ -3195,38 +3195,38 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
SDLoc dl(Op);
SDValue In = Op.getOperand(0);
EVT InVT = In.getValueType();
- unsigned Opc = Op.getOpcode();
- bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
-
- if (VT.isScalableVector()) {
- if (InVT.getVectorElementType() == MVT::i1) {
- // We can't directly extend an SVE predicate; extend it first.
- unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- EVT CastVT = getPromotedVTForPredicate(InVT);
- In = DAG.getNode(CastOpc, dl, CastVT, In);
- return DAG.getNode(Opc, dl, VT, In);
- }
-
- unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
- : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
- return LowerToPredicatedOp(Op, DAG, Opcode);
- }
-
- uint64_t VTSize = VT.getFixedSizeInBits();
- uint64_t InVTSize = InVT.getFixedSizeInBits();
- if (VTSize < InVTSize) {
+ unsigned Opc = Op.getOpcode();
+ bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
+
+ if (VT.isScalableVector()) {
+ if (InVT.getVectorElementType() == MVT::i1) {
+ // We can't directly extend an SVE predicate; extend it first.
+ unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ EVT CastVT = getPromotedVTForPredicate(InVT);
+ In = DAG.getNode(CastOpc, dl, CastVT, In);
+ return DAG.getNode(Opc, dl, VT, In);
+ }
+
+ unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
+ : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
+ return LowerToPredicatedOp(Op, DAG, Opcode);
+ }
+
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ uint64_t InVTSize = InVT.getFixedSizeInBits();
+ if (VTSize < InVTSize) {
MVT CastVT =
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
- In = DAG.getNode(Opc, dl, CastVT, In);
+ In = DAG.getNode(Opc, dl, CastVT, In);
return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
}
- if (VTSize > InVTSize) {
- unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ if (VTSize > InVTSize) {
+ unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = VT.changeVectorElementTypeToInteger();
In = DAG.getNode(CastOpc, dl, CastVT, In);
- return DAG.getNode(Opc, dl, VT, In);
+ return DAG.getNode(Opc, dl, VT, In);
}
return Op;
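
LowerVectorINT_TO_FP (and its FP_TO_INT counterpart above) only has three shapes to handle: same-width conversions go through directly, a narrower FP result is produced at the source width and then rounded, and a wider FP result first widens the integers. A standalone sketch of that decision, keyed on total vector widths in bits:

#include <cstdio>

enum class IntToFpPlan { Direct, ConvertThenRound, ExtendThenConvert };

static IntToFpPlan planIntToFp(unsigned FpVecBits, unsigned IntVecBits) {
  if (FpVecBits < IntVecBits)
    return IntToFpPlan::ConvertThenRound;  // e.g. v2i64 -> v2f32 via v2f64 + FP_ROUND
  if (FpVecBits > IntVecBits)
    return IntToFpPlan::ExtendThenConvert; // e.g. v4i16 -> v4f32 via [sz]ext to v4i32
  return IntToFpPlan::Direct;
}

int main() {
  std::printf("%d\n", static_cast<int>(planIntToFp(64, 128)));  // 1: convert then round
  std::printf("%d\n", static_cast<int>(planIntToFp(128, 64)));  // 2: extend then convert
}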
@@ -3259,7 +3259,7 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
// fp128.
if (Op.getValueType() != MVT::f128)
return Op;
- return SDValue();
+ return SDValue();
}
SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
@@ -3373,8 +3373,8 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
}
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::SIGN_EXTEND ||
- N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
+ if (N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
N->getOperand(0)->getValueType(0),
N->getValueType(0),
@@ -3399,13 +3399,13 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::SIGN_EXTEND ||
- N->getOpcode() == ISD::ANY_EXTEND ||
+ N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, true);
}
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::ZERO_EXTEND ||
- N->getOpcode() == ISD::ANY_EXTEND ||
+ N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, false);
}
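
isSignExtended and isZeroExtended (together with skipExtensionForVectorMULL) exist so that a wide multiply whose operands are extensions of narrower vectors can be matched to SMULL/UMULL: the extension is dropped and the narrow lanes are multiplied straight into double-width lanes. A scalar-lane sketch of that widening multiply:

#include <cstdint>
#include <cstdio>

static int64_t smullLane(int32_t A, int32_t B) {
  // SMULL multiplies the narrow lanes and produces double-width lanes, so the
  // explicit sign-extension of each operand never has to be materialised.
  return static_cast<int64_t>(A) * static_cast<int64_t>(B);
}

int main() { std::printf("%lld\n", (long long)smullLane(-100000, 300000)); }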
@@ -3454,15 +3454,15 @@ SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
return DAG.getMergeValues({AND, Chain}, dl);
}
-SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
-
- // If SVE is available then i64 vector multiplications can also be made legal.
- bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
-
- if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
-
+SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ // If SVE is available then i64 vector multiplications can also be made legal.
+ bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
+
+ if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
+
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
assert(VT.is128BitVector() && VT.isInteger() &&
@@ -3623,77 +3623,77 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_ptrue:
return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
Op.getOperand(1));
- case Intrinsic::aarch64_sve_clz:
- return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_cnt: {
- SDValue Data = Op.getOperand(3);
- // CTPOP only supports integer operands.
- if (Data.getValueType().isFloatingPoint())
- Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
- return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Data, Op.getOperand(1));
- }
+ case Intrinsic::aarch64_sve_clz:
+ return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_cnt: {
+ SDValue Data = Op.getOperand(3);
+ // CTPOP only supports integer operands.
+ if (Data.getValueType().isFloatingPoint())
+ Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
+ return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Data, Op.getOperand(1));
+ }
case Intrinsic::aarch64_sve_dupq_lane:
return LowerDUPQLane(Op, DAG);
case Intrinsic::aarch64_sve_convert_from_svbool:
return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
Op.getOperand(1));
- case Intrinsic::aarch64_sve_fneg:
- return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_frintp:
- return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_frintm:
- return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_frinti:
- return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_frintx:
- return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_frinta:
- return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_frintn:
- return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_frintz:
- return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_ucvtf:
- return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
- Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
- Op.getOperand(1));
- case Intrinsic::aarch64_sve_scvtf:
- return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
- Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
- Op.getOperand(1));
- case Intrinsic::aarch64_sve_fcvtzu:
- return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
- Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
- Op.getOperand(1));
- case Intrinsic::aarch64_sve_fcvtzs:
- return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
- Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
- Op.getOperand(1));
- case Intrinsic::aarch64_sve_fsqrt:
- return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_frecpx:
- return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_fabs:
- return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_abs:
- return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_neg:
- return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_fneg:
+ return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintp:
+ return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintm:
+ return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frinti:
+ return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintx:
+ return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frinta:
+ return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintn:
+ return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintz:
+ return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_ucvtf:
+ return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
+ Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_scvtf:
+ return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
+ Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_fcvtzu:
+ return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
+ Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_fcvtzs:
+ return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
+ Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_fsqrt:
+ return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frecpx:
+ return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_fabs:
+ return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_abs:
+ return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_neg:
+ return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_convert_to_svbool: {
EVT OutVT = Op.getValueType();
EVT InVT = Op.getOperand(1).getValueType();
@@ -3719,49 +3719,49 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
Op.getOperand(1), Scalar);
}
- case Intrinsic::aarch64_sve_rbit:
- return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
- Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
- Op.getOperand(1));
- case Intrinsic::aarch64_sve_revb:
- return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case Intrinsic::aarch64_sve_sxtb:
- return DAG.getNode(
- AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3),
- DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
- Op.getOperand(1));
- case Intrinsic::aarch64_sve_sxth:
- return DAG.getNode(
- AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3),
- DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
- Op.getOperand(1));
- case Intrinsic::aarch64_sve_sxtw:
- return DAG.getNode(
- AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3),
- DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
- Op.getOperand(1));
- case Intrinsic::aarch64_sve_uxtb:
- return DAG.getNode(
- AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3),
- DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
- Op.getOperand(1));
- case Intrinsic::aarch64_sve_uxth:
- return DAG.getNode(
- AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3),
- DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
- Op.getOperand(1));
- case Intrinsic::aarch64_sve_uxtw:
- return DAG.getNode(
- AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
- Op.getOperand(2), Op.getOperand(3),
- DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
- Op.getOperand(1));
+ case Intrinsic::aarch64_sve_rbit:
+ return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
+ Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_revb:
+ return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_sxtb:
+ return DAG.getNode(
+ AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3),
+ DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_sxth:
+ return DAG.getNode(
+ AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3),
+ DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_sxtw:
+ return DAG.getNode(
+ AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3),
+ DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_uxtb:
+ return DAG.getNode(
+ AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3),
+ DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_uxth:
+ return DAG.getNode(
+ AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3),
+ DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_uxtw:
+ return DAG.getNode(
+ AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3),
+ DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
+ Op.getOperand(1));
case Intrinsic::localaddress: {
const auto &MF = DAG.getMachineFunction();
@@ -3801,291 +3801,291 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::aarch64_neon_srhadd:
- case Intrinsic::aarch64_neon_urhadd:
- case Intrinsic::aarch64_neon_shadd:
- case Intrinsic::aarch64_neon_uhadd: {
- bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
- IntNo == Intrinsic::aarch64_neon_shadd);
- bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
- IntNo == Intrinsic::aarch64_neon_urhadd);
- unsigned Opcode =
- IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
- : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
+ case Intrinsic::aarch64_neon_urhadd:
+ case Intrinsic::aarch64_neon_shadd:
+ case Intrinsic::aarch64_neon_uhadd: {
+ bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
+ IntNo == Intrinsic::aarch64_neon_shadd);
+ bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
+ IntNo == Intrinsic::aarch64_neon_urhadd);
+ unsigned Opcode =
+ IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
+ : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
-
- case Intrinsic::aarch64_neon_uabd: {
- return DAG.getNode(AArch64ISD::UABD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
- }
- case Intrinsic::aarch64_neon_sabd: {
- return DAG.getNode(AArch64ISD::SABD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
- }
- }
-}
-
-bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
- if (VT.getVectorElementType() == MVT::i32 &&
- VT.getVectorElementCount().getKnownMinValue() >= 4)
- return true;
-
- return false;
-}
-
+
+ case Intrinsic::aarch64_neon_uabd: {
+ return DAG.getNode(AArch64ISD::UABD, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ case Intrinsic::aarch64_neon_sabd: {
+ return DAG.getNode(AArch64ISD::SABD, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ }
+}
+
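
Each merging SVE intrinsic handled above is rebuilt with its operands re-ordered: assuming the usual SVE merging-intrinsic layout of (passthru, predicate, source) in operands 1-3, the *_MERGE_PASSTHRU node is created as (predicate, source, passthru), i.e. active lanes take the computed result and inactive lanes keep the passthru value. A standalone lane-wise model of that merge semantics (the scalar loop is only an illustration):

#include <array>
#include <cstdio>

static std::array<float, 4> fnegMerge(const std::array<float, 4> &Passthru,
                                      const std::array<bool, 4> &Pg,
                                      const std::array<float, 4> &Src) {
  std::array<float, 4> R{};
  for (int I = 0; I < 4; ++I)
    R[I] = Pg[I] ? -Src[I] : Passthru[I]; // active lanes: result; inactive: passthru
  return R;
}

int main() {
  auto R = fnegMerge({9, 9, 9, 9}, {true, false, true, false}, {1, 2, 3, 4});
  std::printf("%g %g %g %g\n", R[0], R[1], R[2], R[3]); // -1 9 -3 9
}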
+bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
+ if (VT.getVectorElementType() == MVT::i32 &&
+ VT.getVectorElementCount().getKnownMinValue() >= 4)
+ return true;
+
+ return false;
+}
+
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return ExtVal.getValueType().isScalableVector();
}
-unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
- std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
- {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
- AArch64ISD::GLD1_MERGE_ZERO},
- {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
- AArch64ISD::GLD1_UXTW_MERGE_ZERO},
- {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
- AArch64ISD::GLD1_MERGE_ZERO},
- {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
- AArch64ISD::GLD1_SXTW_MERGE_ZERO},
- {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
- AArch64ISD::GLD1_SCALED_MERGE_ZERO},
- {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
- AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
- {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
- AArch64ISD::GLD1_SCALED_MERGE_ZERO},
- {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
- AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
- };
- auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
- return AddrModes.find(Key)->second;
-}
-
-unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
- std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
- {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
- AArch64ISD::SST1_PRED},
- {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
- AArch64ISD::SST1_UXTW_PRED},
- {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
- AArch64ISD::SST1_PRED},
- {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
- AArch64ISD::SST1_SXTW_PRED},
- {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
- AArch64ISD::SST1_SCALED_PRED},
- {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
- AArch64ISD::SST1_UXTW_SCALED_PRED},
- {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
- AArch64ISD::SST1_SCALED_PRED},
- {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
- AArch64ISD::SST1_SXTW_SCALED_PRED},
- };
- auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
- return AddrModes.find(Key)->second;
-}
-
-unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- llvm_unreachable("unimplemented opcode");
- return Opcode;
- case AArch64ISD::GLD1_MERGE_ZERO:
- return AArch64ISD::GLD1S_MERGE_ZERO;
- case AArch64ISD::GLD1_IMM_MERGE_ZERO:
- return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
- case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
- return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
- case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
- return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
- case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
- return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
- case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
- return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
- case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
- return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
- }
-}
-
-bool getGatherScatterIndexIsExtended(SDValue Index) {
- unsigned Opcode = Index.getOpcode();
- if (Opcode == ISD::SIGN_EXTEND_INREG)
- return true;
-
- if (Opcode == ISD::AND) {
- SDValue Splat = Index.getOperand(1);
- if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
- return false;
- ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
- if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
- return false;
- return true;
- }
-
- return false;
-}
-
-// If the base pointer of a masked gather or scatter is null, we
-// may be able to swap BasePtr & Index and use the vector + register
-// or vector + immediate addressing mode, e.g.
-// VECTOR + REGISTER:
-// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
-// -> getelementptr %offset, <vscale x N x T> %indices
-// VECTOR + IMMEDIATE:
-// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices)
-// -> getelementptr #x, <vscale x N x T> %indices
-void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
- unsigned &Opcode, bool IsGather,
- SelectionDAG &DAG) {
- if (!isNullConstant(BasePtr))
- return;
-
- ConstantSDNode *Offset = nullptr;
- if (Index.getOpcode() == ISD::ADD)
- if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
- if (isa<ConstantSDNode>(SplatVal))
- Offset = cast<ConstantSDNode>(SplatVal);
- else {
- BasePtr = SplatVal;
- Index = Index->getOperand(0);
- return;
- }
- }
-
- unsigned NewOp =
- IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
-
- if (!Offset) {
- std::swap(BasePtr, Index);
- Opcode = NewOp;
- return;
- }
-
- uint64_t OffsetVal = Offset->getZExtValue();
- unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
- auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
-
- if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
- // Index is out of range for the immediate addressing mode
- BasePtr = ConstOffset;
- Index = Index->getOperand(0);
- return;
- }
-
- // Immediate is in range
- Opcode = NewOp;
- BasePtr = Index->getOperand(0);
- Index = ConstOffset;
-}
-
-SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
- assert(MGT && "Can only custom lower gather load nodes");
-
- SDValue Index = MGT->getIndex();
- SDValue Chain = MGT->getChain();
- SDValue PassThru = MGT->getPassThru();
- SDValue Mask = MGT->getMask();
- SDValue BasePtr = MGT->getBasePtr();
- ISD::LoadExtType ExtTy = MGT->getExtensionType();
-
- ISD::MemIndexType IndexType = MGT->getIndexType();
- bool IsScaled =
- IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
- bool IsSigned =
- IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
- bool IdxNeedsExtend =
- getGatherScatterIndexIsExtended(Index) ||
- Index.getSimpleValueType().getVectorElementType() == MVT::i32;
- bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
-
- EVT VT = PassThru.getSimpleValueType();
- EVT MemVT = MGT->getMemoryVT();
- SDValue InputVT = DAG.getValueType(MemVT);
-
- if (VT.getVectorElementType() == MVT::bf16 &&
- !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
- return SDValue();
-
- // Handle FP data by using an integer gather and casting the result.
- if (VT.isFloatingPoint()) {
- EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
- PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
- InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
- }
-
- SDVTList VTs = DAG.getVTList(PassThru.getSimpleValueType(), MVT::Other);
-
- if (getGatherScatterIndexIsExtended(Index))
- Index = Index.getOperand(0);
-
- unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
- selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
- /*isGather=*/true, DAG);
-
- if (ResNeedsSignExtend)
- Opcode = getSignExtendedGatherOpcode(Opcode);
-
- SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT, PassThru};
- SDValue Gather = DAG.getNode(Opcode, DL, VTs, Ops);
-
- if (VT.isFloatingPoint()) {
- SDValue Cast = getSVESafeBitCast(VT, Gather, DAG);
- return DAG.getMergeValues({Cast, Gather}, DL);
- }
-
- return Gather;
-}
-
-SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
- assert(MSC && "Can only custom lower scatter store nodes");
-
- SDValue Index = MSC->getIndex();
- SDValue Chain = MSC->getChain();
- SDValue StoreVal = MSC->getValue();
- SDValue Mask = MSC->getMask();
- SDValue BasePtr = MSC->getBasePtr();
-
- ISD::MemIndexType IndexType = MSC->getIndexType();
- bool IsScaled =
- IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
- bool IsSigned =
- IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
- bool NeedsExtend =
- getGatherScatterIndexIsExtended(Index) ||
- Index.getSimpleValueType().getVectorElementType() == MVT::i32;
-
- EVT VT = StoreVal.getSimpleValueType();
- SDVTList VTs = DAG.getVTList(MVT::Other);
- EVT MemVT = MSC->getMemoryVT();
- SDValue InputVT = DAG.getValueType(MemVT);
-
- if (VT.getVectorElementType() == MVT::bf16 &&
- !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
- return SDValue();
-
- // Handle FP data by casting the data so an integer scatter can be used.
- if (VT.isFloatingPoint()) {
- EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
- StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
- InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
- }
-
- if (getGatherScatterIndexIsExtended(Index))
- Index = Index.getOperand(0);
-
- unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
- selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
- /*isGather=*/false, DAG);
-
- SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
- return DAG.getNode(Opcode, DL, VTs, Ops);
-}
-
+unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
+ std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
+ AArch64ISD::GLD1_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
+ AArch64ISD::GLD1_UXTW_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
+ AArch64ISD::GLD1_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
+ AArch64ISD::GLD1_SXTW_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
+ AArch64ISD::GLD1_SCALED_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
+ AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
+ AArch64ISD::GLD1_SCALED_MERGE_ZERO},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
+ AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
+ };
+ auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
+ return AddrModes.find(Key)->second;
+}
+
+unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
+ std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
+ AArch64ISD::SST1_PRED},
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
+ AArch64ISD::SST1_UXTW_PRED},
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
+ AArch64ISD::SST1_PRED},
+ {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
+ AArch64ISD::SST1_SXTW_PRED},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
+ AArch64ISD::SST1_SCALED_PRED},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
+ AArch64ISD::SST1_UXTW_SCALED_PRED},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
+ AArch64ISD::SST1_SCALED_PRED},
+ {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
+ AArch64ISD::SST1_SXTW_SCALED_PRED},
+ };
+ auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
+ return AddrModes.find(Key)->second;
+}
+
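
getGatherVecOpcode and getScatterVecOpcode are pure lookup tables keyed on the (scaled, signed-index, needs-extend) triple; note that the signed/unsigned distinction only matters once the index actually needs extending (SXTW vs UXTW forms). A standalone model of the gather-side table with the opcode names as strings:

#include <cstdio>
#include <map>
#include <string>
#include <tuple>

static std::string gatherOpcodeName(bool Scaled, bool Signed, bool Extend) {
  static const std::map<std::tuple<bool, bool, bool>, std::string> AddrModes = {
      {std::make_tuple(false, false, false), "GLD1_MERGE_ZERO"},
      {std::make_tuple(false, false, true),  "GLD1_UXTW_MERGE_ZERO"},
      {std::make_tuple(false, true,  false), "GLD1_MERGE_ZERO"},
      {std::make_tuple(false, true,  true),  "GLD1_SXTW_MERGE_ZERO"},
      {std::make_tuple(true,  false, false), "GLD1_SCALED_MERGE_ZERO"},
      {std::make_tuple(true,  false, true),  "GLD1_UXTW_SCALED_MERGE_ZERO"},
      {std::make_tuple(true,  true,  false), "GLD1_SCALED_MERGE_ZERO"},
      {std::make_tuple(true,  true,  true),  "GLD1_SXTW_SCALED_MERGE_ZERO"},
  };
  return AddrModes.at(std::make_tuple(Scaled, Signed, Extend));
}

int main() {
  // A scaled gather with signed indices that still need extending -> SXTW form.
  std::printf("%s\n", gatherOpcodeName(true, true, true).c_str());
}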
+unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("unimplemented opcode");
+ return Opcode;
+ case AArch64ISD::GLD1_MERGE_ZERO:
+ return AArch64ISD::GLD1S_MERGE_ZERO;
+ case AArch64ISD::GLD1_IMM_MERGE_ZERO:
+ return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
+ case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
+ return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
+ case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
+ return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
+ case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
+ return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
+ case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
+ return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
+ case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
+ return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
+ }
+}
+
+bool getGatherScatterIndexIsExtended(SDValue Index) {
+ unsigned Opcode = Index.getOpcode();
+ if (Opcode == ISD::SIGN_EXTEND_INREG)
+ return true;
+
+ if (Opcode == ISD::AND) {
+ SDValue Splat = Index.getOperand(1);
+ if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
+ return false;
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
+ if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+// If the base pointer of a masked gather or scatter is null, we
+// may be able to swap BasePtr & Index and use the vector + register
+// or vector + immediate addressing mode, e.g.
+// VECTOR + REGISTER:
+// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
+// -> getelementptr %offset, <vscale x N x T> %indices
+// VECTOR + IMMEDIATE:
+// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices)
+// -> getelementptr #x, <vscale x N x T> %indices
+void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
+ unsigned &Opcode, bool IsGather,
+ SelectionDAG &DAG) {
+ if (!isNullConstant(BasePtr))
+ return;
+
+ ConstantSDNode *Offset = nullptr;
+ if (Index.getOpcode() == ISD::ADD)
+ if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
+ if (isa<ConstantSDNode>(SplatVal))
+ Offset = cast<ConstantSDNode>(SplatVal);
+ else {
+ BasePtr = SplatVal;
+ Index = Index->getOperand(0);
+ return;
+ }
+ }
+
+ unsigned NewOp =
+ IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
+
+ if (!Offset) {
+ std::swap(BasePtr, Index);
+ Opcode = NewOp;
+ return;
+ }
+
+ uint64_t OffsetVal = Offset->getZExtValue();
+ unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
+ auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
+
+ if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
+ // Index is out of range for the immediate addressing mode
+ BasePtr = ConstOffset;
+ Index = Index->getOperand(0);
+ return;
+ }
+
+ // Immediate is in range
+ Opcode = NewOp;
+ BasePtr = Index->getOperand(0);
+ Index = ConstOffset;
+}
+
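
The tail of selectGatherScatterAddrMode encodes the vector+immediate addressing rule: a splatted constant offset can stay as the immediate only if it is a multiple of the element size and no more than 31 elements away from the base; otherwise the offset becomes the scalar base and the register form is used. A standalone sketch of that range check:

#include <cstdint>
#include <cstdio>

static bool fitsImmediateGatherForm(uint64_t OffsetBytes, unsigned ScalarBits) {
  unsigned ScalarBytes = ScalarBits / 8;
  return OffsetBytes % ScalarBytes == 0 && OffsetBytes / ScalarBytes <= 31;
}

int main() {
  std::printf("%d\n", fitsImmediateGatherForm(124, 32)); // 31 x 4-byte elements: immediate form
  std::printf("%d\n", fitsImmediateGatherForm(128, 32)); // 32 elements away: register form
  std::printf("%d\n", fitsImmediateGatherForm(6, 32));   // misaligned: register form
}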
+SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
+ assert(MGT && "Can only custom lower gather load nodes");
+
+ SDValue Index = MGT->getIndex();
+ SDValue Chain = MGT->getChain();
+ SDValue PassThru = MGT->getPassThru();
+ SDValue Mask = MGT->getMask();
+ SDValue BasePtr = MGT->getBasePtr();
+ ISD::LoadExtType ExtTy = MGT->getExtensionType();
+
+ ISD::MemIndexType IndexType = MGT->getIndexType();
+ bool IsScaled =
+ IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
+ bool IsSigned =
+ IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
+ bool IdxNeedsExtend =
+ getGatherScatterIndexIsExtended(Index) ||
+ Index.getSimpleValueType().getVectorElementType() == MVT::i32;
+ bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
+
+ EVT VT = PassThru.getSimpleValueType();
+ EVT MemVT = MGT->getMemoryVT();
+ SDValue InputVT = DAG.getValueType(MemVT);
+
+ if (VT.getVectorElementType() == MVT::bf16 &&
+ !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
+ return SDValue();
+
+ // Handle FP data by using an integer gather and casting the result.
+ if (VT.isFloatingPoint()) {
+ EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
+ PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
+ InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
+ }
+
+ SDVTList VTs = DAG.getVTList(PassThru.getSimpleValueType(), MVT::Other);
+
+ if (getGatherScatterIndexIsExtended(Index))
+ Index = Index.getOperand(0);
+
+ unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
+ selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
+ /*isGather=*/true, DAG);
+
+ if (ResNeedsSignExtend)
+ Opcode = getSignExtendedGatherOpcode(Opcode);
+
+ SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT, PassThru};
+ SDValue Gather = DAG.getNode(Opcode, DL, VTs, Ops);
+
+ if (VT.isFloatingPoint()) {
+ SDValue Cast = getSVESafeBitCast(VT, Gather, DAG);
+ return DAG.getMergeValues({Cast, Gather}, DL);
+ }
+
+ return Gather;
+}
+
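
LowerMGATHER handles floating-point data by gathering with the matching packed integer type and then reinterpreting the result back to the FP type via getSVESafeBitCast. A standalone single-lane sketch of that load-as-integer-then-bitcast idea:

#include <cstdint>
#include <cstdio>
#include <cstring>

static float gatherOneFloatLane(const uint32_t *IntData, uint64_t Index) {
  uint32_t Bits = IntData[Index];    // lane loaded by the integer gather
  float F;
  std::memcpy(&F, &Bits, sizeof(F)); // the bitcast back to the FP element type
  return F;
}

int main() {
  float Src[2] = {1.5f, -2.25f};
  uint32_t AsInt[2];
  std::memcpy(AsInt, Src, sizeof(Src));
  std::printf("%g\n", gatherOneFloatLane(AsInt, 1)); // prints -2.25
}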
+SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
+ assert(MSC && "Can only custom lower scatter store nodes");
+
+ SDValue Index = MSC->getIndex();
+ SDValue Chain = MSC->getChain();
+ SDValue StoreVal = MSC->getValue();
+ SDValue Mask = MSC->getMask();
+ SDValue BasePtr = MSC->getBasePtr();
+
+ ISD::MemIndexType IndexType = MSC->getIndexType();
+ bool IsScaled =
+ IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
+ bool IsSigned =
+ IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
+ bool NeedsExtend =
+ getGatherScatterIndexIsExtended(Index) ||
+ Index.getSimpleValueType().getVectorElementType() == MVT::i32;
+
+ EVT VT = StoreVal.getSimpleValueType();
+ SDVTList VTs = DAG.getVTList(MVT::Other);
+ EVT MemVT = MSC->getMemoryVT();
+ SDValue InputVT = DAG.getValueType(MemVT);
+
+ if (VT.getVectorElementType() == MVT::bf16 &&
+ !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
+ return SDValue();
+
+ // Handle FP data by casting the data so an integer scatter can be used.
+ if (VT.isFloatingPoint()) {
+ EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
+ StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
+ InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
+ }
+
+ if (getGatherScatterIndexIsExtended(Index))
+ Index = Index.getOperand(0);
+
+ unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
+ selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
+ /*isGather=*/false, DAG);
+
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
+ return DAG.getNode(Opcode, DL, VTs, Ops);
+}
+
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
EVT VT, EVT MemVT,
@@ -4151,9 +4151,9 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
// 256 bit non-temporal stores can be lowered to STNP. Do this as part of
// the custom lowering, as there are no un-paired non-temporal stores and
// legalization will break up 256 bit inputs.
- ElementCount EC = MemVT.getVectorElementCount();
+ ElementCount EC = MemVT.getVectorElementCount();
if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
- EC.isKnownEven() &&
+ EC.isKnownEven() &&
((MemVT.getScalarSizeInBits() == 8u ||
MemVT.getScalarSizeInBits() == 16u ||
MemVT.getScalarSizeInBits() == 32u ||
@@ -4162,11 +4162,11 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
- SDValue Hi =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
- MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
- StoreNode->getValue(),
- DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
+ SDValue Hi =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
+ MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
+ StoreNode->getValue(),
+ DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
@@ -4191,25 +4191,25 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
return SDValue();
}
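
The non-temporal path above cuts a 256-bit store value into two 128-bit halves, elements [0, EC/2) and [EC/2, EC), and feeds both into one STNP-style paired store. A standalone sketch of that split, using a plain array as a stand-in for the 256-bit vector value:

#include <array>
#include <cstdio>
#include <utility>

using V8i32 = std::array<int, 8>; // stand-in for a 256-bit v8i32 value
using V4i32 = std::array<int, 4>; // one 128-bit half

static std::pair<V4i32, V4i32> splitForStnp(const V8i32 &V) {
  V4i32 Lo{}, Hi{};
  for (int I = 0; I < 4; ++I) {
    Lo[I] = V[I];     // EXTRACT_SUBVECTOR at element 0
    Hi[I] = V[4 + I]; // EXTRACT_SUBVECTOR at element EC/2
  }
  return {Lo, Hi};
}

int main() {
  auto Halves = splitForStnp({0, 1, 2, 3, 4, 5, 6, 7});
  std::printf("%d %d\n", Halves.first[3], Halves.second[0]); // 3 4
}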
-// Generate SUBS and CSEL for integer abs.
-SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
- MVT VT = Op.getSimpleValueType();
-
- if (VT.isVector())
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
-
- SDLoc DL(Op);
- SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- Op.getOperand(0));
- // Generate SUBS & CSEL.
- SDValue Cmp =
- DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
- Op.getOperand(0), DAG.getConstant(0, DL, VT));
- return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
- DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
- Cmp.getValue(1));
-}
-
+// Generate SUBS and CSEL for integer abs.
+SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ if (VT.isVector())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
+
+ SDLoc DL(Op);
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Op.getOperand(0));
+ // Generate SUBS & CSEL.
+ SDValue Cmp =
+ DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(0, DL, VT));
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
+ DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
+ Cmp.getValue(1));
+}
+
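A scalar model of the sequence built above (SUBS of the operand against zero, then CSEL on the PL condition) might look like the following; it only shows the selection semantics, not the DAG construction.

#include <cassert>
#include <cstdint>

// Models: SUBS of x against 0 sets the flags; CSEL picks x when the PL
// (plus or zero) condition holds, otherwise the negated value.
static int64_t absViaCsel(int64_t x) {
  int64_t Neg = int64_t(0 - uint64_t(x)); // the ISD::SUB node (wraps for INT64_MIN)
  bool Pl = x >= 0;                       // "PL" condition after the SUBS
  return Pl ? x : Neg;                    // CSEL x, Neg, PL
}

int main() {
  assert(absViaCsel(-7) == 7);
  assert(absViaCsel(42) == 42);
}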
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -4262,35 +4262,35 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::UMULO:
return LowerXALUO(Op, DAG);
case ISD::FADD:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
case ISD::FSUB:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
case ISD::FMUL:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
case ISD::FMA:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
case ISD::FDIV:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
- case ISD::FNEG:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
- case ISD::FCEIL:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
- case ISD::FFLOOR:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
- case ISD::FNEARBYINT:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
- case ISD::FRINT:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
- case ISD::FROUND:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
- case ISD::FROUNDEVEN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
- case ISD::FTRUNC:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
- case ISD::FSQRT:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
- case ISD::FABS:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
+ case ISD::FNEG:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
+ case ISD::FCEIL:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
+ case ISD::FFLOOR:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
+ case ISD::FNEARBYINT:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
+ case ISD::FRINT:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
+ case ISD::FROUND:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
+ case ISD::FROUNDEVEN:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
+ case ISD::FTRUNC:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
+ case ISD::FSQRT:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
+ case ISD::FABS:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
return LowerFP_ROUND(Op, DAG);
@@ -4304,8 +4304,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerRETURNADDR(Op, DAG);
case ISD::ADDROFRETURNADDR:
return LowerADDROFRETURNADDR(Op, DAG);
- case ISD::CONCAT_VECTORS:
- return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::CONCAT_VECTORS:
+ return LowerCONCAT_VECTORS(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
@@ -4322,19 +4322,19 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:
case ISD::UDIV:
- return LowerDIV(Op, DAG);
+ return LowerDIV(Op, DAG);
case ISD::SMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
- /*OverrideNEON=*/true);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
+ /*OverrideNEON=*/true);
case ISD::UMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
- /*OverrideNEON=*/true);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
+ /*OverrideNEON=*/true);
case ISD::SMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
- /*OverrideNEON=*/true);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
+ /*OverrideNEON=*/true);
case ISD::UMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
- /*OverrideNEON=*/true);
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
+ /*OverrideNEON=*/true);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
@@ -4374,21 +4374,21 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::STORE:
return LowerSTORE(Op, DAG);
- case ISD::MGATHER:
- return LowerMGATHER(Op, DAG);
- case ISD::MSCATTER:
- return LowerMSCATTER(Op, DAG);
- case ISD::VECREDUCE_SEQ_FADD:
- return LowerVECREDUCE_SEQ_FADD(Op, DAG);
+ case ISD::MGATHER:
+ return LowerMGATHER(Op, DAG);
+ case ISD::MSCATTER:
+ return LowerMSCATTER(Op, DAG);
+ case ISD::VECREDUCE_SEQ_FADD:
+ return LowerVECREDUCE_SEQ_FADD(Op, DAG);
case ISD::VECREDUCE_ADD:
- case ISD::VECREDUCE_AND:
- case ISD::VECREDUCE_OR:
- case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
- case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
return LowerVECREDUCE(Op, DAG);
@@ -4400,21 +4400,21 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VSCALE:
return LowerVSCALE(Op, DAG);
- case ISD::ANY_EXTEND:
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
- case ISD::SIGN_EXTEND_INREG: {
- // Only custom lower when ExtraVT has a legal byte based element type.
- EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- EVT ExtraEltVT = ExtraVT.getVectorElementType();
- if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
- (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
- return SDValue();
-
- return LowerToPredicatedOp(Op, DAG,
- AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
- }
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: {
+ // Only custom lower when ExtraVT has a legal byte based element type.
+ EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
+ (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
+ return SDValue();
+
+ return LowerToPredicatedOp(Op, DAG,
+ AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
+ }
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
case ISD::LOAD:
@@ -4422,49 +4422,49 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
llvm_unreachable("Unexpected request to lower ISD::LOAD");
case ISD::ADD:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
- case ISD::AND:
- return LowerToScalableOp(Op, DAG);
- case ISD::SUB:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
- case ISD::FMAXNUM:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
- case ISD::FMINNUM:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
- case ISD::VSELECT:
- return LowerFixedLengthVectorSelectToSVE(Op, DAG);
- case ISD::ABS:
- return LowerABS(Op, DAG);
- case ISD::BITREVERSE:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
- /*OverrideNEON=*/true);
- case ISD::BSWAP:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
- case ISD::CTLZ:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
- /*OverrideNEON=*/true);
- case ISD::CTTZ:
- return LowerCTTZ(Op, DAG);
- }
-}
-
-bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
- return !Subtarget->useSVEForFixedLengthVectors();
-}
-
-bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
- EVT VT, bool OverrideNEON) const {
- if (!Subtarget->useSVEForFixedLengthVectors())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
+ case ISD::AND:
+ return LowerToScalableOp(Op, DAG);
+ case ISD::SUB:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
+ case ISD::FMAXNUM:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
+ case ISD::FMINNUM:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
+ case ISD::VSELECT:
+ return LowerFixedLengthVectorSelectToSVE(Op, DAG);
+ case ISD::ABS:
+ return LowerABS(Op, DAG);
+ case ISD::BITREVERSE:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
+ /*OverrideNEON=*/true);
+ case ISD::BSWAP:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
+ case ISD::CTLZ:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
+ /*OverrideNEON=*/true);
+ case ISD::CTTZ:
+ return LowerCTTZ(Op, DAG);
+ }
+}
+
+bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
+ return !Subtarget->useSVEForFixedLengthVectors();
+}
+
+bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
+ EVT VT, bool OverrideNEON) const {
+ if (!Subtarget->useSVEForFixedLengthVectors())
return false;
if (!VT.isFixedLengthVector())
return false;
- // Don't use SVE for vectors we cannot scalarize if required.
- switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+ // Don't use SVE for vectors we cannot scalarize if required.
+ switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
// Fixed length predicates should be promoted to i8.
// NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
- case MVT::i1:
+ case MVT::i1:
default:
return false;
case MVT::i8:
@@ -4477,16 +4477,16 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
break;
}
- // All SVE implementations support NEON sized vectors.
- if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
- return true;
-
+ // All SVE implementations support NEON sized vectors.
+ if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
+ return true;
+
// Ensure NEON MVTs only belong to a single register class.
- if (VT.getFixedSizeInBits() <= 128)
+ if (VT.getFixedSizeInBits() <= 128)
return false;
// Don't use SVE for types that don't fit.
- if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
+ if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
return false;
// TODO: Perhaps an artificial restriction, but worth having whilst getting
@@ -4586,9 +4586,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
(void)Res;
}
SmallVector<SDValue, 16> ArgValues;
- unsigned ExtraArgLocs = 0;
- for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
+ unsigned ExtraArgLocs = 0;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
if (Ins[i].Flags.isByVal()) {
// Byval is used for HFAs in the PCS, but the system should work in a
@@ -4716,44 +4716,44 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
if (VA.getLocInfo() == CCValAssign::Indirect) {
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
-
- uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
- unsigned NumParts = 1;
- if (Ins[i].Flags.isInConsecutiveRegs()) {
- assert(!Ins[i].Flags.isInConsecutiveRegsLast());
- while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
- ++NumParts;
- }
-
- MVT PartLoad = VA.getValVT();
- SDValue Ptr = ArgValue;
-
- // Ensure we generate all loads for each tuple part, whilst updating the
- // pointer after each load correctly using vscale.
- while (NumParts > 0) {
- ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
- InVals.push_back(ArgValue);
- NumParts--;
- if (NumParts > 0) {
- SDValue BytesIncrement = DAG.getVScale(
- DL, Ptr.getValueType(),
- APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
- SDNodeFlags Flags;
- Flags.setNoUnsignedWrap(true);
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- BytesIncrement, Flags);
- ExtraArgLocs++;
- i++;
- }
- }
- } else {
- if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
- ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
- ArgValue, DAG.getValueType(MVT::i32));
- InVals.push_back(ArgValue);
+
+ uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
+ unsigned NumParts = 1;
+ if (Ins[i].Flags.isInConsecutiveRegs()) {
+ assert(!Ins[i].Flags.isInConsecutiveRegsLast());
+ while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
+ ++NumParts;
+ }
+
+ MVT PartLoad = VA.getValVT();
+ SDValue Ptr = ArgValue;
+
+ // Ensure we generate all loads for each tuple part, whilst updating the
+ // pointer after each load correctly using vscale.
+ while (NumParts > 0) {
+ ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
+ InVals.push_back(ArgValue);
+ NumParts--;
+ if (NumParts > 0) {
+ SDValue BytesIncrement = DAG.getVScale(
+ DL, Ptr.getValueType(),
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ BytesIncrement, Flags);
+ ExtraArgLocs++;
+ i++;
+ }
+ }
+ } else {
+ if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
+ ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
+ ArgValue, DAG.getValueType(MVT::i32));
+ InVals.push_back(ArgValue);
}
}
- assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
+ assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
// varargs
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
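The part-load loop a few lines above steps the pointer by vscale * PartSize bytes between loads. A small model of that address arithmetic, with vscale fixed to an example value purely for illustration:

#include <cstdint>
#include <cstdio>

// Byte offset of part J of an indirectly passed SVE tuple, where PartSize is
// the minimum (vscale == 1) store size of one part.
static uint64_t partOffset(uint64_t PartSize, uint64_t VScale, unsigned J) {
  return J * PartSize * VScale;
}

int main() {
  // Example: a two-part tuple of scalable vectors, 16 bytes minimum per part,
  // on a 256-bit (vscale == 2) implementation.
  for (unsigned J = 0; J < 2; ++J)
    std::printf("part %u at byte offset %llu\n", J,
                (unsigned long long)partOffset(16, 2, J)); // prints 0 and 32
}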
@@ -4928,7 +4928,7 @@ SDValue AArch64TargetLowering::LowerCallResult(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
- CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
+ CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
@@ -5351,9 +5351,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// Walk the register/memloc assignments, inserting copies/loads.
- unsigned ExtraArgLocs = 0;
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
+ unsigned ExtraArgLocs = 0;
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -5395,49 +5395,49 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
-
- uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
- uint64_t PartSize = StoreSize;
- unsigned NumParts = 1;
- if (Outs[i].Flags.isInConsecutiveRegs()) {
- assert(!Outs[i].Flags.isInConsecutiveRegsLast());
- while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
- ++NumParts;
- StoreSize *= NumParts;
- }
-
+
+ uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
+ uint64_t PartSize = StoreSize;
+ unsigned NumParts = 1;
+ if (Outs[i].Flags.isInConsecutiveRegs()) {
+ assert(!Outs[i].Flags.isInConsecutiveRegsLast());
+ while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
+ ++NumParts;
+ StoreSize *= NumParts;
+ }
+
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
- int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
- MFI.setStackID(FI, TargetStackID::ScalableVector);
+ int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
+ MFI.setStackID(FI, TargetStackID::ScalableVector);
- MachinePointerInfo MPI =
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
- SDValue Ptr = DAG.getFrameIndex(
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+ SDValue Ptr = DAG.getFrameIndex(
FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
- SDValue SpillSlot = Ptr;
-
- // Ensure we generate all stores for each tuple part, whilst updating the
- // pointer after each store correctly using vscale.
- while (NumParts) {
- Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
- NumParts--;
- if (NumParts > 0) {
- SDValue BytesIncrement = DAG.getVScale(
- DL, Ptr.getValueType(),
- APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
- SDNodeFlags Flags;
- Flags.setNoUnsignedWrap(true);
-
- MPI = MachinePointerInfo(MPI.getAddrSpace());
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- BytesIncrement, Flags);
- ExtraArgLocs++;
- i++;
- }
- }
-
+ SDValue SpillSlot = Ptr;
+
+ // Ensure we generate all stores for each tuple part, whilst updating the
+ // pointer after each store correctly using vscale.
+ while (NumParts) {
+ Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
+ NumParts--;
+ if (NumParts > 0) {
+ SDValue BytesIncrement = DAG.getVScale(
+ DL, Ptr.getValueType(),
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
+ MPI = MachinePointerInfo(MPI.getAddrSpace());
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ BytesIncrement, Flags);
+ ExtraArgLocs++;
+ i++;
+ }
+ }
+
Arg = SpillSlot;
break;
}
@@ -5457,18 +5457,18 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// take care of putting the two halves in the right place but we have to
// combine them.
SDValue &Bits =
- llvm::find_if(RegsToPass,
- [=](const std::pair<unsigned, SDValue> &Elt) {
- return Elt.first == VA.getLocReg();
- })
+ llvm::find_if(RegsToPass,
+ [=](const std::pair<unsigned, SDValue> &Elt) {
+ return Elt.first == VA.getLocReg();
+ })
->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
// Call site info is used for function's parameter entry value
// tracking. For now we track only simple cases when parameter
// is transferred through whole register.
- llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
- return ArgReg.Reg == VA.getLocReg();
- });
+ llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
+ return ArgReg.Reg == VA.getLocReg();
+ });
} else {
RegsToPass.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
@@ -5487,7 +5487,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
uint32_t BEAlign = 0;
unsigned OpSize;
if (VA.getLocInfo() == CCValAssign::Indirect)
- OpSize = VA.getLocVT().getFixedSizeInBits();
+ OpSize = VA.getLocVT().getFixedSizeInBits();
else
OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
: VA.getValVT().getSizeInBits();
@@ -5647,17 +5647,17 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
return Ret;
}
- unsigned CallOpc = AArch64ISD::CALL;
- // Calls marked with "rv_marker" are special. They should be expanded to the
- // call, directly followed by a special marker sequence. Use the CALL_RVMARKER
- // to do that.
- if (CLI.CB && CLI.CB->hasRetAttr("rv_marker")) {
- assert(!IsTailCall && "tail calls cannot be marked with rv_marker");
- CallOpc = AArch64ISD::CALL_RVMARKER;
- }
-
+ unsigned CallOpc = AArch64ISD::CALL;
+ // Calls marked with "rv_marker" are special. They should be expanded to the
+ // call, directly followed by a special marker sequence. Use the CALL_RVMARKER
+ // to do that.
+ if (CLI.CB && CLI.CB->hasRetAttr("rv_marker")) {
+ assert(!IsTailCall && "tail calls cannot be marked with rv_marker");
+ CallOpc = AArch64ISD::CALL_RVMARKER;
+ }
+
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
+ Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
@@ -5681,7 +5681,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
bool AArch64TargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
- CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
+ CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC);
@@ -5696,7 +5696,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
auto &MF = DAG.getMachineFunction();
auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
- CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
+ CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
@@ -5741,9 +5741,9 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (RegsUsed.count(VA.getLocReg())) {
SDValue &Bits =
- llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
- return Elt.first == VA.getLocReg();
- })->second;
+ llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
+ return Elt.first == VA.getLocReg();
+ })->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
} else {
RetVals.emplace_back(VA.getLocReg(), Arg);
@@ -5963,7 +5963,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
SDValue FuncTLVGet = DAG.getLoad(
PtrMemVT, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- Align(PtrMemVT.getSizeInBits() / 8),
+ Align(PtrMemVT.getSizeInBits() / 8),
MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
Chain = FuncTLVGet.getValue(1);
@@ -6278,22 +6278,22 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
llvm_unreachable("Unexpected platform trying to use TLS");
}
-// Looks through \param Val to determine the bit that can be used to
-// check the sign of the value. It returns the unextended value and
-// the sign bit position.
-std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
- if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
- return {Val.getOperand(0),
- cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
- 1};
-
- if (Val.getOpcode() == ISD::SIGN_EXTEND)
- return {Val.getOperand(0),
- Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
-
- return {Val, Val.getValueSizeInBits() - 1};
-}
-
+// Looks through \param Val to determine the bit that can be used to
+// check the sign of the value. It returns the unextended value and
+// the sign bit position.
+std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
+ if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
+ return {Val.getOperand(0),
+ cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
+ 1};
+
+ if (Val.getOpcode() == ISD::SIGN_EXTEND)
+ return {Val.getOperand(0),
+ Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
+
+ return {Val, Val.getValueSizeInBits() - 1};
+}
+
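lookThroughSignExtension lets the branch lowering below test the sign bit of the narrower pre-extension value (bit 7 for an i8 sign-extended to i64, rather than bit 63). A scalar sketch of the idea, using a hypothetical helper rather than the DAG code:

#include <cassert>
#include <cstdint>

// Given the width of the value before sign extension, return the bit a
// TBZ/TBNZ-style test should use to check the sign.
static unsigned signBitPos(unsigned UnextendedBits) { return UnextendedBits - 1; }

int main() {
  int8_t Narrow = -5;
  int64_t Wide = Narrow;                             // sign extension to i64
  bool Neg = (uint64_t(Wide) >> signBitPos(8)) & 1;  // test bit 7 of the low byte
  assert(Neg == (Wide < 0));
}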
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
@@ -6388,10 +6388,10 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
- uint64_t SignBitPos;
- std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
+ uint64_t SignBitPos;
+ std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
- DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
+ DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
@@ -6399,10 +6399,10 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
- uint64_t SignBitPos;
- std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
+ uint64_t SignBitPos;
+ std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
- DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
+ DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
SDValue CCVal;
@@ -6549,9 +6549,9 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
}
- if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
-
+ if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
+
assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
"Unexpected type for custom ctpop lowering");
@@ -6575,16 +6575,16 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
return Val;
}
-SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- assert(VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));
-
- SDLoc DL(Op);
- SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
- return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
-}
-
+SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isScalableVector() ||
+ useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));
+
+ SDLoc DL(Op);
+ SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
+ return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
+}
+
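The lowering above relies on the identity cttz(x) == ctlz(bitreverse(x)), which maps to a bit-reverse followed by a count-leading-zeros per lane. A portable scalar check of the identity (32-bit lanes chosen arbitrarily for the sketch):

#include <cassert>
#include <cstdint>

static uint32_t bitreverse32(uint32_t X) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((X >> I) & 1u) << (31 - I);
  return R;
}

static unsigned ctlz32(uint32_t X) { // count of leading zero bits, 32 if X == 0
  unsigned N = 0;
  for (uint32_t Bit = 0x80000000u; Bit && !(X & Bit); Bit >>= 1)
    ++N;
  return N;
}

int main() {
  assert(ctlz32(bitreverse32(0x00000008u)) == 3);  // cttz(8) == 3
  assert(ctlz32(bitreverse32(0x80000000u)) == 31); // cttz(1u << 31) == 31
  assert(ctlz32(bitreverse32(0)) == 32);           // both conventions give 32 here
}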
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
@@ -6742,8 +6742,8 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
// instead of a CSEL in that case.
if (TrueVal == ~FalseVal) {
Opcode = AArch64ISD::CSINV;
- } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
- TrueVal == -FalseVal) {
+ } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
+ TrueVal == -FalseVal) {
Opcode = AArch64ISD::CSNEG;
} else if (TVal.getValueType() == MVT::i32) {
// If our operands are only 32-bit wide, make sure we use 32-bit
@@ -6943,9 +6943,9 @@ SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
SDValue Entry = Op.getOperand(2);
int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
- auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
- AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
-
+ auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
+ AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
+
SDNode *Dest =
DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
@@ -7012,13 +7012,13 @@ SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
}
SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
// The layout of the va_list struct is specified in the AArch64 Procedure Call
// Standard, section B.3.
MachineFunction &MF = DAG.getMachineFunction();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
- unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
- auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
+ unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
+ auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
@@ -7028,64 +7028,64 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SmallVector<SDValue, 4> MemOps;
// void *__stack at offset 0
- unsigned Offset = 0;
+ unsigned Offset = 0;
SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
- Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
+ Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
- MachinePointerInfo(SV), Align(PtrSize)));
+ MachinePointerInfo(SV), Align(PtrSize)));
- // void *__gr_top at offset 8 (4 on ILP32)
- Offset += PtrSize;
+ // void *__gr_top at offset 8 (4 on ILP32)
+ Offset += PtrSize;
int GPRSize = FuncInfo->getVarArgsGPRSize();
if (GPRSize > 0) {
SDValue GRTop, GRTopAddr;
- GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
- DAG.getConstant(Offset, DL, PtrVT));
+ GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(Offset, DL, PtrVT));
GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
DAG.getConstant(GPRSize, DL, PtrVT));
- GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
+ GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
- MachinePointerInfo(SV, Offset),
- Align(PtrSize)));
+ MachinePointerInfo(SV, Offset),
+ Align(PtrSize)));
}
- // void *__vr_top at offset 16 (8 on ILP32)
- Offset += PtrSize;
+ // void *__vr_top at offset 16 (8 on ILP32)
+ Offset += PtrSize;
int FPRSize = FuncInfo->getVarArgsFPRSize();
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
- DAG.getConstant(Offset, DL, PtrVT));
+ DAG.getConstant(Offset, DL, PtrVT));
VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
DAG.getConstant(FPRSize, DL, PtrVT));
- VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
+ VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
- MachinePointerInfo(SV, Offset),
- Align(PtrSize)));
- }
-
- // int __gr_offs at offset 24 (12 on ILP32)
- Offset += PtrSize;
- SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
- DAG.getConstant(Offset, DL, PtrVT));
- MemOps.push_back(
- DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
- GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
-
- // int __vr_offs at offset 28 (16 on ILP32)
- Offset += 4;
- SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
- DAG.getConstant(Offset, DL, PtrVT));
- MemOps.push_back(
- DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
- VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
+ MachinePointerInfo(SV, Offset),
+ Align(PtrSize)));
+ }
+
+ // int __gr_offs at offset 24 (12 on ILP32)
+ Offset += PtrSize;
+ SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(Offset, DL, PtrVT));
+ MemOps.push_back(
+ DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
+ GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
+
+ // int __vr_offs at offset 28 (16 on ILP32)
+ Offset += 4;
+ SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getConstant(Offset, DL, PtrVT));
+ MemOps.push_back(
+ DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
+ VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
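For reference, the five stores above fill in the AAPCS64 va_list described in section B.3 of the Procedure Call Standard. On LP64 the layout being written corresponds roughly to the struct below; the type name is made up for this sketch.

// Layout sketch of the AArch64 AAPCS va_list (LP64). On ILP32 the pointers
// are 4 bytes, giving the alternative offsets noted in the comments above.
struct AAPCS64VaList {
  void *__stack;   // offset 0:  next stacked argument
  void *__gr_top;  // offset 8:  end of the general-register save area
  void *__vr_top;  // offset 16: end of the FP/SIMD register save area
  int   __gr_offs; // offset 24: negative byte offset from __gr_top
  int   __vr_offs; // offset 28: negative byte offset from __vr_top
};
// Only meaningful when compiled for an LP64 target; matches the 32-byte size
// used by the va_copy lowering further down.
static_assert(sizeof(void *) != 8 || sizeof(AAPCS64VaList) == 32,
              "AAPCS64 va_list is 32 bytes on LP64");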
@@ -7108,10 +7108,10 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
// pointer.
SDLoc DL(Op);
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
- unsigned VaListSize =
- (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
- ? PtrSize
- : Subtarget->isTargetILP32() ? 20 : 32;
+ unsigned VaListSize =
+ (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+ ? PtrSize
+ : Subtarget->isTargetILP32() ? 20 : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
@@ -7264,34 +7264,34 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- SDValue ReturnAddress;
+ SDValue ReturnAddress;
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
- ReturnAddress = DAG.getLoad(
- VT, DL, DAG.getEntryNode(),
- DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
- } else {
- // Return LR, which contains the return address. Mark it an implicit
- // live-in.
- unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
- ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
- }
-
-  // The XPACLRI instruction assembles to a hint-space instruction before
-  // Armv8.3-A, so it can be used safely on any pre-Armv8.3-A architecture.
-  // On Armv8.3-A and onwards XPACI is available, so use that instead.
- SDNode *St;
- if (Subtarget->hasPAuth()) {
- St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
- } else {
- // XPACLRI operates on LR therefore we must move the operand accordingly.
- SDValue Chain =
- DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
- St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
- }
- return SDValue(St, 0);
+ ReturnAddress = DAG.getLoad(
+ VT, DL, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
+ } else {
+ // Return LR, which contains the return address. Mark it an implicit
+ // live-in.
+ unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
+ ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
+ }
+
+  // The XPACLRI instruction assembles to a hint-space instruction before
+  // Armv8.3-A, so it can be used safely on any pre-Armv8.3-A architecture.
+  // On Armv8.3-A and onwards XPACI is available, so use that instead.
+ SDNode *St;
+ if (Subtarget->hasPAuth()) {
+ St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
+ } else {
+ // XPACLRI operates on LR therefore we must move the operand accordingly.
+ SDValue Chain =
+ DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
+ St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
+ }
+ return SDValue(St, 0);
}
/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
@@ -7472,22 +7472,22 @@ static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
return SDValue();
}
-SDValue
-AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
- const DenormalMode &Mode) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
- SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
- return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
-}
-
-SDValue
-AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
- SelectionDAG &DAG) const {
- return Op;
-}
-
+SDValue
+AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
+ const DenormalMode &Mode) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+}
+
+SDValue
+AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op;
+}
+
SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &ExtraSteps,
@@ -7511,7 +7511,7 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
- if (!Reciprocal)
+ if (!Reciprocal)
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
ExtraSteps = 0;
@@ -7688,30 +7688,30 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
- if (VT.isScalableVector())
- return std::make_pair(0U, nullptr);
- if (VT.getFixedSizeInBits() == 64)
+ if (VT.isScalableVector())
+ return std::make_pair(0U, nullptr);
+ if (VT.getFixedSizeInBits() == 64)
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
- case 'w': {
+ case 'w': {
if (!Subtarget->hasFPARMv8())
break;
- if (VT.isScalableVector()) {
- if (VT.getVectorElementType() != MVT::i1)
- return std::make_pair(0U, &AArch64::ZPRRegClass);
- return std::make_pair(0U, nullptr);
- }
- uint64_t VTSize = VT.getFixedSizeInBits();
- if (VTSize == 16)
+ if (VT.isScalableVector()) {
+ if (VT.getVectorElementType() != MVT::i1)
+ return std::make_pair(0U, &AArch64::ZPRRegClass);
+ return std::make_pair(0U, nullptr);
+ }
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ if (VTSize == 16)
return std::make_pair(0U, &AArch64::FPR16RegClass);
- if (VTSize == 32)
+ if (VTSize == 32)
return std::make_pair(0U, &AArch64::FPR32RegClass);
- if (VTSize == 64)
+ if (VTSize == 64)
return std::make_pair(0U, &AArch64::FPR64RegClass);
- if (VTSize == 128)
+ if (VTSize == 128)
return std::make_pair(0U, &AArch64::FPR128RegClass);
break;
- }
+ }
// The instructions that this constraint is designed for can
// only take 128-bit registers so just use that regclass.
case 'x':
@@ -7732,11 +7732,11 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
} else {
PredicateConstraint PC = parsePredicateConstraint(Constraint);
if (PC != PredicateConstraint::Invalid) {
- if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
- return std::make_pair(0U, nullptr);
+ if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
+ return std::make_pair(0U, nullptr);
bool restricted = (PC == PredicateConstraint::Upl);
return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
- : std::make_pair(0U, &AArch64::PPRRegClass);
+ : std::make_pair(0U, &AArch64::PPRRegClass);
}
}
if (StringRef("{cc}").equals_lower(Constraint))
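These register classes back the GCC-style inline-assembly constraints on AArch64 ('r' for general-purpose registers, 'w' for FP/SIMD/SVE data registers), so when compiling for AArch64 a compiler front end accepts code along these lines:

// Adds two doubles through FP registers constrained with 'w' and copies an
// integer through a GPR constrained with 'r'. AArch64-only, for illustration.
static double fadd_asm(double A, double B) {
  double Out;
  asm("fadd %d0, %d1, %d2" : "=w"(Out) : "w"(A), "w"(B));
  return Out;
}

static long copy_asm(long X) {
  long Out;
  asm("mov %0, %1" : "=r"(Out) : "r"(X));
  return Out;
}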
@@ -7975,8 +7975,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
SDLoc dl(Op);
EVT VT = Op.getValueType();
- assert(!VT.isScalableVector() &&
- "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
+ assert(!VT.isScalableVector() &&
+ "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
unsigned NumElts = VT.getVectorNumElements();
struct ShuffleSourceInfo {
@@ -8047,9 +8047,9 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
}
}
unsigned ResMultiplier =
- VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
- uint64_t VTSize = VT.getFixedSizeInBits();
- NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
+ VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
// If the source vector is too wide or too narrow, we may nevertheless be able
@@ -8058,18 +8058,18 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();
- uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
- if (SrcVTSize == VTSize)
+ uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
+ if (SrcVTSize == VTSize)
continue;
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
- unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
+ unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
- if (SrcVTSize < VTSize) {
- assert(2 * SrcVTSize == VTSize);
+ if (SrcVTSize < VTSize) {
+ assert(2 * SrcVTSize == VTSize);
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
Src.ShuffleVec =
@@ -8078,11 +8078,11 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
continue;
}
- if (SrcVTSize != 2 * VTSize) {
- LLVM_DEBUG(
- dbgs() << "Reshuffle failed: result vector too small to extract\n");
- return SDValue();
- }
+ if (SrcVTSize != 2 * VTSize) {
+ LLVM_DEBUG(
+ dbgs() << "Reshuffle failed: result vector too small to extract\n");
+ return SDValue();
+ }
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
LLVM_DEBUG(
@@ -8111,13 +8111,13 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
DAG.getConstant(NumSrcElts, dl, MVT::i64));
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
- if (!SrcVT.is64BitVector()) {
- LLVM_DEBUG(
- dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
- "for SVE vectors.");
- return SDValue();
- }
-
+ if (!SrcVT.is64BitVector()) {
+ LLVM_DEBUG(
+ dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
+ "for SVE vectors.");
+ return SDValue();
+ }
+
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
VEXTSrc2,
DAG.getConstant(Imm, dl, MVT::i32));
@@ -8134,8 +8134,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
- Src.WindowScale =
- SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
+ Src.WindowScale =
+ SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
Src.WindowBase *= Src.WindowScale;
}
@@ -8159,8 +8159,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
- int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
- VT.getScalarSizeInBits());
+ int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
+ VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
@@ -8224,81 +8224,81 @@ static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
return true;
}
-/// Check if a vector shuffle corresponds to a DUP instruction with a larger
-/// element width than the vector lane type. If that is the case the function
-/// returns true and writes the value of the DUP instruction lane operand into
-/// DupLaneOp
-static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
- unsigned &DupLaneOp) {
- assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
- "Only possible block sizes for wide DUP are: 16, 32, 64");
-
- if (BlockSize <= VT.getScalarSizeInBits())
- return false;
- if (BlockSize % VT.getScalarSizeInBits() != 0)
- return false;
- if (VT.getSizeInBits() % BlockSize != 0)
- return false;
-
- size_t SingleVecNumElements = VT.getVectorNumElements();
- size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
- size_t NumBlocks = VT.getSizeInBits() / BlockSize;
-
- // We are looking for masks like
- // [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
-  // might be replaced by 'undefined'. BlockElts will eventually contain
- // lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
- // for the above examples)
- SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
- for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
- for (size_t I = 0; I < NumEltsPerBlock; I++) {
- int Elt = M[BlockIndex * NumEltsPerBlock + I];
- if (Elt < 0)
- continue;
- // For now we don't support shuffles that use the second operand
- if ((unsigned)Elt >= SingleVecNumElements)
- return false;
- if (BlockElts[I] < 0)
- BlockElts[I] = Elt;
- else if (BlockElts[I] != Elt)
- return false;
- }
-
- // We found a candidate block (possibly with some undefs). It must be a
- // sequence of consecutive integers starting with a value divisible by
- // NumEltsPerBlock with some values possibly replaced by undef-s.
-
- // Find first non-undef element
- auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
- assert(FirstRealEltIter != BlockElts.end() &&
- "Shuffle with all-undefs must have been caught by previous cases, "
- "e.g. isSplat()");
- if (FirstRealEltIter == BlockElts.end()) {
- DupLaneOp = 0;
- return true;
- }
-
- // Index of FirstRealElt in BlockElts
- size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
-
- if ((unsigned)*FirstRealEltIter < FirstRealIndex)
- return false;
- // BlockElts[0] must have the following value if it isn't undef:
- size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
-
- // Check the first element
- if (Elt0 % NumEltsPerBlock != 0)
- return false;
- // Check that the sequence indeed consists of consecutive integers (modulo
- // undefs)
- for (size_t I = 0; I < NumEltsPerBlock; I++)
- if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
- return false;
-
- DupLaneOp = Elt0 / NumEltsPerBlock;
- return true;
-}
-
+/// Check if a vector shuffle corresponds to a DUP instruction with a larger
+/// element width than the vector lane type. If that is the case the function
+/// returns true and writes the value of the DUP instruction lane operand into
+/// DupLaneOp
+static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
+ unsigned &DupLaneOp) {
+ assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
+ "Only possible block sizes for wide DUP are: 16, 32, 64");
+
+ if (BlockSize <= VT.getScalarSizeInBits())
+ return false;
+ if (BlockSize % VT.getScalarSizeInBits() != 0)
+ return false;
+ if (VT.getSizeInBits() % BlockSize != 0)
+ return false;
+
+ size_t SingleVecNumElements = VT.getVectorNumElements();
+ size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
+ size_t NumBlocks = VT.getSizeInBits() / BlockSize;
+
+ // We are looking for masks like
+ // [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
+  // might be replaced by 'undefined'. BlockElts will eventually contain
+ // lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
+ // for the above examples)
+ SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
+ for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
+ for (size_t I = 0; I < NumEltsPerBlock; I++) {
+ int Elt = M[BlockIndex * NumEltsPerBlock + I];
+ if (Elt < 0)
+ continue;
+ // For now we don't support shuffles that use the second operand
+ if ((unsigned)Elt >= SingleVecNumElements)
+ return false;
+ if (BlockElts[I] < 0)
+ BlockElts[I] = Elt;
+ else if (BlockElts[I] != Elt)
+ return false;
+ }
+
+ // We found a candidate block (possibly with some undefs). It must be a
+ // sequence of consecutive integers starting with a value divisible by
+ // NumEltsPerBlock with some values possibly replaced by undef-s.
+
+ // Find first non-undef element
+ auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
+ assert(FirstRealEltIter != BlockElts.end() &&
+ "Shuffle with all-undefs must have been caught by previous cases, "
+ "e.g. isSplat()");
+ if (FirstRealEltIter == BlockElts.end()) {
+ DupLaneOp = 0;
+ return true;
+ }
+
+ // Index of FirstRealElt in BlockElts
+ size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
+
+ if ((unsigned)*FirstRealEltIter < FirstRealIndex)
+ return false;
+ // BlockElts[0] must have the following value if it isn't undef:
+ size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
+
+ // Check the first element
+ if (Elt0 % NumEltsPerBlock != 0)
+ return false;
+ // Check that the sequence indeed consists of consecutive integers (modulo
+ // undefs)
+ for (size_t I = 0; I < NumEltsPerBlock; I++)
+ if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
+ return false;
+
+ DupLaneOp = Elt0 / NumEltsPerBlock;
+ return true;
+}
+
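To make the mask shape above concrete: for a v8i16 shuffle with BlockSize == 32 the mask [2, 3, 2, 3, 2, 3, 2, 3] repeats the 32-bit block holding lanes {2, 3}, so DupLaneOp becomes 1. Below is a small standalone re-statement of the check on plain integers, given as an illustration rather than a copy of the function above (it omits the rejection of lanes taken from the second shuffle source).

#include <cassert>
#include <vector>

// Returns true and sets DupLane if Mask repeats one BlockElts-wide aligned
// block of the first source vector; -1 entries mean "undef" and match anything.
static bool isWideDupMask(const std::vector<int> &Mask, unsigned BlockElts,
                          unsigned &DupLane) {
  if (BlockElts == 0 || Mask.size() % BlockElts != 0)
    return false;
  std::vector<int> Block(BlockElts, -1);
  for (size_t I = 0; I < Mask.size(); ++I) {
    int Elt = Mask[I];
    if (Elt < 0)
      continue;
    int &Slot = Block[I % BlockElts];
    if (Slot < 0)
      Slot = Elt;
    else if (Slot != Elt)
      return false;
  }
  // The defined entries must be consecutive lanes of one aligned block.
  int First = -1;
  size_t FirstIdx = 0;
  for (size_t I = 0; I < BlockElts; ++I)
    if (Block[I] >= 0) { First = Block[I]; FirstIdx = I; break; }
  if (First < 0) { DupLane = 0; return true; } // all-undef mask
  if ((size_t)First < FirstIdx)
    return false;
  unsigned Elt0 = unsigned(First) - unsigned(FirstIdx);
  if (Elt0 % BlockElts != 0)
    return false;
  for (size_t I = 0; I < BlockElts; ++I)
    if (Block[I] >= 0 && unsigned(Block[I]) != Elt0 + I)
      return false;
  DupLane = Elt0 / BlockElts;
  return true;
}

int main() {
  unsigned Lane = 0;
  assert(isWideDupMask({2, 3, 2, 3, 2, 3, 2, 3}, 2, Lane) && Lane == 1);
  assert(!isWideDupMask({2, 3, 3, 2, 2, 3, 2, 3}, 2, Lane));
}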
// check if an EXT instruction can handle the shuffle mask when the
// vector sources of the shuffle are different.
static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
@@ -8732,60 +8732,60 @@ static unsigned getDUPLANEOp(EVT EltType) {
llvm_unreachable("Invalid vector element type?");
}
-static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
- unsigned Opcode, SelectionDAG &DAG) {
- // Try to eliminate a bitcasted extract subvector before a DUPLANE.
- auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
- // Match: dup (bitcast (extract_subv X, C)), LaneC
- if (BitCast.getOpcode() != ISD::BITCAST ||
- BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return false;
-
- // The extract index must align in the destination type. That may not
- // happen if the bitcast is from narrow to wide type.
- SDValue Extract = BitCast.getOperand(0);
- unsigned ExtIdx = Extract.getConstantOperandVal(1);
- unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
- unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
- unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
- if (ExtIdxInBits % CastedEltBitWidth != 0)
- return false;
-
- // Update the lane value by offsetting with the scaled extract index.
- LaneC += ExtIdxInBits / CastedEltBitWidth;
-
- // Determine the casted vector type of the wide vector input.
- // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
- // Examples:
- // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
- // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
- unsigned SrcVecNumElts =
- Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
- CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
- SrcVecNumElts);
- return true;
- };
- MVT CastVT;
- if (getScaledOffsetDup(V, Lane, CastVT)) {
- V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
- } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- // The lane is incremented by the index of the extract.
- // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
- Lane += V.getConstantOperandVal(1);
- V = V.getOperand(0);
- } else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
- // The lane is decremented if we are splatting from the 2nd operand.
- // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
- unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
- Lane -= Idx * VT.getVectorNumElements() / 2;
- V = WidenVector(V.getOperand(Idx), DAG);
- } else if (VT.getSizeInBits() == 64) {
- // Widen the operand to 128-bit register with undef.
- V = WidenVector(V, DAG);
- }
- return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
-}
-
+static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
+ unsigned Opcode, SelectionDAG &DAG) {
+ // Try to eliminate a bitcasted extract subvector before a DUPLANE.
+ auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
+ // Match: dup (bitcast (extract_subv X, C)), LaneC
+ if (BitCast.getOpcode() != ISD::BITCAST ||
+ BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return false;
+
+ // The extract index must align in the destination type. That may not
+ // happen if the bitcast is from narrow to wide type.
+ SDValue Extract = BitCast.getOperand(0);
+ unsigned ExtIdx = Extract.getConstantOperandVal(1);
+ unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
+ unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
+ unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
+ if (ExtIdxInBits % CastedEltBitWidth != 0)
+ return false;
+
+ // Update the lane value by offsetting with the scaled extract index.
+ LaneC += ExtIdxInBits / CastedEltBitWidth;
+
+ // Determine the casted vector type of the wide vector input.
+ // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
+ // Examples:
+ // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
+ // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
+ unsigned SrcVecNumElts =
+ Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
+ CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
+ SrcVecNumElts);
+ return true;
+ };
+ MVT CastVT;
+ if (getScaledOffsetDup(V, Lane, CastVT)) {
+ V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
+ } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ // The lane is incremented by the index of the extract.
+ // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
+ Lane += V.getConstantOperandVal(1);
+ V = V.getOperand(0);
+ } else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
+ // The lane is decremented if we are splatting from the 2nd operand.
+ // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
+ unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
+ Lane -= Idx * VT.getVectorNumElements() / 2;
+ V = WidenVector(V.getOperand(Idx), DAG);
+ } else if (VT.getSizeInBits() == 64) {
+ // Widen the operand to 128-bit register with undef.
+ V = WidenVector(V, DAG);
+ }
+ return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
+}
+
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -8819,25 +8819,25 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Otherwise, duplicate from the lane of the input vector.
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
- return constructDup(V1, Lane, dl, VT, Opcode, DAG);
- }
-
- // Check if the mask matches a DUP for a wider element
- for (unsigned LaneSize : {64U, 32U, 16U}) {
- unsigned Lane = 0;
- if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
- unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
- : LaneSize == 32 ? AArch64ISD::DUPLANE32
- : AArch64ISD::DUPLANE16;
- // Cast V1 to an integer vector with required lane size
- MVT NewEltTy = MVT::getIntegerVT(LaneSize);
- unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
- MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
- V1 = DAG.getBitcast(NewVecTy, V1);
-    // Construct the DUP instruction
- V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
- // Cast back to the original type
- return DAG.getBitcast(VT, V1);
+ return constructDup(V1, Lane, dl, VT, Opcode, DAG);
+ }
+
+ // Check if the mask matches a DUP for a wider element
+ for (unsigned LaneSize : {64U, 32U, 16U}) {
+ unsigned Lane = 0;
+ if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
+ unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
+ : LaneSize == 32 ? AArch64ISD::DUPLANE32
+ : AArch64ISD::DUPLANE16;
+ // Cast V1 to an integer vector with required lane size
+ MVT NewEltTy = MVT::getIntegerVT(LaneSize);
+ unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
+ MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
+ V1 = DAG.getBitcast(NewVecTy, V1);
+    // Construct the DUP instruction
+ V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
+ // Cast back to the original type
+ return DAG.getBitcast(VT, V1);
}
}
@@ -8909,7 +8909,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
EVT ScalarVT = VT.getVectorElementType();
- if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
+ if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
ScalarVT = MVT::i32;
return DAG.getNode(
@@ -8950,9 +8950,9 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
EVT ElemVT = VT.getScalarType();
SDValue SplatVal = Op.getOperand(0);
- if (useSVEForFixedLengthVectorVT(VT))
- return LowerToScalableOp(Op, DAG);
-
+ if (useSVEForFixedLengthVectorVT(VT))
+ return LowerToScalableOp(Op, DAG);
+
// Extend input splat value where needed to fit into a GPR (32b or 64b only)
// FPRs don't have this restriction.
switch (ElemVT.getSimpleVT().SimpleTy) {
@@ -9382,9 +9382,9 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {
- if (useSVEForFixedLengthVectorVT(Op.getValueType()))
- return LowerToScalableOp(Op, DAG);
-
+ if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+ return LowerToScalableOp(Op, DAG);
+
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
return Res;
@@ -9543,18 +9543,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
bool isConstant = true;
bool AllLanesExtractElt = true;
unsigned NumConstantLanes = 0;
- unsigned NumDifferentLanes = 0;
- unsigned NumUndefLanes = 0;
+ unsigned NumDifferentLanes = 0;
+ unsigned NumUndefLanes = 0;
SDValue Value;
SDValue ConstantValue;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
AllLanesExtractElt = false;
- if (V.isUndef()) {
- ++NumUndefLanes;
+ if (V.isUndef()) {
+ ++NumUndefLanes;
continue;
- }
+ }
if (i > 0)
isOnlyLowElement = false;
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
@@ -9570,10 +9570,10 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (!Value.getNode())
Value = V;
- else if (V != Value) {
+ else if (V != Value) {
usesOnlyOneValue = false;
- ++NumDifferentLanes;
- }
+ ++NumDifferentLanes;
+ }
}
if (!Value.getNode()) {
@@ -9699,20 +9699,20 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
}
- // If we need to insert a small number of different non-constant elements and
- // the vector width is sufficiently large, prefer using DUP with the common
- // value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
- // skip the constant lane handling below.
- bool PreferDUPAndInsert =
- !isConstant && NumDifferentLanes >= 1 &&
- NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
- NumDifferentLanes >= NumConstantLanes;
-
+ // If we need to insert a small number of different non-constant elements and
+ // the vector width is sufficiently large, prefer using DUP with the common
+ // value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
+ // skip the constant lane handling below.
+ bool PreferDUPAndInsert =
+ !isConstant && NumDifferentLanes >= 1 &&
+ NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
+ NumDifferentLanes >= NumConstantLanes;
+
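As a standalone restatement (illustrative only), the heuristic above reduces to the following predicate, shown here with a couple of sample lane counts:

#include <cassert>

// Prefer a DUP of the common value plus per-lane inserts when only a few
// non-constant lanes differ and the differing lanes outnumber constant ones.
bool preferDupAndInsert(bool IsConstant, unsigned NumElts,
                        unsigned NumDifferentLanes, unsigned NumUndefLanes,
                        unsigned NumConstantLanes) {
  return !IsConstant && NumDifferentLanes >= 1 &&
         NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
         NumDifferentLanes >= NumConstantLanes;
}

int main() {
  // One odd lane in a v8i16 with no constants: DUP + a single INSERT wins.
  assert(preferDupAndInsert(false, 8, 1, 0, 0));
  // Half of the lanes differ: fall through to the generic lowering.
  assert(!preferDupAndInsert(false, 8, 4, 0, 0));
  return 0;
}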
// If there was only one constant value used and for more than one lane,
// start by splatting that value, then replace the non-constant lanes. This
// is better than the default, which will perform a separate initialization
// for each lane.
- if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
+ if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
// Firstly, try to materialize the splat constant.
SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
Val = ConstantBuildVector(Vec, DAG);
@@ -9748,22 +9748,22 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return shuffle;
}
- if (PreferDUPAndInsert) {
- // First, build a constant vector with the common element.
- SmallVector<SDValue, 8> Ops;
- for (unsigned I = 0; I < NumElts; ++I)
- Ops.push_back(Value);
- SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
- // Next, insert the elements that do not match the common value.
- for (unsigned I = 0; I < NumElts; ++I)
- if (Op.getOperand(I) != Value)
- NewVector =
- DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
- Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
-
- return NewVector;
- }
-
+ if (PreferDUPAndInsert) {
+ // First, build a constant vector with the common element.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0; I < NumElts; ++I)
+ Ops.push_back(Value);
+ SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
+ // Next, insert the elements that do not match the common value.
+ for (unsigned I = 0; I < NumElts; ++I)
+ if (Op.getOperand(I) != Value)
+ NewVector =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
+ Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
+
+ return NewVector;
+ }
+
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
// worse. For a vector with one or two non-undef values, that's
@@ -9812,18 +9812,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return SDValue();
}
-SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Op.getValueType().isScalableVector() &&
- isTypeLegal(Op.getValueType()) &&
- "Expected legal scalable vector type!");
-
- if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
- return Op;
-
- return SDValue();
-}
-
+SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getValueType().isScalableVector() &&
+ isTypeLegal(Op.getValueType()) &&
+ "Expected legal scalable vector type!");
+
+ if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
+ return Op;
+
+ return SDValue();
+}
+
SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
@@ -9919,8 +9919,8 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
- if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
- InVT.getSizeInBits() == 128)
+ if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
+ InVT.getSizeInBits() == 128)
return Op;
return SDValue();
@@ -9934,34 +9934,34 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
EVT InVT = Op.getOperand(1).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- if (InVT.isScalableVector()) {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
-
- if (!isTypeLegal(VT) || !VT.isInteger())
- return SDValue();
-
- SDValue Vec0 = Op.getOperand(0);
- SDValue Vec1 = Op.getOperand(1);
-
- // Ensure the subvector is half the size of the main vector.
- if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
- return SDValue();
-
- // Extend elements of smaller vector...
- EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
- SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
-
- if (Idx == 0) {
- SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
- return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
- } else if (Idx == InVT.getVectorMinNumElements()) {
- SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
- return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
- }
-
+ if (InVT.isScalableVector()) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ if (!isTypeLegal(VT) || !VT.isInteger())
+ return SDValue();
+
+ SDValue Vec0 = Op.getOperand(0);
+ SDValue Vec1 = Op.getOperand(1);
+
+ // Ensure the subvector is half the size of the main vector.
+ if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
+ return SDValue();
+
+ // Extend elements of smaller vector...
+ EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
+ SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
+
+ if (Idx == 0) {
+ SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
+ return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
+ } else if (Idx == InVT.getVectorMinNumElements()) {
+ SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
+ return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
+ }
+
return SDValue();
- }
+ }
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
@@ -9970,42 +9970,42 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
return SDValue();
}
-SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
-
- if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
- return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
-
- assert(VT.isScalableVector() && "Expected a scalable vector.");
-
- bool Signed = Op.getOpcode() == ISD::SDIV;
- unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
-
- if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
- return LowerToPredicatedOp(Op, DAG, PredOpcode);
-
- // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
- // operations, and truncate the result.
- EVT WidenedVT;
- if (VT == MVT::nxv16i8)
- WidenedVT = MVT::nxv8i16;
- else if (VT == MVT::nxv8i16)
- WidenedVT = MVT::nxv4i32;
- else
- llvm_unreachable("Unexpected Custom DIV operation");
-
- SDLoc dl(Op);
- unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
- unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
- SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
- SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
- SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
- SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
- SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
- SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
- return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
-}
-
+SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
+ return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
+
+ assert(VT.isScalableVector() && "Expected a scalable vector.");
+
+ bool Signed = Op.getOpcode() == ISD::SDIV;
+ unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
+
+ if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
+ return LowerToPredicatedOp(Op, DAG, PredOpcode);
+
+ // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
+ // operations, and truncate the result.
+ EVT WidenedVT;
+ if (VT == MVT::nxv16i8)
+ WidenedVT = MVT::nxv8i16;
+ else if (VT == MVT::nxv8i16)
+ WidenedVT = MVT::nxv4i32;
+ else
+ llvm_unreachable("Unexpected Custom DIV operation");
+
+ SDLoc dl(Op);
+ unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
+ unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
+ SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
+ SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
+ SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
+ SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
+ SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
+ SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
+ return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
+}
+
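The widening strategy above rests on a simple scalar identity: an i8 division computed in i16 and truncated back gives the i8 quotient. A minimal scalar model (names are illustrative; SUNPKLO/SUNPKHI/UZP1 are only mirrored here, not emitted):

#include <cassert>
#include <cstdint>
#include <vector>

// Scalar model of the nxv16i8 SDIV lowering above: the low and high halves of
// the inputs are sign-extended to 16 bits (SUNPKLO/SUNPKHI), divided in the
// wider type, and the low byte of each quotient is re-packed in order (UZP1).
std::vector<int8_t> sdivWidened(const std::vector<int8_t> &A,
                                const std::vector<int8_t> &B) {
  size_t Half = A.size() / 2;
  std::vector<int8_t> R(A.size());
  for (size_t i = 0; i < Half; ++i)                   // ResultLo
    R[i] = (int8_t)((int16_t)A[i] / (int16_t)B[i]);
  for (size_t i = Half; i < A.size(); ++i)            // ResultHi
    R[i] = (int8_t)((int16_t)A[i] / (int16_t)B[i]);
  return R;
}

int main() {
  std::vector<int8_t> A = {100, -100, 7, 1}, B = {3, 3, -2, 1};
  std::vector<int8_t> R = sdivWidened(A, B);
  assert(R[0] == 33 && R[1] == -33 && R[2] == -3 && R[3] == 1);
  return 0;
}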
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Currently no fixed length shuffles that require SVE are legal.
if (useSVEForFixedLengthVectorVT(VT))
@@ -10105,12 +10105,12 @@ SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
}
if (!VT.isVector() || VT.isScalableVector())
- return SDValue();
+ return SDValue();
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
- return SDValue();
+ return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
@@ -10128,8 +10128,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
llvm_unreachable("unexpected shift opcode");
case ISD::SHL:
- if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
+ if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
@@ -10140,9 +10140,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
- if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
- unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
- : AArch64ISD::SRL_PRED;
+ if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
+ unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
+ : AArch64ISD::SRL_PRED;
return LowerToPredicatedOp(Op, DAG, Opc);
}
@@ -10194,7 +10194,7 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
else
Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
- return DAG.getNOT(dl, Fcmeq, VT);
+ return DAG.getNOT(dl, Fcmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
@@ -10233,7 +10233,7 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
else
Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
- return DAG.getNOT(dl, Cmeq, VT);
+ return DAG.getNOT(dl, Cmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
@@ -10274,9 +10274,9 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
}
- if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
- return LowerFixedLengthVectorSetccToSVE(Op, DAG);
-
+ if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
+ return LowerFixedLengthVectorSetccToSVE(Op, DAG);
+
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -10349,51 +10349,51 @@ static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
- SDValue Src = Op.getOperand(0);
-
- // Try to lower fixed length reductions to SVE.
- EVT SrcVT = Src.getValueType();
- bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
- Op.getOpcode() == ISD::VECREDUCE_OR ||
- Op.getOpcode() == ISD::VECREDUCE_XOR ||
- Op.getOpcode() == ISD::VECREDUCE_FADD ||
- (Op.getOpcode() != ISD::VECREDUCE_ADD &&
- SrcVT.getVectorElementType() == MVT::i64);
- if (SrcVT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
-
- if (SrcVT.getVectorElementType() == MVT::i1)
- return LowerPredReductionToSVE(Op, DAG);
-
- switch (Op.getOpcode()) {
- case ISD::VECREDUCE_ADD:
- return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
- case ISD::VECREDUCE_AND:
- return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
- case ISD::VECREDUCE_OR:
- return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
- case ISD::VECREDUCE_SMAX:
- return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
- case ISD::VECREDUCE_SMIN:
- return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
- case ISD::VECREDUCE_UMAX:
- return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
- case ISD::VECREDUCE_UMIN:
- return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
- case ISD::VECREDUCE_XOR:
- return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
- case ISD::VECREDUCE_FADD:
- return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
- case ISD::VECREDUCE_FMAX:
- return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
- case ISD::VECREDUCE_FMIN:
- return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
- default:
- llvm_unreachable("Unhandled fixed length reduction");
- }
- }
-
- // Lower NEON reductions.
+ SDValue Src = Op.getOperand(0);
+
+ // Try to lower fixed length reductions to SVE.
+ EVT SrcVT = Src.getValueType();
+ bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
+ Op.getOpcode() == ISD::VECREDUCE_OR ||
+ Op.getOpcode() == ISD::VECREDUCE_XOR ||
+ Op.getOpcode() == ISD::VECREDUCE_FADD ||
+ (Op.getOpcode() != ISD::VECREDUCE_ADD &&
+ SrcVT.getVectorElementType() == MVT::i64);
+ if (SrcVT.isScalableVector() ||
+ useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
+
+ if (SrcVT.getVectorElementType() == MVT::i1)
+ return LowerPredReductionToSVE(Op, DAG);
+
+ switch (Op.getOpcode()) {
+ case ISD::VECREDUCE_ADD:
+ return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
+ case ISD::VECREDUCE_AND:
+ return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
+ case ISD::VECREDUCE_OR:
+ return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
+ case ISD::VECREDUCE_SMAX:
+ return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
+ case ISD::VECREDUCE_SMIN:
+ return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
+ case ISD::VECREDUCE_UMAX:
+ return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
+ case ISD::VECREDUCE_UMIN:
+ return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
+ case ISD::VECREDUCE_XOR:
+ return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
+ case ISD::VECREDUCE_FADD:
+ return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
+ case ISD::VECREDUCE_FMAX:
+ return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
+ case ISD::VECREDUCE_FMIN:
+ return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
+ default:
+ llvm_unreachable("Unhandled fixed length reduction");
+ }
+ }
+
+ // Lower NEON reductions.
SDLoc dl(Op);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
@@ -10410,13 +10410,13 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
- Src);
+ Src);
}
case ISD::VECREDUCE_FMIN: {
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
- Src);
+ Src);
}
default:
llvm_unreachable("Unhandled reduction");
@@ -10426,7 +10426,7 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
- if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
+ if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-add instruction, but not a load-sub.
@@ -10443,7 +10443,7 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
- if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
+ if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-clear instruction, but not a load-and.
@@ -10544,17 +10544,17 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
template <unsigned NumVecs>
-static bool
-setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
- AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
+static bool
+setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
+ AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
Info.opc = ISD::INTRINSIC_VOID;
// Retrieve EC from first vector argument.
- const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
+ const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
ElementCount EC = VT.getVectorElementCount();
#ifndef NDEBUG
// Check the assumption that all input vectors are the same type.
for (unsigned I = 0; I < NumVecs; ++I)
- assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
+ assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
"Invalid type.");
#endif
// memVT is `NumVecs * VT`.
@@ -10577,11 +10577,11 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
case Intrinsic::aarch64_sve_st2:
- return setInfoSVEStN<2>(*this, DL, Info, I);
+ return setInfoSVEStN<2>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st3:
- return setInfoSVEStN<3>(*this, DL, Info, I);
+ return setInfoSVEStN<3>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st4:
- return setInfoSVEStN<4>(*this, DL, Info, I);
+ return setInfoSVEStN<4>(*this, DL, Info, I);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
@@ -10737,15 +10737,15 @@ bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
- uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
- uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
+ uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
+ uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
return NumBits1 > NumBits2;
}
bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
- uint64_t NumBits1 = VT1.getFixedSizeInBits();
- uint64_t NumBits2 = VT2.getFixedSizeInBits();
+ uint64_t NumBits1 = VT1.getFixedSizeInBits();
+ uint64_t NumBits2 = VT2.getFixedSizeInBits();
return NumBits1 > NumBits2;
}
@@ -10987,43 +10987,43 @@ bool AArch64TargetLowering::shouldSinkOperands(
return true;
}
- case Instruction::Mul: {
- bool IsProfitable = false;
- for (auto &Op : I->operands()) {
- // Make sure we are not already sinking this operand
- if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
- continue;
-
- ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
- if (!Shuffle || !Shuffle->isZeroEltSplat())
- continue;
-
- Value *ShuffleOperand = Shuffle->getOperand(0);
- InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
- if (!Insert)
- continue;
-
- Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
- if (!OperandInstr)
- continue;
-
- ConstantInt *ElementConstant =
- dyn_cast<ConstantInt>(Insert->getOperand(2));
- // Check that the insertelement is inserting into element 0
- if (!ElementConstant || ElementConstant->getZExtValue() != 0)
- continue;
-
- unsigned Opcode = OperandInstr->getOpcode();
- if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
- continue;
-
- Ops.push_back(&Shuffle->getOperandUse(0));
- Ops.push_back(&Op);
- IsProfitable = true;
- }
-
- return IsProfitable;
- }
+ case Instruction::Mul: {
+ bool IsProfitable = false;
+ for (auto &Op : I->operands()) {
+ // Make sure we are not already sinking this operand
+ if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
+ continue;
+
+ ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
+ if (!Shuffle || !Shuffle->isZeroEltSplat())
+ continue;
+
+ Value *ShuffleOperand = Shuffle->getOperand(0);
+ InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
+ if (!Insert)
+ continue;
+
+ Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
+ if (!OperandInstr)
+ continue;
+
+ ConstantInt *ElementConstant =
+ dyn_cast<ConstantInt>(Insert->getOperand(2));
+ // Check that the insertelement is inserting into element 0
+ if (!ElementConstant || ElementConstant->getZExtValue() != 0)
+ continue;
+
+ unsigned Opcode = OperandInstr->getOpcode();
+ if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
+ continue;
+
+ Ops.push_back(&Shuffle->getOperandUse(0));
+ Ops.push_back(&Op);
+ IsProfitable = true;
+ }
+
+ return IsProfitable;
+ }
default:
return false;
}
@@ -11359,12 +11359,12 @@ SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
{Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
- assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
+ assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
"invalid tuple vector type!");
- EVT SplitVT =
- EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- VT.getVectorElementCount().divideCoefficientBy(N));
+ EVT SplitVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorElementCount().divideCoefficientBy(N));
assert(isTypeLegal(SplitVT));
SmallVector<EVT, 5> VTs(N, SplitVT);
@@ -11655,86 +11655,86 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
}
-// VECREDUCE_ADD( EXTEND(v16i8_type) ) to
-// VECREDUCE_ADD( DOTv16i8(v16i8_type) )
-static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
- const AArch64Subtarget *ST) {
- SDValue Op0 = N->getOperand(0);
- if (!ST->hasDotProd() || N->getValueType(0) != MVT::i32)
- return SDValue();
-
- if (Op0.getValueType().getVectorElementType() != MVT::i32)
- return SDValue();
-
- unsigned ExtOpcode = Op0.getOpcode();
- if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
- return SDValue();
-
- EVT Op0VT = Op0.getOperand(0).getValueType();
- if (Op0VT != MVT::v16i8)
- return SDValue();
-
- SDLoc DL(Op0);
- SDValue Ones = DAG.getConstant(1, DL, Op0VT);
- SDValue Zeros = DAG.getConstant(0, DL, MVT::v4i32);
- auto DotIntrisic = (ExtOpcode == ISD::ZERO_EXTEND)
- ? Intrinsic::aarch64_neon_udot
- : Intrinsic::aarch64_neon_sdot;
- SDValue Dot = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Zeros.getValueType(),
- DAG.getConstant(DotIntrisic, DL, MVT::i32), Zeros,
- Ones, Op0.getOperand(0));
- return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
-}
-
-// Given an ABS node, detect the following pattern:
-// (ABS (SUB (EXTEND a), (EXTEND b))).
-// Generates UABD/SABD instruction.
-static SDValue performABSCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const AArch64Subtarget *Subtarget) {
- SDValue AbsOp1 = N->getOperand(0);
- SDValue Op0, Op1;
-
- if (AbsOp1.getOpcode() != ISD::SUB)
- return SDValue();
-
- Op0 = AbsOp1.getOperand(0);
- Op1 = AbsOp1.getOperand(1);
-
- unsigned Opc0 = Op0.getOpcode();
- // Check if the operands of the sub are (zero|sign)-extended.
- if (Opc0 != Op1.getOpcode() ||
- (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
- return SDValue();
-
- EVT VectorT1 = Op0.getOperand(0).getValueType();
- EVT VectorT2 = Op1.getOperand(0).getValueType();
- // Check if vectors are of same type and valid size.
- uint64_t Size = VectorT1.getFixedSizeInBits();
- if (VectorT1 != VectorT2 || (Size != 64 && Size != 128))
- return SDValue();
-
- // Check if vector element types are valid.
- EVT VT1 = VectorT1.getVectorElementType();
- if (VT1 != MVT::i8 && VT1 != MVT::i16 && VT1 != MVT::i32)
- return SDValue();
-
- Op0 = Op0.getOperand(0);
- Op1 = Op1.getOperand(0);
- unsigned ABDOpcode =
- (Opc0 == ISD::SIGN_EXTEND) ? AArch64ISD::SABD : AArch64ISD::UABD;
- SDValue ABD =
- DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
-}
-
+// VECREDUCE_ADD( EXTEND(v16i8_type) ) to
+// VECREDUCE_ADD( DOTv16i8(v16i8_type) )
+static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *ST) {
+ SDValue Op0 = N->getOperand(0);
+ if (!ST->hasDotProd() || N->getValueType(0) != MVT::i32)
+ return SDValue();
+
+ if (Op0.getValueType().getVectorElementType() != MVT::i32)
+ return SDValue();
+
+ unsigned ExtOpcode = Op0.getOpcode();
+ if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
+ return SDValue();
+
+ EVT Op0VT = Op0.getOperand(0).getValueType();
+ if (Op0VT != MVT::v16i8)
+ return SDValue();
+
+ SDLoc DL(Op0);
+ SDValue Ones = DAG.getConstant(1, DL, Op0VT);
+ SDValue Zeros = DAG.getConstant(0, DL, MVT::v4i32);
+ auto DotIntrisic = (ExtOpcode == ISD::ZERO_EXTEND)
+ ? Intrinsic::aarch64_neon_udot
+ : Intrinsic::aarch64_neon_sdot;
+ SDValue Dot = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Zeros.getValueType(),
+ DAG.getConstant(DotIntrisic, DL, MVT::i32), Zeros,
+ Ones, Op0.getOperand(0));
+ return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
+}
+
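The combine above relies on the identity that a sum of zero- or sign-extended bytes equals a dot product of those bytes against an all-ones vector accumulated into zeros. A tiny scalar check of the unsigned case (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t A[16] = {1, 2, 3, 4, 250, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 255};

  uint32_t SumOfExtends = 0;
  for (uint8_t V : A)
    SumOfExtends += (uint32_t)V; // VECREDUCE_ADD(ZERO_EXTEND(v16i8))

  uint32_t Dot = 0;
  for (uint8_t V : A)
    Dot += (uint32_t)V * 1u;     // udot(zeros, ones, A), then VECREDUCE_ADD

  assert(SumOfExtends == Dot);
  return 0;
}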
+// Given an ABS node, detect the following pattern:
+// (ABS (SUB (EXTEND a), (EXTEND b))).
+// Generates UABD/SABD instruction.
+static SDValue performABSCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SDValue AbsOp1 = N->getOperand(0);
+ SDValue Op0, Op1;
+
+ if (AbsOp1.getOpcode() != ISD::SUB)
+ return SDValue();
+
+ Op0 = AbsOp1.getOperand(0);
+ Op1 = AbsOp1.getOperand(1);
+
+ unsigned Opc0 = Op0.getOpcode();
+ // Check if the operands of the sub are (zero|sign)-extended.
+ if (Opc0 != Op1.getOpcode() ||
+ (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
+ return SDValue();
+
+ EVT VectorT1 = Op0.getOperand(0).getValueType();
+ EVT VectorT2 = Op1.getOperand(0).getValueType();
+ // Check if vectors are of same type and valid size.
+ uint64_t Size = VectorT1.getFixedSizeInBits();
+ if (VectorT1 != VectorT2 || (Size != 64 && Size != 128))
+ return SDValue();
+
+ // Check if vector element types are valid.
+ EVT VT1 = VectorT1.getVectorElementType();
+ if (VT1 != MVT::i8 && VT1 != MVT::i16 && VT1 != MVT::i32)
+ return SDValue();
+
+ Op0 = Op0.getOperand(0);
+ Op1 = Op1.getOperand(0);
+ unsigned ABDOpcode =
+ (Opc0 == ISD::SIGN_EXTEND) ? AArch64ISD::SABD : AArch64ISD::UABD;
+ SDValue ABD =
+ DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
+}
+
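The SABD/UABD rewrite above depends on the absolute difference of two N-bit values always fitting back into N bits. A scalar, exhaustive check of the signed 8-bit case (an illustrative model, not the DAG code):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// abs(sext(a) - sext(b)) == zext(sabd(a, b)) for all 8-bit a, b.
uint32_t absDiffWide(int8_t A, int8_t B) {
  return (uint32_t)std::abs((int32_t)A - (int32_t)B); // ABS(SUB(SEXT, SEXT))
}

uint32_t sabdThenZext(int8_t A, int8_t B) {
  int16_t D = (int16_t)A - (int16_t)B;            // SABD computes |a - b| ...
  uint8_t Narrow = (uint8_t)(D < 0 ? -D : D);     // ... as an 8-bit result
  return (uint32_t)Narrow;                        // ZERO_EXTEND
}

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B)
      assert(absDiffWide((int8_t)A, (int8_t)B) ==
             sabdThenZext((int8_t)A, (int8_t)B));
  return 0;
}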
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
- return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
+ return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
}
SDValue
@@ -11793,157 +11793,157 @@ static bool IsSVECntIntrinsic(SDValue S) {
return false;
}
-/// Calculates what the pre-extend type is, based on the extension
-/// operation node provided by \p Extend.
-///
-/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
-/// pre-extend type is pulled directly from the operand, while other extend
-/// operations need a bit more inspection to get this information.
-///
-/// \param Extend The SDNode from the DAG that represents the extend operation
-/// \param DAG The SelectionDAG hosting the \p Extend node
-///
-/// \returns The type representing the \p Extend source type, or \p MVT::Other
-/// if no valid type can be determined
-static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
- switch (Extend.getOpcode()) {
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- return Extend.getOperand(0).getValueType();
- case ISD::AssertSext:
- case ISD::AssertZext:
- case ISD::SIGN_EXTEND_INREG: {
- VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
- if (!TypeNode)
- return MVT::Other;
- return TypeNode->getVT();
- }
- case ISD::AND: {
- ConstantSDNode *Constant =
- dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
- if (!Constant)
- return MVT::Other;
-
- uint32_t Mask = Constant->getZExtValue();
-
- if (Mask == UCHAR_MAX)
- return MVT::i8;
- else if (Mask == USHRT_MAX)
- return MVT::i16;
- else if (Mask == UINT_MAX)
- return MVT::i32;
-
- return MVT::Other;
- }
- default:
- return MVT::Other;
- }
-
- llvm_unreachable("Code path unhandled in calculatePreExtendType!");
-}
-
-/// Combines a dup(sext/zext) node pattern into sext/zext(dup)
-/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
-static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
- SelectionDAG &DAG) {
-
- ShuffleVectorSDNode *ShuffleNode =
- dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
- if (!ShuffleNode)
- return SDValue();
-
-  // Ensure the shuffle is splatting lane 0 before continuing
- if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
- return SDValue();
-
- SDValue InsertVectorElt = VectorShuffle.getOperand(0);
-
- if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
- return SDValue();
-
- SDValue InsertLane = InsertVectorElt.getOperand(2);
- ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
- // Ensures the insert is inserting into lane 0
- if (!Constant || Constant->getZExtValue() != 0)
- return SDValue();
-
- SDValue Extend = InsertVectorElt.getOperand(1);
- unsigned ExtendOpcode = Extend.getOpcode();
-
- bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
- ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
- ExtendOpcode == ISD::AssertSext;
- if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
- ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
- return SDValue();
-
- EVT TargetType = VectorShuffle.getValueType();
- EVT PreExtendType = calculatePreExtendType(Extend, DAG);
-
- if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
- TargetType != MVT::v2i64) ||
- (PreExtendType == MVT::Other))
- return SDValue();
-
- // Restrict valid pre-extend data type
- if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
- PreExtendType != MVT::i32)
- return SDValue();
-
- EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
-
- if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
- return SDValue();
-
- if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
- return SDValue();
-
- SDLoc DL(VectorShuffle);
-
- SDValue InsertVectorNode = DAG.getNode(
- InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
- DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
- DAG.getConstant(0, DL, MVT::i64));
-
- std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
-
- SDValue VectorShuffleNode =
- DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
- DAG.getUNDEF(PreExtendVT), ShuffleMask);
-
- SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
- DL, TargetType, VectorShuffleNode);
-
- return ExtendNode;
-}
-
-/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
-/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
-static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
- // If the value type isn't a vector, none of the operands are going to be dups
- if (!Mul->getValueType(0).isVector())
- return SDValue();
-
- SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
- SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
-
-  // Neither operand has been changed, so don't make any further changes
- if (!Op0 && !Op1)
- return SDValue();
-
- SDLoc DL(Mul);
- return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
- Op0 ? Op0 : Mul->getOperand(0),
- Op1 ? Op1 : Mul->getOperand(1));
-}
-
+/// Calculates what the pre-extend type is, based on the extension
+/// operation node provided by \p Extend.
+///
+/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
+/// pre-extend type is pulled directly from the operand, while other extend
+/// operations need a bit more inspection to get this information.
+///
+/// \param Extend The SDNode from the DAG that represents the extend operation
+/// \param DAG The SelectionDAG hosting the \p Extend node
+///
+/// \returns The type representing the \p Extend source type, or \p MVT::Other
+/// if no valid type can be determined
+static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
+ switch (Extend.getOpcode()) {
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return Extend.getOperand(0).getValueType();
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ case ISD::SIGN_EXTEND_INREG: {
+ VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
+ if (!TypeNode)
+ return MVT::Other;
+ return TypeNode->getVT();
+ }
+ case ISD::AND: {
+ ConstantSDNode *Constant =
+ dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
+ if (!Constant)
+ return MVT::Other;
+
+ uint32_t Mask = Constant->getZExtValue();
+
+ if (Mask == UCHAR_MAX)
+ return MVT::i8;
+ else if (Mask == USHRT_MAX)
+ return MVT::i16;
+ else if (Mask == UINT_MAX)
+ return MVT::i32;
+
+ return MVT::Other;
+ }
+ default:
+ return MVT::Other;
+ }
+
+ llvm_unreachable("Code path unhandled in calculatePreExtendType!");
+}
+
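The ISD::AND case above treats an all-ones mask as an implicit zero-extend. A standalone sketch of that mapping (the helper name preExtendBitsFromMask is illustrative):

#include <cassert>
#include <climits>
#include <cstdint>

// An AND with an all-ones mask of N bits behaves like a zero-extend from an
// N-bit value, so the pre-extend width can be read straight off the mask.
int preExtendBitsFromMask(uint32_t Mask) {
  if (Mask == UCHAR_MAX)
    return 8;
  if (Mask == USHRT_MAX)
    return 16;
  if (Mask == UINT_MAX)
    return 32;
  return 0; // corresponds to MVT::Other: no usable pre-extend type
}

int main() {
  assert(preExtendBitsFromMask(0xFFu) == 8);
  assert(preExtendBitsFromMask(0xFFFFu) == 16);
  assert(preExtendBitsFromMask(0xFFFFFFFFu) == 32);
  assert(preExtendBitsFromMask(0xF0u) == 0);
  return 0;
}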
+/// Combines a dup(sext/zext) node pattern into sext/zext(dup)
+/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
+static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
+ SelectionDAG &DAG) {
+
+ ShuffleVectorSDNode *ShuffleNode =
+ dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
+ if (!ShuffleNode)
+ return SDValue();
+
+  // Ensure the shuffle is splatting lane 0 before continuing
+ if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
+ return SDValue();
+
+ SDValue InsertVectorElt = VectorShuffle.getOperand(0);
+
+ if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ return SDValue();
+
+ SDValue InsertLane = InsertVectorElt.getOperand(2);
+ ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
+ // Ensures the insert is inserting into lane 0
+ if (!Constant || Constant->getZExtValue() != 0)
+ return SDValue();
+
+ SDValue Extend = InsertVectorElt.getOperand(1);
+ unsigned ExtendOpcode = Extend.getOpcode();
+
+ bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
+ ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
+ ExtendOpcode == ISD::AssertSext;
+ if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
+ ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
+ return SDValue();
+
+ EVT TargetType = VectorShuffle.getValueType();
+ EVT PreExtendType = calculatePreExtendType(Extend, DAG);
+
+ if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
+ TargetType != MVT::v2i64) ||
+ (PreExtendType == MVT::Other))
+ return SDValue();
+
+ // Restrict valid pre-extend data type
+ if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
+ PreExtendType != MVT::i32)
+ return SDValue();
+
+ EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
+
+ if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
+ return SDValue();
+
+ if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
+ return SDValue();
+
+ SDLoc DL(VectorShuffle);
+
+ SDValue InsertVectorNode = DAG.getNode(
+ InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
+ DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
+ DAG.getConstant(0, DL, MVT::i64));
+
+ std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
+
+ SDValue VectorShuffleNode =
+ DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
+ DAG.getUNDEF(PreExtendVT), ShuffleMask);
+
+ SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ DL, TargetType, VectorShuffleNode);
+
+ return ExtendNode;
+}
+
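The dup(sext/zext) -> sext/zext(dup) rewrite above is justified by a simple identity: broadcasting an extended scalar produces the same lanes as extending a broadcast of the narrow scalar. A minimal scalar model (illustrative only):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  int16_t X = -7;

  std::vector<int32_t> DupOfSext(4, (int32_t)X);     // dup(sext(x))

  std::vector<int16_t> Dup(4, X);                    // dup(x)
  std::vector<int32_t> SextOfDup;
  for (int16_t V : Dup)
    SextOfDup.push_back((int32_t)V);                 // sext(dup(x))

  assert(DupOfSext == SextOfDup);
  return 0;
}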
+/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
+/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
+static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
+ // If the value type isn't a vector, none of the operands are going to be dups
+ if (!Mul->getValueType(0).isVector())
+ return SDValue();
+
+ SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
+ SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
+
+  // Neither operand has been changed, so don't make any further changes
+ if (!Op0 && !Op1)
+ return SDValue();
+
+ SDLoc DL(Mul);
+ return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
+ Op0 ? Op0 : Mul->getOperand(0),
+ Op1 ? Op1 : Mul->getOperand(1));
+}
+
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
-
- if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
- return Ext;
-
+
+ if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
+ return Ext;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -12478,9 +12478,9 @@ static SDValue performSVEAndCombine(SDNode *N,
return DAG.getNode(Opc, DL, N->getValueType(0), And);
}
- if (!EnableCombineMGatherIntrinsics)
- return SDValue();
-
+ if (!EnableCombineMGatherIntrinsics)
+ return SDValue();
+
SDValue Mask = N->getOperand(1);
if (!Src.hasOneUse())
@@ -12534,11 +12534,11 @@ static SDValue performANDCombine(SDNode *N,
if (VT.isScalableVector())
return performSVEAndCombine(N, DCI);
- // The combining code below works only for NEON vectors. In particular, it
- // does not work for SVE when dealing with vectors wider than 128 bits.
- if (!(VT.is64BitVector() || VT.is128BitVector()))
- return SDValue();
-
+ // The combining code below works only for NEON vectors. In particular, it
+ // does not work for SVE when dealing with vectors wider than 128 bits.
+ if (!(VT.is64BitVector() || VT.is128BitVector()))
+ return SDValue();
+
BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
if (!BVN)
@@ -12599,143 +12599,143 @@ static SDValue performSRLCombine(SDNode *N,
return SDValue();
}
-// Attempt to form urhadd(OpA, OpB) from
-// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
-// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
-// The original form of the first expression is
-// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
-// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
-// Before this function is called the srl will have been lowered to
-// AArch64ISD::VLSHR.
-// This pass can also recognize signed variants of the patterns that use sign
-// extension instead of zero extension and form a srhadd(OpA, OpB) or a
-// shadd(OpA, OpB) from them.
-static SDValue
-performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
-
- // Since we are looking for a right shift by a constant value of 1 and we are
- // operating on types at least 16 bits in length (sign/zero extended OpA and
- // OpB, which are at least 8 bits), it follows that the truncate will always
- // discard the shifted-in bit and therefore the right shift will be logical
- // regardless of the signedness of OpA and OpB.
- SDValue Shift = N->getOperand(0);
- if (Shift.getOpcode() != AArch64ISD::VLSHR)
- return SDValue();
-
- // Is the right shift using an immediate value of 1?
- uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
- if (ShiftAmount != 1)
- return SDValue();
-
- SDValue ExtendOpA, ExtendOpB;
- SDValue ShiftOp0 = Shift.getOperand(0);
- unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
- if (ShiftOp0Opc == ISD::SUB) {
-
- SDValue Xor = ShiftOp0.getOperand(1);
- if (Xor.getOpcode() != ISD::XOR)
- return SDValue();
-
-    // Is the XOR using an all-ones constant on the right hand side?
- uint64_t C;
- if (!isAllConstantBuildVector(Xor.getOperand(1), C))
- return SDValue();
-
- unsigned ElemSizeInBits = VT.getScalarSizeInBits();
- APInt CAsAPInt(ElemSizeInBits, C);
- if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
- return SDValue();
-
- ExtendOpA = Xor.getOperand(0);
- ExtendOpB = ShiftOp0.getOperand(0);
- } else if (ShiftOp0Opc == ISD::ADD) {
- ExtendOpA = ShiftOp0.getOperand(0);
- ExtendOpB = ShiftOp0.getOperand(1);
- } else
- return SDValue();
-
- unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
- unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
- if (!(ExtendOpAOpc == ExtendOpBOpc &&
- (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
- return SDValue();
-
- // Is the result of the right shift being truncated to the same value type as
- // the original operands, OpA and OpB?
- SDValue OpA = ExtendOpA.getOperand(0);
- SDValue OpB = ExtendOpB.getOperand(0);
- EVT OpAVT = OpA.getValueType();
- assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
- if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
- return SDValue();
-
- SDLoc DL(N);
- bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
- bool IsRHADD = ShiftOp0Opc == ISD::SUB;
- unsigned HADDOpc = IsSignExtend
- ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
- : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
- SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
-
- return ResultHADD;
-}
-
-static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
- switch (Opcode) {
- case ISD::FADD:
- return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
- case ISD::ADD:
- return VT == MVT::i64;
- default:
- return false;
- }
-}
-
-static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
- ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
-
- EVT VT = N->getValueType(0);
- const bool FullFP16 =
- static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
-
- // Rewrite for pairwise fadd pattern
- // (f32 (extract_vector_elt
- // (fadd (vXf32 Other)
- // (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
- // ->
- // (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
- // (extract_vector_elt (vXf32 Other) 1))
- if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
- hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) {
- SDLoc DL(N0);
- SDValue N00 = N0->getOperand(0);
- SDValue N01 = N0->getOperand(1);
-
- ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
- SDValue Other = N00;
-
- // And handle the commutative case.
- if (!Shuffle) {
- Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
- Other = N01;
- }
-
- if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
- Other == Shuffle->getOperand(0)) {
- return DAG.getNode(N0->getOpcode(), DL, VT,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
- DAG.getConstant(0, DL, MVT::i64)),
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
- DAG.getConstant(1, DL, MVT::i64)));
- }
- }
-
- return SDValue();
-}
-
+// Attempt to form urhadd(OpA, OpB) from
+// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
+// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
+// The original form of the first expression is
+// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
+// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
+// Before this function is called the srl will have been lowered to
+// AArch64ISD::VLSHR.
+// This pass can also recognize signed variants of the patterns that use sign
+// extension instead of zero extension and form a srhadd(OpA, OpB) or a
+// shadd(OpA, OpB) from them.
+static SDValue
+performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ // Since we are looking for a right shift by a constant value of 1 and we are
+ // operating on types at least 16 bits in length (sign/zero extended OpA and
+ // OpB, which are at least 8 bits), it follows that the truncate will always
+ // discard the shifted-in bit and therefore the right shift will be logical
+ // regardless of the signedness of OpA and OpB.
+ SDValue Shift = N->getOperand(0);
+ if (Shift.getOpcode() != AArch64ISD::VLSHR)
+ return SDValue();
+
+ // Is the right shift using an immediate value of 1?
+ uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
+ if (ShiftAmount != 1)
+ return SDValue();
+
+ SDValue ExtendOpA, ExtendOpB;
+ SDValue ShiftOp0 = Shift.getOperand(0);
+ unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
+ if (ShiftOp0Opc == ISD::SUB) {
+
+ SDValue Xor = ShiftOp0.getOperand(1);
+ if (Xor.getOpcode() != ISD::XOR)
+ return SDValue();
+
+    // Is the XOR using an all-ones constant on the right hand side?
+ uint64_t C;
+ if (!isAllConstantBuildVector(Xor.getOperand(1), C))
+ return SDValue();
+
+ unsigned ElemSizeInBits = VT.getScalarSizeInBits();
+ APInt CAsAPInt(ElemSizeInBits, C);
+ if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
+ return SDValue();
+
+ ExtendOpA = Xor.getOperand(0);
+ ExtendOpB = ShiftOp0.getOperand(0);
+ } else if (ShiftOp0Opc == ISD::ADD) {
+ ExtendOpA = ShiftOp0.getOperand(0);
+ ExtendOpB = ShiftOp0.getOperand(1);
+ } else
+ return SDValue();
+
+ unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
+ unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
+ if (!(ExtendOpAOpc == ExtendOpBOpc &&
+ (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
+ return SDValue();
+
+ // Is the result of the right shift being truncated to the same value type as
+ // the original operands, OpA and OpB?
+ SDValue OpA = ExtendOpA.getOperand(0);
+ SDValue OpB = ExtendOpB.getOperand(0);
+ EVT OpAVT = OpA.getValueType();
+ assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
+ if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
+ return SDValue();
+
+ SDLoc DL(N);
+ bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
+ bool IsRHADD = ShiftOp0Opc == ISD::SUB;
+ unsigned HADDOpc = IsSignExtend
+ ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
+ : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
+ SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
+
+ return ResultHADD;
+}
+
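The pattern recognition above leans on two arithmetic identities, which the following standalone check verifies exhaustively for 8-bit lanes (illustrative model; in the rewritten DAG form the xor mask is the full-width all-ones value, i.e. a bitwise NOT in the widened domain):

#include <cassert>
#include <cstdint>

//  - trunc((zext(a) + zext(b) + 1) >> 1) is URHADD(a, b), and the rewritten
//    form zext(b) - (zext(a) ^ 0xFFFF) equals a + b + 1 modulo 2^16.
//  - trunc((zext(a) + zext(b)) >> 1) is UHADD(a, b).
int main() {
  for (unsigned A = 0; A <= 255; ++A) {
    for (unsigned B = 0; B <= 255; ++B) {
      uint16_t ExtA = (uint16_t)A, ExtB = (uint16_t)B;

      uint16_t Sub = (uint16_t)(ExtB - (uint16_t)(ExtA ^ 0xFFFFu)); // b - ~a
      assert(Sub == (uint16_t)(ExtA + ExtB + 1));

      uint8_t Urhadd = (uint8_t)(Sub >> 1);          // truncate(vlshr(..., 1))
      assert(Urhadd == (A + B + 1) / 2);

      uint8_t Uhadd = (uint8_t)((uint16_t)(ExtA + ExtB) >> 1);
      assert(Uhadd == (A + B) / 2);
    }
  }
  return 0;
}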
+static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
+ switch (Opcode) {
+ case ISD::FADD:
+ return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
+ case ISD::ADD:
+ return VT == MVT::i64;
+ default:
+ return false;
+ }
+}
+
+static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
+
+ EVT VT = N->getValueType(0);
+ const bool FullFP16 =
+ static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
+
+ // Rewrite for pairwise fadd pattern
+ // (f32 (extract_vector_elt
+ // (fadd (vXf32 Other)
+ // (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
+ // ->
+ // (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
+ // (extract_vector_elt (vXf32 Other) 1))
+ if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
+ hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) {
+ SDLoc DL(N0);
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+
+ ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
+ SDValue Other = N00;
+
+ // And handle the commutative case.
+ if (!Shuffle) {
+ Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
+ Other = N01;
+ }
+
+ if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
+ Other == Shuffle->getOperand(0)) {
+ return DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
+ DAG.getConstant(0, DL, MVT::i64)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
+ DAG.getConstant(1, DL, MVT::i64)));
+ }
+ }
+
+ return SDValue();
+}
+
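The pairwise-FADD rewrite above is valid because lane 0 of the fadd only reads lanes 0 and 1 of the original vector. A scalar model of the before/after forms (illustrative only):

#include <cassert>
#include <vector>

// Lane 0 of fadd(Other, vector_shuffle(Other, undef, <1, X, ...>)) depends
// only on Other[0] and Other[1], so it can be formed with two scalar extracts
// and one scalar fadd (i.e. an FADDP of the low pair).
int main() {
  std::vector<float> Other = {1.5f, 2.25f, -4.0f, 8.0f};
  std::vector<int> Mask = {1, 0, 3, 2}; // only Mask[0] == 1 matters here

  // Original DAG: build the shuffled vector, add, then extract lane 0.
  std::vector<float> Shuffled, Sum;
  for (int M : Mask)
    Shuffled.push_back(Other[M]);
  for (size_t i = 0; i < Other.size(); ++i)
    Sum.push_back(Other[i] + Shuffled[i]);
  float Extracted = Sum[0];

  // Rewritten DAG: extract lanes 0 and 1 and add them directly.
  float Pairwise = Other[0] + Other[1];

  assert(Extracted == Pairwise);
  return 0;
}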
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -12781,9 +12781,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
- // subvectors from the same original vectors. Combine these into a single
- // [us]rhadd or [us]hadd that operates on the two original vectors. Example:
+ // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
+ // subvectors from the same original vectors. Combine these into a single
+ // [us]rhadd or [us]hadd that operates on the two original vectors. Example:
// (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
// extract_subvector (v16i8 OpB,
// <0>))),
@@ -12793,8 +12793,8 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// ->
// (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
- (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
- N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
+ (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
+ N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
SDValue N10 = N1->getOperand(0);
@@ -13099,43 +13099,43 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}
-// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
-static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
- // Only scalar integer and vector types.
- if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
- return SDValue();
-
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
- return SDValue();
-
- auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
- auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
- if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
- return SDValue();
-
- SDValue Op1 = LHS->getOperand(0);
- SDValue Op2 = RHS->getOperand(0);
- EVT OpVT1 = Op1.getValueType();
- EVT OpVT2 = Op2.getValueType();
- if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
- Op2.getOpcode() != AArch64ISD::UADDV ||
- OpVT1.getVectorElementType() != VT)
- return SDValue();
-
- SDValue Val1 = Op1.getOperand(0);
- SDValue Val2 = Op2.getOperand(0);
- EVT ValVT = Val1->getValueType(0);
- SDLoc DL(N);
- SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
- DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
- DAG.getConstant(0, DL, MVT::i64));
-}
-
+// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
+static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ // Only scalar integer and vector types.
+ if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
+ return SDValue();
+
+ auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
+ if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
+ return SDValue();
+
+ SDValue Op1 = LHS->getOperand(0);
+ SDValue Op2 = RHS->getOperand(0);
+ EVT OpVT1 = Op1.getValueType();
+ EVT OpVT2 = Op2.getValueType();
+ if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
+ Op2.getOpcode() != AArch64ISD::UADDV ||
+ OpVT1.getVectorElementType() != VT)
+ return SDValue();
+
+ SDValue Val1 = Op1.getOperand(0);
+ SDValue Val2 = Op2.getOperand(0);
+ EVT ValVT = Val1->getValueType(0);
+ SDLoc DL(N);
+ SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
+ DAG.getConstant(0, DL, MVT::i64));
+}
+
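The combine above uses the fact that, with wraparound integer arithmetic, the sum of two across-vector reductions equals one reduction of the element-wise sum. A quick scalar check (illustrative only):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint32_t> A = {1u, 2u, 0xFFFFFFFFu, 4u};
  std::vector<uint32_t> B = {10u, 20u, 30u, 0xFFFFFFF0u};

  uint32_t AddvA = 0, AddvB = 0, AddvSum = 0;
  for (size_t i = 0; i < A.size(); ++i) {
    AddvA += A[i];                  // UADDV a
    AddvB += B[i];                  // UADDV b
    AddvSum += A[i] + B[i];         // UADDV (ADD a, b)
  }
  assert((uint32_t)(AddvA + AddvB) == AddvSum);
  return 0;
}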
// The basic add/sub long vector instructions have variants with "2" on the end
// which act on the high-half of their inputs. They are normally matched by
// patterns like:
@@ -13189,16 +13189,16 @@ static SDValue performAddSubLongCombine(SDNode *N,
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
}
-static SDValue performAddSubCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- // Try to change sum of two reductions.
- if (SDValue Val = performUADDVCombine(N, DAG))
- return Val;
-
- return performAddSubLongCombine(N, DCI, DAG);
-}
-
+static SDValue performAddSubCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ // Try to change sum of two reductions.
+ if (SDValue Val = performUADDVCombine(N, DAG))
+ return Val;
+
+ return performAddSubLongCombine(N, DCI, DAG);
+}
+
// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.
//
@@ -13212,8 +13212,8 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
- SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
+ SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
+ SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
@@ -13231,9 +13231,9 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
return SDValue();
}
- if (IID == Intrinsic::not_intrinsic)
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
-
+ if (IID == Intrinsic::not_intrinsic)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
+
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
N->getOperand(0), LHS, RHS);
}
@@ -13374,8 +13374,8 @@ static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
- EVT ByteVT =
- EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
+ EVT ByteVT =
+ EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
// Convert everything to the domain of EXT (i.e bytes).
SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
@@ -13475,25 +13475,25 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
return DAG.getZExtOrTrunc(Res, DL, VT);
}
-static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
- SelectionDAG &DAG) {
- SDLoc DL(N);
-
- SDValue Pred = N->getOperand(1);
- SDValue VecToReduce = N->getOperand(2);
-
- // NOTE: The integer reduction's result type is not always linked to the
- // operand's element type so we construct it from the intrinsic's result type.
- EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
- SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
-
- // SVE reductions set the whole vector register with the first element
- // containing the reduction result, which we'll now extract.
- SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
- Zero);
-}
-
+static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
+ SelectionDAG &DAG) {
+ SDLoc DL(N);
+
+ SDValue Pred = N->getOperand(1);
+ SDValue VecToReduce = N->getOperand(2);
+
+ // NOTE: The integer reduction's result type is not always linked to the
+ // operand's element type so we construct it from the intrinsic's result type.
+ EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
+ SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
+
+ // SVE reductions set the whole vector register with the first element
+ // containing the reduction result, which we'll now extract.
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
+ Zero);
+}
+
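A side note on the integer reductions above, and on the aarch64_sve_saddv handling further down in performIntrinsicCombine (which reuses the unsigned node for i64 elements): the sign of the input only matters while lanes are being widened into the accumulator. A small sketch of that fact, with ad-hoc values:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Two i32 lanes, one of them negative.
      int32_t Lanes[2] = {-1, 5};

      // With an i64 accumulator fed from i32 lanes, signed and unsigned
      // reductions genuinely differ, because each lane is widened first.
      int64_t SignedSum = 0;
      uint64_t UnsignedSum = 0;
      for (int32_t L : Lanes) {
        SignedSum += static_cast<int64_t>(L);    // sign-extend the lane
        UnsignedSum += static_cast<uint32_t>(L); // zero-extend the lane
      }
      assert(SignedSum == 4);
      assert(UnsignedSum == 0x100000004ull);

      // For i64 lanes there is nothing to widen: modulo 2^64 the two sums
      // are bit-identical, which is why an i64 SADDV would be redundant.
      uint64_t Bits = static_cast<uint64_t>(int64_t{-1}) + uint64_t{5};
      assert(Bits == 4);
      return 0;
    }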
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
@@ -13534,25 +13534,25 @@ static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
Zero);
}
-// If a merged operation has no inactive lanes we can relax it to a predicated
-// or unpredicated operation, which potentially allows better isel (perhaps
-// using immediate forms) or relaxing register reuse requirements.
-static SDValue convertMergedOpToPredOp(SDNode *N, unsigned PredOpc,
- SelectionDAG &DAG) {
- assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
- assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
- SDValue Pg = N->getOperand(1);
-
- // ISD way to specify an all active predicate.
- if ((Pg.getOpcode() == AArch64ISD::PTRUE) &&
- (Pg.getConstantOperandVal(0) == AArch64SVEPredPattern::all))
- return DAG.getNode(PredOpc, SDLoc(N), N->getValueType(0), Pg,
- N->getOperand(2), N->getOperand(3));
-
- // FUTURE: SplatVector(true)
- return SDValue();
-}
-
+// If a merged operation has no inactive lanes we can relax it to a predicated
+// or unpredicated operation, which potentially allows better isel (perhaps
+// using immediate forms) or relaxing register reuse requirements.
+static SDValue convertMergedOpToPredOp(SDNode *N, unsigned PredOpc,
+ SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
+ assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
+ SDValue Pg = N->getOperand(1);
+
+ // ISD way to specify an all active predicate.
+ if ((Pg.getOpcode() == AArch64ISD::PTRUE) &&
+ (Pg.getConstantOperandVal(0) == AArch64SVEPredPattern::all))
+ return DAG.getNode(PredOpc, SDLoc(N), N->getValueType(0), Pg,
+ N->getOperand(2), N->getOperand(3));
+
+ // FUTURE: SplatVector(true)
+ return SDValue();
+}
+
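To make the relaxation above concrete: a merging SVE operation has to preserve inactive lanes (modelled here as keeping the first data operand, the usual merging convention), but with an all-true governing predicate there are no inactive lanes left to preserve, so the merged and unpredicated forms agree. A scalar sketch with made-up lane values; the helper names are invented for the illustration:

    #include <algorithm>
    #include <array>
    #include <cassert>
    #include <cstdint>

    // Merging form: inactive lanes keep the value of the first data operand.
    static std::array<int32_t, 4> smin_merged(const std::array<bool, 4> &Pg,
                                              const std::array<int32_t, 4> &A,
                                              const std::array<int32_t, 4> &B) {
      std::array<int32_t, 4> R;
      for (unsigned I = 0; I != 4; ++I)
        R[I] = Pg[I] ? std::min(A[I], B[I]) : A[I];
      return R;
    }

    // Unpredicated form: every lane is computed.
    static std::array<int32_t, 4> smin_unpred(const std::array<int32_t, 4> &A,
                                              const std::array<int32_t, 4> &B) {
      std::array<int32_t, 4> R;
      for (unsigned I = 0; I != 4; ++I)
        R[I] = std::min(A[I], B[I]);
      return R;
    }

    int main() {
      std::array<bool, 4> AllActive = {true, true, true, true};
      std::array<int32_t, 4> A = {3, -7, 9, 0};
      std::array<int32_t, 4> B = {1, 4, -2, 0};
      // No inactive lanes means nothing to preserve: both forms agree lane
      // for lane, so the combine is free to pick the more flexible node.
      assert(smin_merged(AllActive, A, B) == smin_unpred(A, B));
      return 0;
    }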
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -13607,28 +13607,28 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_crc32h:
case Intrinsic::aarch64_crc32ch:
return tryCombineCRC32(0xffff, N, DAG);
- case Intrinsic::aarch64_sve_saddv:
- // There is no i64 version of SADDV because the sign is irrelevant.
- if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
- return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
- else
- return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
- case Intrinsic::aarch64_sve_uaddv:
- return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
+ case Intrinsic::aarch64_sve_saddv:
+ // There is no i64 version of SADDV because the sign is irrelevant.
+ if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
+ return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
+ else
+ return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
+ case Intrinsic::aarch64_sve_uaddv:
+ return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
case Intrinsic::aarch64_sve_smaxv:
- return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_umaxv:
- return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_sminv:
- return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
case Intrinsic::aarch64_sve_uminv:
- return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
case Intrinsic::aarch64_sve_orv:
- return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
case Intrinsic::aarch64_sve_eorv:
- return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
case Intrinsic::aarch64_sve_andv:
- return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
+ return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
case Intrinsic::aarch64_sve_index:
return LowerSVEIntrinsicIndex(N, DAG);
case Intrinsic::aarch64_sve_dup:
@@ -13639,19 +13639,19 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_sve_ext:
return LowerSVEIntrinsicEXT(N, DAG);
case Intrinsic::aarch64_sve_smin:
- return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
+ return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
case Intrinsic::aarch64_sve_umin:
- return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
+ return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
case Intrinsic::aarch64_sve_smax:
- return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
+ return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
case Intrinsic::aarch64_sve_umax:
- return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
+ return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
case Intrinsic::aarch64_sve_lsl:
- return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
+ return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
case Intrinsic::aarch64_sve_lsr:
- return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
+ return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
case Intrinsic::aarch64_sve_asr:
- return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
+ return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
@@ -13744,15 +13744,15 @@ static SDValue performExtendCombine(SDNode *N,
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
- (N->getOperand(0).getOpcode() == AArch64ISD::UABD ||
- N->getOperand(0).getOpcode() == AArch64ISD::SABD)) {
+ (N->getOperand(0).getOpcode() == AArch64ISD::UABD ||
+ N->getOperand(0).getOpcode() == AArch64ISD::SABD)) {
SDNode *ABDNode = N->getOperand(0).getNode();
- SDValue NewABD =
- tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
- if (!NewABD.getNode())
- return SDValue();
+ SDValue NewABD =
+ tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
+ if (!NewABD.getNode())
+ return SDValue();
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
}
// This is effectively a custom type legalization for AArch64.
@@ -14235,31 +14235,31 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
S->getMemOperand()->getFlags());
}
-static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
- SDLoc DL(N);
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- EVT ResVT = N->getValueType(0);
-
- // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
- if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
- if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
- SDValue X = Op0.getOperand(0).getOperand(0);
- return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
- }
- }
-
- // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
- if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
- if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
- SDValue Z = Op1.getOperand(0).getOperand(1);
- return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
- }
- }
-
- return SDValue();
-}
-
+static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ EVT ResVT = N->getValueType(0);
+
+ // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
+ if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
+ if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
+ SDValue X = Op0.getOperand(0).getOperand(0);
+ return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
+ }
+ }
+
+ // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
+ if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
+ if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
+ SDValue Z = Op1.getOperand(0).getOperand(1);
+ return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
+ }
+ }
+
+ return SDValue();
+}
+
/// Target-specific DAG combine function for post-increment LD1 (lane) and
/// post-increment LD1R.
static SDValue performPostLD1Combine(SDNode *N,
@@ -14398,55 +14398,55 @@ static SDValue performSTORECombine(SDNode *N,
return SDValue();
}
-static SDValue performMaskedGatherScatterCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- MaskedGatherScatterSDNode *MGS = cast<MaskedGatherScatterSDNode>(N);
- assert(MGS && "Can only combine gather load or scatter store nodes");
-
- SDLoc DL(MGS);
- SDValue Chain = MGS->getChain();
- SDValue Scale = MGS->getScale();
- SDValue Index = MGS->getIndex();
- SDValue Mask = MGS->getMask();
- SDValue BasePtr = MGS->getBasePtr();
- ISD::MemIndexType IndexType = MGS->getIndexType();
-
- EVT IdxVT = Index.getValueType();
-
- if (DCI.isBeforeLegalize()) {
- // SVE gather/scatter requires indices of i32/i64. Promote anything smaller
- // prior to legalisation so the result can be split if required.
- if ((IdxVT.getVectorElementType() == MVT::i8) ||
- (IdxVT.getVectorElementType() == MVT::i16)) {
- EVT NewIdxVT = IdxVT.changeVectorElementType(MVT::i32);
- if (MGS->isIndexSigned())
- Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
- else
- Index = DAG.getNode(ISD::ZERO_EXTEND, DL, NewIdxVT, Index);
-
- if (auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) {
- SDValue PassThru = MGT->getPassThru();
- SDValue Ops[] = { Chain, PassThru, Mask, BasePtr, Index, Scale };
- return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
- PassThru.getValueType(), DL, Ops,
- MGT->getMemOperand(),
- MGT->getIndexType(), MGT->getExtensionType());
- } else {
- auto *MSC = cast<MaskedScatterSDNode>(MGS);
- SDValue Data = MSC->getValue();
- SDValue Ops[] = { Chain, Data, Mask, BasePtr, Index, Scale };
- return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
- MSC->getMemoryVT(), DL, Ops,
- MSC->getMemOperand(), IndexType,
- MSC->isTruncatingStore());
- }
- }
- }
-
- return SDValue();
-}
-
+static SDValue performMaskedGatherScatterCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ MaskedGatherScatterSDNode *MGS = cast<MaskedGatherScatterSDNode>(N);
+ assert(MGS && "Can only combine gather load or scatter store nodes");
+
+ SDLoc DL(MGS);
+ SDValue Chain = MGS->getChain();
+ SDValue Scale = MGS->getScale();
+ SDValue Index = MGS->getIndex();
+ SDValue Mask = MGS->getMask();
+ SDValue BasePtr = MGS->getBasePtr();
+ ISD::MemIndexType IndexType = MGS->getIndexType();
+
+ EVT IdxVT = Index.getValueType();
+
+ if (DCI.isBeforeLegalize()) {
+ // SVE gather/scatter requires indices of i32/i64. Promote anything smaller
+ // prior to legalisation so the result can be split if required.
+ if ((IdxVT.getVectorElementType() == MVT::i8) ||
+ (IdxVT.getVectorElementType() == MVT::i16)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(MVT::i32);
+ if (MGS->isIndexSigned())
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ else
+ Index = DAG.getNode(ISD::ZERO_EXTEND, DL, NewIdxVT, Index);
+
+ if (auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) {
+ SDValue PassThru = MGT->getPassThru();
+ SDValue Ops[] = { Chain, PassThru, Mask, BasePtr, Index, Scale };
+ return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
+ PassThru.getValueType(), DL, Ops,
+ MGT->getMemOperand(),
+ MGT->getIndexType(), MGT->getExtensionType());
+ } else {
+ auto *MSC = cast<MaskedScatterSDNode>(MGS);
+ SDValue Data = MSC->getValue();
+ SDValue Ops[] = { Chain, Data, Mask, BasePtr, Index, Scale };
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
+ MSC->getMemoryVT(), DL, Ops,
+ MSC->getMemOperand(), IndexType,
+ MSC->isTruncatingStore());
+ }
+ }
+ }
+
+ return SDValue();
+}
+
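The index-promotion step above is only sound because the extension kind follows the signedness recorded on the node (MGS->isIndexSigned()). A sketch of the per-lane addressing arithmetic it must preserve; base, scale and index values are invented for the example:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // Each gather/scatter lane addresses Base + Index * Scale.
      const uint64_t Base = 0x1000;
      const uint64_t Scale = 4;
      std::vector<int8_t> NarrowIdx = {-2, -1, 0, 3}; // signed i8 indices

      for (unsigned L = 0; L != NarrowIdx.size(); ++L) {
        // Promoting a signed index must sign-extend it...
        int32_t WideIdx = static_cast<int32_t>(NarrowIdx[L]);

        uint64_t NarrowAddr = Base + static_cast<int64_t>(NarrowIdx[L]) * Scale;
        uint64_t WideAddr = Base + static_cast<int64_t>(WideIdx) * Scale;
        // ...so that every lane still hits the same element.
        assert(NarrowAddr == WideAddr);

        // Zero-extending instead would redirect the negative indices.
        uint64_t ZextAddr =
            Base + uint64_t(static_cast<uint8_t>(NarrowIdx[L])) * Scale;
        if (NarrowIdx[L] < 0)
          assert(ZextAddr != NarrowAddr);
      }
      return 0;
    }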
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
static SDValue performNEONPostLDSTCombine(SDNode *N,
@@ -15443,7 +15443,7 @@ performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
"Sign extending from an invalid type");
- EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
+ EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
ExtOp, DAG.getValueType(ExtVT));
@@ -15451,12 +15451,12 @@ performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
}
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
-
- if (!EnableCombineMGatherIntrinsics)
- return SDValue();
-
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (!EnableCombineMGatherIntrinsics)
+ return SDValue();
+
// SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
// for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
unsigned NewOpc;
@@ -15596,11 +15596,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
default:
LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
break;
- case ISD::ABS:
- return performABSCombine(N, DAG, DCI, Subtarget);
+ case ISD::ABS:
+ return performABSCombine(N, DAG, DCI, Subtarget);
case ISD::ADD:
case ISD::SUB:
- return performAddSubCombine(N, DCI, DAG);
+ return performAddSubCombine(N, DCI, DAG);
case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
@@ -15627,8 +15627,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performExtendCombine(N, DCI, DAG);
case ISD::SIGN_EXTEND_INREG:
return performSignExtendInRegCombine(N, DCI, DAG);
- case ISD::TRUNCATE:
- return performVectorTruncateCombine(N, DCI, DAG);
+ case ISD::TRUNCATE:
+ return performVectorTruncateCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
case ISD::SELECT:
@@ -15641,9 +15641,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
- case ISD::MGATHER:
- case ISD::MSCATTER:
- return performMaskedGatherScatterCombine(N, DCI, DAG);
+ case ISD::MGATHER:
+ case ISD::MSCATTER:
+ return performMaskedGatherScatterCombine(N, DCI, DAG);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::TBNZ:
@@ -15655,14 +15655,14 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performPostLD1Combine(N, DCI, false);
case AArch64ISD::NVCAST:
return performNVCASTCombine(N);
- case AArch64ISD::UZP1:
- return performUzpCombine(N, DAG);
+ case AArch64ISD::UZP1:
+ return performUzpCombine(N, DAG);
case ISD::INSERT_VECTOR_ELT:
return performPostLD1Combine(N, DCI, true);
- case ISD::EXTRACT_VECTOR_ELT:
- return performExtractVectorEltCombine(N, DAG);
- case ISD::VECREDUCE_ADD:
- return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return performExtractVectorEltCombine(N, DAG);
+ case ISD::VECREDUCE_ADD:
+ return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -15811,10 +15811,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
EVT ResVT = N->getValueType(0);
- uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
- SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
+ uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
+ SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
SDValue Val =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
return DAG.getMergeValues({Val, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_set: {
@@ -15825,11 +15825,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
SDValue Vec = N->getOperand(4);
EVT TupleVT = Tuple.getValueType();
- uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
+ uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
- uint64_t NumLanes =
- Vec.getValueType().getVectorElementCount().getKnownMinValue();
+ uint64_t NumLanes =
+ Vec.getValueType().getVectorElementCount().getKnownMinValue();
if ((TupleLanes % NumLanes) != 0)
report_fatal_error("invalid tuple vector!");
@@ -15841,9 +15841,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
if (I == IdxConst)
Opnds.push_back(Vec);
else {
- SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
- Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
- Vec.getValueType(), Tuple, ExtIdx));
+ SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
+ Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
+ Vec.getValueType(), Tuple, ExtIdx));
}
}
SDValue Concat =
@@ -16065,7 +16065,7 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
ElementCount ResEC = VT.getVectorElementCount();
- if (InVT.getVectorElementCount() != (ResEC * 2))
+ if (InVT.getVectorElementCount() != (ResEC * 2))
return;
auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
@@ -16073,7 +16073,7 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
return;
unsigned Index = CIndex->getZExtValue();
- if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
+ if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
return;
unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
@@ -16108,7 +16108,7 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
assert(N->getValueType(0) == MVT::i128 &&
"AtomicCmpSwap on types less than 128 should be legal");
- if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
+ if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
// LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
// so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
SDValue Ops[] = {
@@ -16189,8 +16189,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
return;
case ISD::CTPOP:
- if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
- Results.push_back(Result);
+ if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
+ Results.push_back(Result);
return;
case AArch64ISD::SADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
@@ -16335,44 +16335,44 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size > 128) return AtomicExpansionKind::None;
-
- // Nand is not supported in LSE.
- // Leave 128 bits to LLSC or CmpXChg.
- if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
- if (Subtarget->hasLSE())
- return AtomicExpansionKind::None;
- if (Subtarget->outlineAtomics()) {
- // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
- // Don't outline them unless
- // (1) high level <atomic> support approved:
- // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
- // (2) low level libgcc and compiler-rt support implemented by:
- // min/max outline atomics helpers
- if (AI->getOperation() != AtomicRMWInst::Min &&
- AI->getOperation() != AtomicRMWInst::Max &&
- AI->getOperation() != AtomicRMWInst::UMin &&
- AI->getOperation() != AtomicRMWInst::UMax) {
- return AtomicExpansionKind::None;
- }
- }
- }
-
- // At -O0, fast-regalloc cannot cope with the live vregs necessary to
- // implement atomicrmw without spilling. If the target address is also on the
- // stack and close enough to the spill slot, this can lead to a situation
- // where the monitor always gets cleared and the atomic operation can never
- // succeed. So at -O0 lower this operation to a CAS loop.
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
- return AtomicExpansionKind::CmpXChg;
-
- return AtomicExpansionKind::LLSC;
+
+ // Nand is not supported in LSE.
+ // Leave 128 bits to LLSC or CmpXChg.
+ if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
+ if (Subtarget->hasLSE())
+ return AtomicExpansionKind::None;
+ if (Subtarget->outlineAtomics()) {
+ // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
+ // Don't outline them unless
+ // (1) high level <atomic> support approved:
+ // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
+ // (2) low level libgcc and compiler-rt support implemented by:
+ // min/max outline atomics helpers
+ if (AI->getOperation() != AtomicRMWInst::Min &&
+ AI->getOperation() != AtomicRMWInst::Max &&
+ AI->getOperation() != AtomicRMWInst::UMin &&
+ AI->getOperation() != AtomicRMWInst::UMax) {
+ return AtomicExpansionKind::None;
+ }
+ }
+ }
+
+ // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+ // implement atomicrmw without spilling. If the target address is also on the
+ // stack and close enough to the spill slot, this can lead to a situation
+ // where the monitor always gets cleared and the atomic operation can never
+ // succeed. So at -O0 lower this operation to a CAS loop.
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ return AtomicExpansionKind::CmpXChg;
+
+ return AtomicExpansionKind::LLSC;
}
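For readers less familiar with the expansion kinds chosen above: AtomicExpansionKind::CmpXChg means the RMW operation is rewritten as a compare-and-swap retry loop, avoiding the exclusive-monitor (LL/SC) sequence that fast-regalloc spills can break at -O0. A rough illustration of that shape in plain C++, not the IR the expansion pass actually emits:

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    // An atomicrmw add expressed as a CAS retry loop.
    static uint64_t atomic_add_via_cas(std::atomic<uint64_t> &V, uint64_t Inc) {
      uint64_t Old = V.load(std::memory_order_relaxed);
      while (!V.compare_exchange_weak(Old, Old + Inc,
                                      std::memory_order_seq_cst,
                                      std::memory_order_relaxed)) {
        // On failure Old has been refreshed with the current value; retry.
      }
      return Old; // atomicrmw yields the previous value
    }

    int main() {
      std::atomic<uint64_t> Counter{41};
      assert(atomic_add_via_cas(Counter, 1) == 41);
      assert(Counter.load() == 42);
      return 0;
    }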
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
// If subtarget has LSE, leave cmpxchg intact for codegen.
- if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
+ if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
@@ -16883,92 +16883,92 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
Store->isTruncatingStore());
}
-SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
- SDValue Op, SelectionDAG &DAG) const {
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
- EVT EltVT = VT.getVectorElementType();
-
- bool Signed = Op.getOpcode() == ISD::SDIV;
- unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
-
- // Scalable vector i32/i64 DIV is supported.
- if (EltVT == MVT::i32 || EltVT == MVT::i64)
- return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
-
- // Scalable vector i8/i16 DIV is not supported. Promote it to i32.
- EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
- EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
- EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
- EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
-
- // Convert the operands to scalable vectors.
- SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
- SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
-
- // Extend the scalable operands.
- unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
- unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
- SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
- SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
- SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
- SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
-
- // Convert back to fixed vectors so the DIV can be further lowered.
- Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
- Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
- Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
- Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
- SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
- Op0Lo, Op1Lo);
- SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
- Op0Hi, Op1Hi);
-
- // Convert again to scalable vectors to truncate.
- ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
- ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
- SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
- ResultLo, ResultHi);
-
- return convertFromScalableVector(DAG, VT, ScalableResult);
-}
-
-SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
- SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
-
- SDLoc DL(Op);
- SDValue Val = Op.getOperand(0);
- EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
- Val = convertToScalableVector(DAG, ContainerVT, Val);
-
- bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
- unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
-
- // Repeatedly unpack Val until the result is of the desired element type.
- switch (ContainerVT.getSimpleVT().SimpleTy) {
- default:
- llvm_unreachable("unimplemented container type");
- case MVT::nxv16i8:
- Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
- if (VT.getVectorElementType() == MVT::i16)
- break;
- LLVM_FALLTHROUGH;
- case MVT::nxv8i16:
- Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
- if (VT.getVectorElementType() == MVT::i32)
- break;
- LLVM_FALLTHROUGH;
- case MVT::nxv4i32:
- Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
- assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
- break;
- }
-
- return convertFromScalableVector(DAG, VT, Val);
-}
-
+SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
+ SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ bool Signed = Op.getOpcode() == ISD::SDIV;
+ unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
+
+ // Scalable vector i32/i64 DIV is supported.
+ if (EltVT == MVT::i32 || EltVT == MVT::i64)
+ return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
+
+ // Scalable vector i8/i16 DIV is not supported. Promote it to i32.
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+ EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+ EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
+ EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
+
+ // Convert the operands to scalable vectors.
+ SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
+ SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
+
+ // Extend the scalable operands.
+ unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
+ unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
+ SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
+ SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
+ SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
+ SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
+
+ // Convert back to fixed vectors so the DIV can be further lowered.
+ Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
+ Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
+ Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
+ Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
+ SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
+ Op0Lo, Op1Lo);
+ SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
+ Op0Hi, Op1Hi);
+
+ // Convert again to scalable vectors to truncate.
+ ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
+ ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
+ SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
+ ResultLo, ResultHi);
+
+ return convertFromScalableVector(DAG, VT, ScalableResult);
+}
+
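A scalar sketch of why the promote-divide-narrow route above is value-preserving for the unsupported i8/i16 element types; the widening stands in for the SUNPKLO/SUNPKHI step and the final truncation for the UZP1, and the lane values are arbitrary:

    #include <array>
    #include <cassert>
    #include <cstdint>

    int main() {
      std::array<int16_t, 4> A = {100, -100, 32000, 7};
      std::array<int16_t, 4> B = {3, 7, -5, 2};

      for (unsigned I = 0; I != 4; ++I) {
        // Per lane: widen both operands, divide at i32, narrow the quotient.
        int32_t Wide = static_cast<int32_t>(A[I]) / static_cast<int32_t>(B[I]);
        int16_t Narrowed = static_cast<int16_t>(Wide);

        // The quotient of two i16 values fits back into i16 for every case
        // the IR defines (INT16_MIN / -1 is the lone overflow, and sdiv
        // overflow is undefined anyway), so narrowing loses nothing.
        int16_t Direct = static_cast<int16_t>(A[I] / B[I]);
        assert(Narrowed == Direct);
      }
      return 0;
    }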
+SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
+ SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
+
+ SDLoc DL(Op);
+ SDValue Val = Op.getOperand(0);
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
+ Val = convertToScalableVector(DAG, ContainerVT, Val);
+
+ bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
+ unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
+
+ // Repeatedly unpack Val until the result is of the desired element type.
+ switch (ContainerVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("unimplemented container type");
+ case MVT::nxv16i8:
+ Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
+ if (VT.getVectorElementType() == MVT::i16)
+ break;
+ LLVM_FALLTHROUGH;
+ case MVT::nxv8i16:
+ Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
+ if (VT.getVectorElementType() == MVT::i32)
+ break;
+ LLVM_FALLTHROUGH;
+ case MVT::nxv4i32:
+ Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
+ assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
+ break;
+ }
+
+ return convertFromScalableVector(DAG, VT, Val);
+}
+
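A quick sketch of the unpack ladder used above: each UUNPKLO/SUNPKLO step widens elements by one level, and chaining the steps gives the same value as a single wide extension, which is why the switch can simply fall through level by level (values below are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Unsigned ladder: i8 -> i16 -> i32 -> i64, one unpack per level.
      uint8_t Lane = 0xAB;
      uint16_t Step1 = Lane;  // nxv16i8 -> nxv8i16
      uint32_t Step2 = Step1; // nxv8i16 -> nxv4i32
      uint64_t Step3 = Step2; // nxv4i32 -> nxv2i64
      assert(Step3 == static_cast<uint64_t>(Lane));

      // The signed ladder composes the same way.
      int8_t SLane = -5;
      int64_t Stepwise = static_cast<int64_t>(
          static_cast<int32_t>(static_cast<int16_t>(SLane)));
      assert(Stepwise == static_cast<int64_t>(SLane));
      return 0;
    }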
SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -17005,21 +17005,21 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
return convertFromScalableVector(DAG, VT, Val);
}
-// Convert vector operation 'Op' to an equivalent predicated operation whereby
-// the original operation's type is used to construct a suitable predicate.
-// NOTE: The results for inactive lanes are undefined.
+// Convert vector operation 'Op' to an equivalent predicated operation whereby
+// the original operation's type is used to construct a suitable predicate.
+// NOTE: The results for inactive lanes are undefined.
SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SelectionDAG &DAG,
- unsigned NewOp,
- bool OverrideNEON) const {
+ unsigned NewOp,
+ bool OverrideNEON) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
auto Pg = getPredicateForVector(DAG, DL, VT);
- if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
+ if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
- // Create list of operands by converting existing ones to scalable types.
+ // Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
if (isa<CondCodeSDNode>(V)) {
@@ -17027,21 +17027,21 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
continue;
}
- if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
- EVT VTArg = VTNode->getVT().getVectorElementType();
- EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
- Operands.push_back(DAG.getValueType(NewVTArg));
- continue;
- }
-
- assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
+ if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
+ EVT VTArg = VTNode->getVT().getVectorElementType();
+ EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
+ Operands.push_back(DAG.getValueType(NewVTArg));
+ continue;
+ }
+
+ assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
"Only fixed length vectors are supported!");
Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
- if (isMergePassthruOpcode(NewOp))
- Operands.push_back(DAG.getUNDEF(ContainerVT));
-
+ if (isMergePassthruOpcode(NewOp))
+ Operands.push_back(DAG.getUNDEF(ContainerVT));
+
auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
@@ -17050,228 +17050,228 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
- assert((!V.getValueType().isVector() ||
- V.getValueType().isScalableVector()) &&
+ assert((!V.getValueType().isVector() ||
+ V.getValueType().isScalableVector()) &&
"Only scalable vectors are supported!");
Operands.push_back(V);
}
- if (isMergePassthruOpcode(NewOp))
- Operands.push_back(DAG.getUNDEF(VT));
-
+ if (isMergePassthruOpcode(NewOp))
+ Operands.push_back(DAG.getUNDEF(VT));
+
return DAG.getNode(NewOp, DL, VT, Operands);
}
-
-// If a fixed length vector operation has no side effects when applied to
-// undefined elements, we can safely use scalable vectors to perform the same
-// operation without needing to worry about predication.
-SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- assert(useSVEForFixedLengthVectorVT(VT) &&
- "Only expected to lower fixed length vector operation!");
- EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
-
- // Create list of operands by converting existing ones to scalable types.
- SmallVector<SDValue, 4> Ops;
- for (const SDValue &V : Op->op_values()) {
- assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
-
- // Pass through non-vector operands.
- if (!V.getValueType().isVector()) {
- Ops.push_back(V);
- continue;
- }
-
- // "cast" fixed length vector to a scalable vector.
- assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
- "Only fixed length vectors are supported!");
- Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
- }
-
- auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
- return convertFromScalableVector(DAG, VT, ScalableRes);
-}
-
-SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
- SelectionDAG &DAG) const {
- SDLoc DL(ScalarOp);
- SDValue AccOp = ScalarOp.getOperand(0);
- SDValue VecOp = ScalarOp.getOperand(1);
- EVT SrcVT = VecOp.getValueType();
- EVT ResVT = SrcVT.getVectorElementType();
-
- EVT ContainerVT = SrcVT;
- if (SrcVT.isFixedLengthVector()) {
- ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
- VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
- }
-
- SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
- SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
-
- // Convert operands to Scalable.
- AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
- DAG.getUNDEF(ContainerVT), AccOp, Zero);
-
- // Perform reduction.
- SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
- Pg, AccOp, VecOp);
-
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
-}
-
-SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
- SelectionDAG &DAG) const {
- SDLoc DL(ReduceOp);
- SDValue Op = ReduceOp.getOperand(0);
- EVT OpVT = Op.getValueType();
- EVT VT = ReduceOp.getValueType();
-
- if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
- return SDValue();
-
- SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
-
- switch (ReduceOp.getOpcode()) {
- default:
- return SDValue();
- case ISD::VECREDUCE_OR:
- return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
- case ISD::VECREDUCE_AND: {
- Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
- return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
- }
- case ISD::VECREDUCE_XOR: {
- SDValue ID =
- DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
- SDValue Cntp =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
- return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
- }
- }
-
- return SDValue();
-}
-
-SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
- SDValue ScalarOp,
- SelectionDAG &DAG) const {
- SDLoc DL(ScalarOp);
- SDValue VecOp = ScalarOp.getOperand(0);
- EVT SrcVT = VecOp.getValueType();
-
- if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
- EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
- VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
- }
-
- // UADDV always returns an i64 result.
- EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
- SrcVT.getVectorElementType();
- EVT RdxVT = SrcVT;
- if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
- RdxVT = getPackedSVEVectorVT(ResVT);
-
- SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
- SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
- SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
- Rdx, DAG.getConstant(0, DL, MVT::i64));
-
- // The VEC_REDUCE nodes expect an element size result.
- if (ResVT != ScalarOp.getValueType())
- Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
-
- return Res;
-}
-
-SDValue
-AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
-
- EVT InVT = Op.getOperand(1).getValueType();
- EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
- SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
- SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
-
- // Convert the mask to a predicated (NOTE: We don't need to worry about
- // inactive lanes since VSELECT is safe when given undefined elements).
- EVT MaskVT = Op.getOperand(0).getValueType();
- EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
- auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
- Mask = DAG.getNode(ISD::TRUNCATE, DL,
- MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
-
- auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
- Mask, Op1, Op2);
-
- return convertFromScalableVector(DAG, VT, ScalableRes);
-}
-
-SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
- SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT InVT = Op.getOperand(0).getValueType();
- EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
-
- assert(useSVEForFixedLengthVectorVT(InVT) &&
- "Only expected to lower fixed length vector operation!");
- assert(Op.getValueType() == InVT.changeTypeToInteger() &&
- "Expected integer result of the same bit length as the inputs!");
-
- // Expand floating point vector comparisons.
- if (InVT.isFloatingPoint())
- return SDValue();
-
- auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
- auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
- auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
-
- EVT CmpVT = Pg.getValueType();
- auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
- {Pg, Op1, Op2, Op.getOperand(2)});
-
- EVT PromoteVT = ContainerVT.changeTypeToInteger();
- auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
- return convertFromScalableVector(DAG, Op.getValueType(), Promote);
-}
-
-SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT InVT = Op.getValueType();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- (void)TLI;
-
- assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
- InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
- "Only expect to cast between legal scalable vector types!");
- assert((VT.getVectorElementType() == MVT::i1) ==
- (InVT.getVectorElementType() == MVT::i1) &&
- "Cannot cast between data and predicate scalable vector types!");
-
- if (InVT == VT)
- return Op;
-
- if (VT.getVectorElementType() == MVT::i1)
- return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
-
- EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
- EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
- assert((VT == PackedVT || InVT == PackedInVT) &&
- "Cannot cast between unpacked scalable vector types!");
-
- // Pack input if required.
- if (InVT != PackedInVT)
- Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
-
- Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
-
- // Unpack result if required.
- if (VT != PackedVT)
- Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
-
- return Op;
-}
+
+// If a fixed length vector operation has no side effects when applied to
+// undefined elements, we can safely use scalable vectors to perform the same
+// operation without needing to worry about predication.
+SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(useSVEForFixedLengthVectorVT(VT) &&
+ "Only expected to lower fixed length vector operation!");
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+
+ // Create list of operands by converting existing ones to scalable types.
+ SmallVector<SDValue, 4> Ops;
+ for (const SDValue &V : Op->op_values()) {
+ assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
+
+ // Pass through non-vector operands.
+ if (!V.getValueType().isVector()) {
+ Ops.push_back(V);
+ continue;
+ }
+
+ // "cast" fixed length vector to a scalable vector.
+ assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
+ "Only fixed length vectors are supported!");
+ Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
+ }
+
+ auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
+ return convertFromScalableVector(DAG, VT, ScalableRes);
+}
+
+SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
+ SelectionDAG &DAG) const {
+ SDLoc DL(ScalarOp);
+ SDValue AccOp = ScalarOp.getOperand(0);
+ SDValue VecOp = ScalarOp.getOperand(1);
+ EVT SrcVT = VecOp.getValueType();
+ EVT ResVT = SrcVT.getVectorElementType();
+
+ EVT ContainerVT = SrcVT;
+ if (SrcVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
+ VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
+ }
+
+ SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+
+ // Convert operands to Scalable.
+ AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), AccOp, Zero);
+
+ // Perform reduction.
+ SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
+ Pg, AccOp, VecOp);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
+}
+
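The ordered reduction above deliberately keeps the strict left-to-right evaluation that FADDA provides, because floating-point addition is not associative and re-associating can change the result. A small self-contained demonstration with hand-picked float values:

    #include <cassert>

    int main() {
      // VECREDUCE_SEQ_FADD folds strictly left to right, seeded with the
      // incoming accumulator.
      const float Acc = 1e8f;
      const float V[2] = {4.0f, 4.0f};

      float Ordered = Acc;
      for (float X : V)
        Ordered += X; // (1e8 + 4) + 4 rounds back to 1e8

      // Re-associated as a tree, the small terms survive: 1e8 + 8.
      float Reassociated = Acc + (V[0] + V[1]);

      // The two orders give different values, so the sequential form cannot
      // be lowered like the relaxed (reassociable) VECREDUCE_FADD.
      assert(Ordered != Reassociated);
      return 0;
    }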
+SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
+ SelectionDAG &DAG) const {
+ SDLoc DL(ReduceOp);
+ SDValue Op = ReduceOp.getOperand(0);
+ EVT OpVT = Op.getValueType();
+ EVT VT = ReduceOp.getValueType();
+
+ if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
+
+ switch (ReduceOp.getOpcode()) {
+ default:
+ return SDValue();
+ case ISD::VECREDUCE_OR:
+ return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
+ case ISD::VECREDUCE_AND: {
+ Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
+ return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
+ }
+ case ISD::VECREDUCE_XOR: {
+ SDValue ID =
+ DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
+ SDValue Cntp =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
+ return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
+ }
+ }
+
+ return SDValue();
+}
+
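To connect the three predicate-reduction cases above with the PTEST/CNTP nodes they map to, here is a scalar model over a handful of lanes, using an all-true governing predicate and arbitrary lane values (names are invented for the illustration):

    #include <array>
    #include <cassert>

    int main() {
      std::array<bool, 4> Pg = {true, true, true, true}; // governing predicate
      std::array<bool, 4> Op = {true, false, true, true};

      bool AnyActive = false, NoneActive = true;
      unsigned Cntp = 0;
      for (unsigned I = 0; I != 4; ++I) {
        bool Xored = Op[I] != Pg[I];        // the XOR fed to PTEST for the AND case
        AnyActive = AnyActive || (Pg[I] && Op[I]);    // PTEST ANY_ACTIVE on Op
        NoneActive = NoneActive && !(Pg[I] && Xored); // PTEST NONE_ACTIVE on Op^Pg
        Cntp += (Pg[I] && Op[I]) ? 1u : 0u;           // aarch64_sve_cntp
      }

      bool ReduceOr = Op[0] || Op[1] || Op[2] || Op[3];
      bool ReduceAnd = Op[0] && Op[1] && Op[2] && Op[3];
      bool ReduceXor = Op[0] ^ Op[1] ^ Op[2] ^ Op[3];

      assert(ReduceOr == AnyActive);           // VECREDUCE_OR: any lane set?
      assert(ReduceAnd == NoneActive);         // VECREDUCE_AND: no lane of Op^Pg set?
      assert(ReduceXor == ((Cntp & 1u) != 0)); // VECREDUCE_XOR: parity of the count
      return 0;
    }

For an i1 result, truncating the CNTP count to the result type keeps only its low bit, which is exactly the parity the XOR reduction needs.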
+SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
+ SDValue ScalarOp,
+ SelectionDAG &DAG) const {
+ SDLoc DL(ScalarOp);
+ SDValue VecOp = ScalarOp.getOperand(0);
+ EVT SrcVT = VecOp.getValueType();
+
+ if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
+ VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
+ }
+
+ // UADDV always returns an i64 result.
+ EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
+ SrcVT.getVectorElementType();
+ EVT RdxVT = SrcVT;
+ if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
+ RdxVT = getPackedSVEVectorVT(ResVT);
+
+ SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
+ SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
+ Rdx, DAG.getConstant(0, DL, MVT::i64));
+
+ // The VEC_REDUCE nodes expect an element size result.
+ if (ResVT != ScalarOp.getValueType())
+ Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
+
+ return Res;
+}
+
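One more note on the width handling above: UADDV accumulates into an i64 precisely because the lane type usually cannot hold the sum, and the element-sized VECREDUCE result is then just the truncation applied at the end. A sketch with invented lane values:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // Thirty-two i8 lanes of 255 overflow i8 immediately...
      std::vector<uint8_t> Lanes(32, 255);

      uint64_t WideSum = 0; // what the i64-producing UADDV computes
      for (uint8_t L : Lanes)
        WideSum += L;
      assert(WideSum == 8160);

      // ...while the element-sized VECREDUCE_ADD result is the wide sum
      // truncated back to i8, i.e. 8160 mod 256.
      uint8_t ElementSized = static_cast<uint8_t>(WideSum);
      assert(ElementSized == 224);
      return 0;
    }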
+SDValue
+AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ EVT InVT = Op.getOperand(1).getValueType();
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
+ SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
+ SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
+
+ // Convert the mask to a predicated (NOTE: We don't need to worry about
+ // inactive lanes since VSELECT is safe when given undefined elements).
+ EVT MaskVT = Op.getOperand(0).getValueType();
+ EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
+ auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
+ Mask = DAG.getNode(ISD::TRUNCATE, DL,
+ MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
+
+ auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
+ Mask, Op1, Op2);
+
+ return convertFromScalableVector(DAG, VT, ScalableRes);
+}
+
+SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
+ SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT InVT = Op.getOperand(0).getValueType();
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
+
+ assert(useSVEForFixedLengthVectorVT(InVT) &&
+ "Only expected to lower fixed length vector operation!");
+ assert(Op.getValueType() == InVT.changeTypeToInteger() &&
+ "Expected integer result of the same bit length as the inputs!");
+
+ // Expand floating point vector comparisons.
+ if (InVT.isFloatingPoint())
+ return SDValue();
+
+ auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
+ auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
+ auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
+
+ EVT CmpVT = Pg.getValueType();
+ auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
+ {Pg, Op1, Op2, Op.getOperand(2)});
+
+ EVT PromoteVT = ContainerVT.changeTypeToInteger();
+ auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
+ return convertFromScalableVector(DAG, Op.getValueType(), Promote);
+}
+
+SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT InVT = Op.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ (void)TLI;
+
+ assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
+ InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
+ "Only expect to cast between legal scalable vector types!");
+ assert((VT.getVectorElementType() == MVT::i1) ==
+ (InVT.getVectorElementType() == MVT::i1) &&
+ "Cannot cast between data and predicate scalable vector types!");
+
+ if (InVT == VT)
+ return Op;
+
+ if (VT.getVectorElementType() == MVT::i1)
+ return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
+
+ EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
+ EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
+ assert((VT == PackedVT || InVT == PackedInVT) &&
+ "Cannot cast between unpacked scalable vector types!");
+
+ // Pack input if required.
+ if (InVT != PackedInVT)
+ Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
+
+ Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
+
+ // Unpack result if required.
+ if (VT != PackedVT)
+ Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
+
+ return Op;
+}
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.h
index 9550197159..535aa519f7 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64ISelLowering.h
@@ -72,51 +72,51 @@ enum NodeType : unsigned {
ADC,
SBC, // adc, sbc instructions
- // Predicated instructions where inactive lanes produce undefined results.
+ // Predicated instructions where inactive lanes produce undefined results.
ADD_PRED,
FADD_PRED,
- FDIV_PRED,
- FMA_PRED,
- FMAXNM_PRED,
- FMINNM_PRED,
- FMUL_PRED,
- FSUB_PRED,
- MUL_PRED,
+ FDIV_PRED,
+ FMA_PRED,
+ FMAXNM_PRED,
+ FMINNM_PRED,
+ FMUL_PRED,
+ FSUB_PRED,
+ MUL_PRED,
SDIV_PRED,
- SHL_PRED,
- SMAX_PRED,
- SMIN_PRED,
- SRA_PRED,
- SRL_PRED,
- SUB_PRED,
+ SHL_PRED,
+ SMAX_PRED,
+ SMIN_PRED,
+ SRA_PRED,
+ SRL_PRED,
+ SUB_PRED,
UDIV_PRED,
- UMAX_PRED,
- UMIN_PRED,
-
- // Predicated instructions with the result of inactive lanes provided by the
- // last operand.
- FABS_MERGE_PASSTHRU,
- FCEIL_MERGE_PASSTHRU,
- FFLOOR_MERGE_PASSTHRU,
- FNEARBYINT_MERGE_PASSTHRU,
- FNEG_MERGE_PASSTHRU,
- FRECPX_MERGE_PASSTHRU,
- FRINT_MERGE_PASSTHRU,
- FROUND_MERGE_PASSTHRU,
- FROUNDEVEN_MERGE_PASSTHRU,
- FSQRT_MERGE_PASSTHRU,
- FTRUNC_MERGE_PASSTHRU,
- FP_ROUND_MERGE_PASSTHRU,
- FP_EXTEND_MERGE_PASSTHRU,
- UINT_TO_FP_MERGE_PASSTHRU,
- SINT_TO_FP_MERGE_PASSTHRU,
- FCVTZU_MERGE_PASSTHRU,
- FCVTZS_MERGE_PASSTHRU,
- SIGN_EXTEND_INREG_MERGE_PASSTHRU,
- ZERO_EXTEND_INREG_MERGE_PASSTHRU,
- ABS_MERGE_PASSTHRU,
- NEG_MERGE_PASSTHRU,
-
+ UMAX_PRED,
+ UMIN_PRED,
+
+ // Predicated instructions with the result of inactive lanes provided by the
+ // last operand.
+ FABS_MERGE_PASSTHRU,
+ FCEIL_MERGE_PASSTHRU,
+ FFLOOR_MERGE_PASSTHRU,
+ FNEARBYINT_MERGE_PASSTHRU,
+ FNEG_MERGE_PASSTHRU,
+ FRECPX_MERGE_PASSTHRU,
+ FRINT_MERGE_PASSTHRU,
+ FROUND_MERGE_PASSTHRU,
+ FROUNDEVEN_MERGE_PASSTHRU,
+ FSQRT_MERGE_PASSTHRU,
+ FTRUNC_MERGE_PASSTHRU,
+ FP_ROUND_MERGE_PASSTHRU,
+ FP_EXTEND_MERGE_PASSTHRU,
+ UINT_TO_FP_MERGE_PASSTHRU,
+ SINT_TO_FP_MERGE_PASSTHRU,
+ FCVTZU_MERGE_PASSTHRU,
+ FCVTZS_MERGE_PASSTHRU,
+ SIGN_EXTEND_INREG_MERGE_PASSTHRU,
+ ZERO_EXTEND_INREG_MERGE_PASSTHRU,
+ ABS_MERGE_PASSTHRU,
+ NEG_MERGE_PASSTHRU,
+
SETCC_MERGE_ZERO,
// Arithmetic instructions which write flags.
@@ -219,18 +219,18 @@ enum NodeType : unsigned {
SADDV,
UADDV,
- // Vector halving addition
- SHADD,
- UHADD,
-
+ // Vector halving addition
+ SHADD,
+ UHADD,
+
// Vector rounding halving addition
SRHADD,
URHADD,
- // Absolute difference
- UABD,
- SABD,
-
+ // Absolute difference
+ UABD,
+ SABD,
+
// Vector across-lanes min/max
// Only the lower result lane is defined.
SMINV,
@@ -238,8 +238,8 @@ enum NodeType : unsigned {
SMAXV,
UMAXV,
- SADDV_PRED,
- UADDV_PRED,
+ SADDV_PRED,
+ UADDV_PRED,
SMAXV_PRED,
UMAXV_PRED,
SMINV_PRED,
@@ -307,14 +307,14 @@ enum NodeType : unsigned {
PTEST,
PTRUE,
- BITREVERSE_MERGE_PASSTHRU,
- BSWAP_MERGE_PASSTHRU,
- CTLZ_MERGE_PASSTHRU,
- CTPOP_MERGE_PASSTHRU,
+ BITREVERSE_MERGE_PASSTHRU,
+ BSWAP_MERGE_PASSTHRU,
+ CTLZ_MERGE_PASSTHRU,
+ CTPOP_MERGE_PASSTHRU,
DUP_MERGE_PASSTHRU,
INDEX_VECTOR,
- // Cast between vectors of the same element type but differ in length.
+ // Cast between vectors of the same element type but differ in length.
REINTERPRET_CAST,
LD1_MERGE_ZERO,
@@ -424,11 +424,11 @@ enum NodeType : unsigned {
LDP,
STP,
- STNP,
-
- // Pseudo for a OBJC call that gets emitted together with a special `mov
- // x29, x29` marker instruction.
- CALL_RVMARKER
+ STNP,
+
+ // Pseudo for a OBJC call that gets emitted together with a special `mov
+ // x29, x29` marker instruction.
+ CALL_RVMARKER
};
} // end namespace AArch64ISD
@@ -438,14 +438,14 @@ namespace {
// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
-// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
-// 32 bits, they're probably just qualifying a CopyFromReg.
+// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
+// 32 bits, they're probably just qualifying a CopyFromReg.
// FIXME: X86 also checks for CMOV here. Do we need something similar?
static inline bool isDef32(const SDNode &N) {
unsigned Opc = N.getOpcode();
return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
- Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
- Opc != ISD::AssertZext;
+ Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
+ Opc != ISD::AssertZext;
}
} // end anonymous namespace
@@ -784,7 +784,7 @@ public:
/// illegal as the original, thus leading to an infinite legalisation loop.
/// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
/// vector types this override can be removed.
- bool mergeStoresAfterLegalization(EVT VT) const override;
+ bool mergeStoresAfterLegalization(EVT VT) const override;
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
@@ -815,11 +815,11 @@ private:
SDValue ThisVal) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
bool isEligibleForTailCallOptimization(
@@ -903,28 +903,28 @@ private:
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
- bool OverrideNEON = false) const;
- SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
+ bool OverrideNEON = false) const;
+ SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
@@ -939,17 +939,17 @@ private:
SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
- SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
- SelectionDAG &DAG) const;
- SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
- SelectionDAG &DAG) const;
+ SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
+ SelectionDAG &DAG) const;
+ SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
+ SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
- SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
- SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
- SelectionDAG &DAG) const;
- SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
+ SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
+ SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
+ SelectionDAG &DAG) const;
+ SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
SelectionDAG &DAG) const;
@@ -961,10 +961,10 @@ private:
bool Reciprocal) const override;
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &ExtraSteps) const override;
- SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
- const DenormalMode &Mode) const override;
- SDValue getSqrtResultForDenormInput(SDValue Operand,
- SelectionDAG &DAG) const override;
+ SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
+ const DenormalMode &Mode) const override;
+ SDValue getSqrtResultForDenormInput(SDValue Operand,
+ SelectionDAG &DAG) const override;
unsigned combineRepeatedFPDivisors() const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
@@ -996,7 +996,7 @@ private:
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
- bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
+ bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
@@ -1023,21 +1023,21 @@ private:
bool shouldLocalize(const MachineInstr &MI,
const TargetTransformInfo *TTI) const override;
- // Normally SVE is only used for byte size vectors that do not fit within a
- // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
- // used for 64bit and 128bit vectors as well.
- bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
-
- // With the exception of data-predicate transitions, no instructions are
- // required to cast between legal scalable vector types. However:
- // 1. Packed and unpacked types have different bit lengths, meaning BITCAST
- // is not universally useable.
- // 2. Most unpacked integer types are not legal and thus integer extends
- // cannot be used to convert between unpacked and packed types.
- // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
- // to transition between unpacked and packed types of the same element type,
- // with BITCAST used otherwise.
- SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
+ // Normally SVE is only used for byte size vectors that do not fit within a
+ // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
+ // used for 64bit and 128bit vectors as well.
+ bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
+
+ // With the exception of data-predicate transitions, no instructions are
+ // required to cast between legal scalable vector types. However:
+ // 1. Packed and unpacked types have different bit lengths, meaning BITCAST
+ // is not universally useable.
+ // 2. Most unpacked integer types are not legal and thus integer extends
+ // cannot be used to convert between unpacked and packed types.
+ // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
+ // to transition between unpacked and packed types of the same element type,
+ // with BITCAST used otherwise.
+ SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
};
namespace AArch64 {
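
The comment block on getSVESafeBitCast above describes a potentially multi-step cast. A rough sketch of that staging, assuming a hypothetical packedContainerType() helper (the real routine works directly on legal EVTs and is not shown in this hunk):

#include "AArch64ISelLowering.h"        // AArch64ISD::REINTERPRET_CAST
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Assumed helper, not a real LLVM API: maps an unpacked type to its packed
// container with the same element type.
static EVT packedContainerType(EVT VT);

static SDValue sveSafeBitCastSketch(EVT DstVT, SDValue Op, SelectionDAG &DAG,
                                    const SDLoc &DL) {
  EVT SrcVT = Op.getValueType();
  // Same element type: REINTERPRET_CAST moves between packed and unpacked
  // layouts directly.
  if (DstVT.getVectorElementType() == SrcVT.getVectorElementType())
    return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, DstVT, Op);
  // Different element types: go via packed (legal) containers, where a plain
  // BITCAST is usable, and reinterpret on either side as needed.
  SDValue Packed = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL,
                               packedContainerType(SrcVT), Op);
  SDValue Cast = DAG.getNode(ISD::BITCAST, DL, packedContainerType(DstVT),
                             Packed);
  return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, DstVT, Cast);
}
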
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrFormats.td
index cf08f56e5b..eb03fce945 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrFormats.td
@@ -60,14 +60,14 @@ class AArch64Inst<Format f, string cstr> : Instruction {
bits<2> Form = F.Value;
// Defaults
- bit isWhile = 0;
- bit isPTestLike = 0;
+ bit isWhile = 0;
+ bit isPTestLike = 0;
FalseLanesEnum FalseLanes = FalseLanesNone;
DestructiveInstTypeEnum DestructiveInstType = NotDestructive;
ElementSizeEnum ElementSize = ElementSizeNone;
- let TSFlags{10} = isPTestLike;
- let TSFlags{9} = isWhile;
+ let TSFlags{10} = isPTestLike;
+ let TSFlags{9} = isWhile;
let TSFlags{8-7} = FalseLanes.Value;
let TSFlags{6-3} = DestructiveInstType.Value;
let TSFlags{2-0} = ElementSize.Value;
@@ -267,7 +267,7 @@ def adrplabel : Operand<i64> {
let EncoderMethod = "getAdrLabelOpValue";
let PrintMethod = "printAdrpLabel";
let ParserMatchClass = AdrpOperand;
- let OperandType = "OPERAND_PCREL";
+ let OperandType = "OPERAND_PCREL";
}
def AdrOperand : AsmOperandClass {
@@ -330,7 +330,7 @@ def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> {
}
def SImm8Operand : SImmOperand<8>;
-def simm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -128 && Imm < 128; }]> {
+def simm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -128 && Imm < 128; }]> {
let ParserMatchClass = SImm8Operand;
let DecoderMethod = "DecodeSImm<8>";
}
@@ -919,13 +919,13 @@ def imm0_1 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_1Operand;
}
-// timm0_1 - as above, but use TargetConstant (TImmLeaf)
-def timm0_1 : Operand<i64>, TImmLeaf<i64, [{
- return ((uint64_t)Imm) < 2;
-}]> {
- let ParserMatchClass = Imm0_1Operand;
-}
-
+// timm0_1 - as above, but use TargetConstant (TImmLeaf)
+def timm0_1 : Operand<i64>, TImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 2;
+}]> {
+ let ParserMatchClass = Imm0_1Operand;
+}
+
// imm0_15 predicate - True if the immediate is in the range [0,15]
def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
return ((uint64_t)Imm) < 16;
@@ -1301,9 +1301,9 @@ class SimpleSystemI<bit L, dag iops, string asm, string operands,
}
// System instructions which have an Rt register.
-class RtSystemI<bit L, dag oops, dag iops, string asm, string operands,
- list<dag> pattern = []>
- : BaseSystemI<L, oops, iops, asm, operands, pattern>,
+class RtSystemI<bit L, dag oops, dag iops, string asm, string operands,
+ list<dag> pattern = []>
+ : BaseSystemI<L, oops, iops, asm, operands, pattern>,
Sched<[WriteSys]> {
bits<5> Rt;
let Inst{4-0} = Rt;
@@ -1331,16 +1331,16 @@ class TMSystemI<bits<4> CRm, string asm, list<dag> pattern>
let Inst{4-0} = Rt;
}
-// System instructions that pass a register argument
-// This class assumes the register is for input rather than output.
-class RegInputSystemI<bits<4> CRm, bits<3> Op2, string asm,
- list<dag> pattern = []>
- : RtSystemI<0, (outs), (ins GPR64:$Rt), asm, "\t$Rt", pattern> {
- let Inst{20-12} = 0b000110001;
- let Inst{11-8} = CRm;
- let Inst{7-5} = Op2;
-}
-
+// System instructions that pass a register argument
+// This class assumes the register is for input rather than output.
+class RegInputSystemI<bits<4> CRm, bits<3> Op2, string asm,
+ list<dag> pattern = []>
+ : RtSystemI<0, (outs), (ins GPR64:$Rt), asm, "\t$Rt", pattern> {
+ let Inst{20-12} = 0b000110001;
+ let Inst{11-8} = CRm;
+ let Inst{7-5} = Op2;
+}
+
// System instructions for transactional memory - no operand
class TMSystemINoOperand<bits<4> CRm, string asm, list<dag> pattern>
: TMBaseSystemI<0b0, CRm, 0b011, (outs), (ins), asm, "", pattern> {
@@ -1381,14 +1381,14 @@ def barrier_op : Operand<i32> {
let PrintMethod = "printBarrierOption";
let ParserMatchClass = BarrierAsmOperand;
}
-def BarriernXSAsmOperand : AsmOperandClass {
- let Name = "BarriernXS";
- let ParserMethod = "tryParseBarriernXSOperand";
-}
-def barrier_nxs_op : Operand<i32> {
- let PrintMethod = "printBarriernXSOption";
- let ParserMatchClass = BarriernXSAsmOperand;
-}
+def BarriernXSAsmOperand : AsmOperandClass {
+ let Name = "BarriernXS";
+ let ParserMethod = "tryParseBarriernXSOperand";
+}
+def barrier_nxs_op : Operand<i32> {
+ let PrintMethod = "printBarriernXSOption";
+ let ParserMatchClass = BarriernXSAsmOperand;
+}
class CRmSystemI<Operand crmtype, bits<3> opc, string asm,
list<dag> pattern = []>
: SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm", pattern>,
@@ -1470,7 +1470,7 @@ class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
"mrs", "\t$Rt, $systemreg"> {
bits<16> systemreg;
let Inst{20-5} = systemreg;
- let DecoderNamespace = "Fallback";
+ let DecoderNamespace = "Fallback";
}
// FIXME: Some of these def NZCV, others don't. Best way to model that?
@@ -1480,7 +1480,7 @@ class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt),
"msr", "\t$systemreg, $Rt"> {
bits<16> systemreg;
let Inst{20-5} = systemreg;
- let DecoderNamespace = "Fallback";
+ let DecoderNamespace = "Fallback";
}
def SystemPStateFieldWithImm0_15Operand : AsmOperandClass {
@@ -1970,21 +1970,21 @@ class SignAuthTwoOperand<bits<4> opc, string asm,
let Inst{4-0} = Rd;
}
-class ClearAuth<bits<1> data, string asm>
- : I<(outs GPR64:$Rd), (ins GPR64:$Rn), asm, "\t$Rd", "$Rd = $Rn", []>, Sched<[]> {
- bits<5> Rd;
- let Inst{31-11} = 0b110110101100000101000;
- let Inst{10} = data;
- let Inst{9-5} = 0b11111;
- let Inst{4-0} = Rd;
-}
-
+class ClearAuth<bits<1> data, string asm>
+ : I<(outs GPR64:$Rd), (ins GPR64:$Rn), asm, "\t$Rd", "$Rd = $Rn", []>, Sched<[]> {
+ bits<5> Rd;
+ let Inst{31-11} = 0b110110101100000101000;
+ let Inst{10} = data;
+ let Inst{9-5} = 0b11111;
+ let Inst{4-0} = Rd;
+}
+
// Base class for the Armv8.4-A 8 and 16-bit flag manipulation instructions
class BaseFlagManipulation<bit sf, bit sz, dag iops, string asm, string ops>
: I<(outs), iops, asm, ops, "", []>,
Sched<[WriteI, ReadI, ReadI]> {
let Uses = [NZCV];
- let Defs = [NZCV];
+ let Defs = [NZCV];
bits<5> Rn;
let Inst{31} = sf;
let Inst{30-15} = 0b0111010000000000;
@@ -3972,7 +3972,7 @@ class LoadPreIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
(outs GPR64sp:$wback, regtype:$Rt),
(ins GPR64sp:$Rn, simm9:$offset), asm,
"$Rn = $wback,@earlyclobber $wback", []>,
- Sched<[WriteAdr, WriteLD]>;
+ Sched<[WriteAdr, WriteLD]>;
let mayStore = 1, mayLoad = 0 in
class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
@@ -4018,7 +4018,7 @@ class LoadPostIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
(outs GPR64sp:$wback, regtype:$Rt),
(ins GPR64sp:$Rn, simm9:$offset),
asm, "$Rn = $wback,@earlyclobber $wback", []>,
- Sched<[WriteAdr, WriteLD]>;
+ Sched<[WriteAdr, WriteLD]>;
let mayStore = 1, mayLoad = 0 in
class StorePostIdx<bits<2> sz, bit V, bits<2> opc, RegisterOperand regtype,
@@ -4115,7 +4115,7 @@ class LoadPairPreIdx<bits<2> opc, bit V, RegisterOperand regtype,
: BaseLoadStorePairPreIdx<opc, V, 1,
(outs GPR64sp:$wback, regtype:$Rt, regtype:$Rt2),
(ins GPR64sp:$Rn, indextype:$offset), asm>,
- Sched<[WriteAdr, WriteLD, WriteLDHi]>;
+ Sched<[WriteAdr, WriteLD, WriteLDHi]>;
let mayStore = 1, mayLoad = 0 in
class StorePairPreIdx<bits<2> opc, bit V, RegisterOperand regtype,
@@ -4156,7 +4156,7 @@ class LoadPairPostIdx<bits<2> opc, bit V, RegisterOperand regtype,
: BaseLoadStorePairPostIdx<opc, V, 1,
(outs GPR64sp:$wback, regtype:$Rt, regtype:$Rt2),
(ins GPR64sp:$Rn, idxtype:$offset), asm>,
- Sched<[WriteAdr, WriteLD, WriteLDHi]>;
+ Sched<[WriteAdr, WriteLD, WriteLDHi]>;
let mayStore = 1, mayLoad = 0 in
class StorePairPostIdx<bits<2> opc, bit V, RegisterOperand regtype,
@@ -7874,9 +7874,9 @@ class BaseSIMDThreeSameVectorBFDot<bit Q, bit U, string asm, string kind1,
multiclass SIMDThreeSameVectorBFDot<bit U, string asm> {
def v4bf16 : BaseSIMDThreeSameVectorBFDot<0, U, asm, ".2s", ".4h", V64,
- v2f32, v4bf16>;
+ v2f32, v4bf16>;
def v8bf16 : BaseSIMDThreeSameVectorBFDot<1, U, asm, ".4s", ".8h", V128,
- v4f32, v8bf16>;
+ v4f32, v8bf16>;
}
class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm,
@@ -7894,7 +7894,7 @@ class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm,
(InputType RegType:$Rn),
(InputType (bitconvert (AccumType
(AArch64duplane32 (v4f32 V128:$Rm),
- VectorIndexS:$idx)))))))]> {
+ VectorIndexS:$idx)))))))]> {
bits<2> idx;
let Inst{21} = idx{0}; // L
@@ -7904,16 +7904,16 @@ class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm,
multiclass SIMDThreeSameVectorBF16DotI<bit U, string asm> {
def v4bf16 : BaseSIMDThreeSameVectorBF16DotI<0, U, asm, ".2s", ".4h",
- ".2h", V64, v2f32, v4bf16>;
+ ".2h", V64, v2f32, v4bf16>;
def v8bf16 : BaseSIMDThreeSameVectorBF16DotI<1, U, asm, ".4s", ".8h",
- ".2h", V128, v4f32, v8bf16>;
+ ".2h", V128, v4f32, v8bf16>;
}
class SIMDBF16MLAL<bit Q, string asm, SDPatternOperator OpNode>
: BaseSIMDThreeSameVectorTied<Q, 0b1, 0b110, 0b11111, V128, asm, ".4s",
[(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
- (v8bf16 V128:$Rn),
- (v8bf16 V128:$Rm)))]> {
+ (v8bf16 V128:$Rn),
+ (v8bf16 V128:$Rm)))]> {
let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}");
}
@@ -7923,10 +7923,10 @@ class SIMDBF16MLALIndex<bit Q, string asm, SDPatternOperator OpNode>
"{\t$Rd.4s, $Rn.8h, $Rm.h$idx}", "$Rd = $dst",
[(set (v4f32 V128:$dst),
(v4f32 (OpNode (v4f32 V128:$Rd),
- (v8bf16 V128:$Rn),
- (v8bf16
+ (v8bf16 V128:$Rn),
+ (v8bf16
(AArch64duplane16 (v8bf16 V128_lo:$Rm),
- VectorIndexH:$idx)))))]>,
+ VectorIndexH:$idx)))))]>,
Sched<[WriteV]> {
bits<5> Rd;
bits<5> Rn;
@@ -7950,8 +7950,8 @@ class SIMDThreeSameVectorBF16MatrixMul<string asm>
V128, asm, ".4s",
[(set (v4f32 V128:$dst),
(int_aarch64_neon_bfmmla (v4f32 V128:$Rd),
- (v8bf16 V128:$Rn),
- (v8bf16 V128:$Rm)))]> {
+ (v8bf16 V128:$Rn),
+ (v8bf16 V128:$Rm)))]> {
let AsmString = !strconcat(asm, "{\t$Rd", ".4s", ", $Rn", ".8h",
", $Rm", ".8h", "}");
}
@@ -10629,14 +10629,14 @@ multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
[(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
(v4f16 V64:$Rn),
(v4f16 V64:$Rm),
- (i32 rottype:$rot)))]>;
+ (i32 rottype:$rot)))]>;
def v8f16 : BaseSIMDThreeSameVectorComplex<1, U, 0b01, opcode, V128, rottype,
asm, ".8h",
[(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
(v8f16 V128:$Rn),
(v8f16 V128:$Rm),
- (i32 rottype:$rot)))]>;
+ (i32 rottype:$rot)))]>;
}
let Predicates = [HasComplxNum, HasNEON] in {
@@ -10645,21 +10645,21 @@ multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
[(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
(v2f32 V64:$Rn),
(v2f32 V64:$Rm),
- (i32 rottype:$rot)))]>;
+ (i32 rottype:$rot)))]>;
def v4f32 : BaseSIMDThreeSameVectorComplex<1, U, 0b10, opcode, V128, rottype,
asm, ".4s",
[(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
(v4f32 V128:$Rn),
(v4f32 V128:$Rm),
- (i32 rottype:$rot)))]>;
+ (i32 rottype:$rot)))]>;
def v2f64 : BaseSIMDThreeSameVectorComplex<1, U, 0b11, opcode, V128, rottype,
asm, ".2d",
[(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd),
(v2f64 V128:$Rn),
(v2f64 V128:$Rm),
- (i32 rottype:$rot)))]>;
+ (i32 rottype:$rot)))]>;
}
}
@@ -10701,14 +10701,14 @@ multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
[(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
(v4f16 V64:$Rn),
(v4f16 V64:$Rm),
- (i32 rottype:$rot)))]>;
+ (i32 rottype:$rot)))]>;
def v8f16 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b01, opcode, V128,
rottype, asm, ".8h",
[(set (v8f16 V128:$dst), (OpNode (v8f16 V128:$Rd),
(v8f16 V128:$Rn),
(v8f16 V128:$Rm),
- (i32 rottype:$rot)))]>;
+ (i32 rottype:$rot)))]>;
}
let Predicates = [HasComplxNum, HasNEON] in {
@@ -10717,21 +10717,21 @@ multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
[(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
(v2f32 V64:$Rn),
(v2f32 V64:$Rm),
- (i32 rottype:$rot)))]>;
+ (i32 rottype:$rot)))]>;
def v4f32 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b10, opcode, V128,
rottype, asm, ".4s",
[(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd),
(v4f32 V128:$Rn),
(v4f32 V128:$Rm),
- (i32 rottype:$rot)))]>;
+ (i32 rottype:$rot)))]>;
def v2f64 : BaseSIMDThreeSameVectorTiedComplex<1, U, 0b11, opcode, V128,
rottype, asm, ".2d",
[(set (v2f64 V128:$dst), (OpNode (v2f64 V128:$Rd),
(v2f64 V128:$Rn),
(v2f64 V128:$Rm),
- (i32 rottype:$rot)))]>;
+ (i32 rottype:$rot)))]>;
}
}
@@ -11259,35 +11259,35 @@ multiclass STOPregister<string asm, string instr> {
!cast<Instruction>(instr # "X")>;
}
-class LoadStore64B_base<bits<3> opc, string asm_inst, string asm_ops,
- dag iops, dag oops, list<dag> pat>
- : I<oops, iops, asm_inst, asm_ops, "", pat>,
- Sched<[]> /* FIXME: fill in scheduling details once known */ {
- bits<5> Rt;
- bits<5> Rn;
- let Inst{31-21} = 0b11111000001;
- let Inst{15} = 1;
- let Inst{14-12} = opc;
- let Inst{11-10} = 0b00;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rt;
-
- let Predicates = [HasV8_7a];
-}
-
-class LoadStore64B<bits<3> opc, string asm_inst, dag iops, dag oops,
- list<dag> pat = []>
- : LoadStore64B_base<opc, asm_inst, "\t$Rt, [$Rn]", iops, oops, pat> {
- let Inst{20-16} = 0b11111;
-}
-
-class Store64BV<bits<3> opc, string asm_inst, list<dag> pat = []>
- : LoadStore64B_base<opc, asm_inst, "\t$Rs, $Rt, [$Rn]",
- (ins GPR64x8:$Rt, GPR64sp:$Rn), (outs GPR64:$Rs), pat> {
- bits<5> Rs;
- let Inst{20-16} = Rs;
-}
-
+class LoadStore64B_base<bits<3> opc, string asm_inst, string asm_ops,
+ dag iops, dag oops, list<dag> pat>
+ : I<oops, iops, asm_inst, asm_ops, "", pat>,
+ Sched<[]> /* FIXME: fill in scheduling details once known */ {
+ bits<5> Rt;
+ bits<5> Rn;
+ let Inst{31-21} = 0b11111000001;
+ let Inst{15} = 1;
+ let Inst{14-12} = opc;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let Predicates = [HasV8_7a];
+}
+
+class LoadStore64B<bits<3> opc, string asm_inst, dag iops, dag oops,
+ list<dag> pat = []>
+ : LoadStore64B_base<opc, asm_inst, "\t$Rt, [$Rn]", iops, oops, pat> {
+ let Inst{20-16} = 0b11111;
+}
+
+class Store64BV<bits<3> opc, string asm_inst, list<dag> pat = []>
+ : LoadStore64B_base<opc, asm_inst, "\t$Rs, $Rt, [$Rn]",
+ (ins GPR64x8:$Rt, GPR64sp:$Rn), (outs GPR64:$Rs), pat> {
+ bits<5> Rs;
+ let Inst{20-16} = Rs;
+}
+
//----------------------------------------------------------------------------
// Allow the size specifier tokens to be upper case, not just lower.
def : TokenAlias<".4B", ".4b">; // Add dot product
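
The LoadStore64B_base/LoadStore64B/Store64BV definitions restored above fix every field position. Purely as an illustration, the 32-bit instruction word they describe can be assembled like this (field layout taken from the 'let Inst{...}' lines; the opcode values themselves are not part of this hunk):

#include <cstdint>

// Illustrative encoder for the LoadStore64B_base layout (Armv8.7 LD64B/ST64B
// family). Rs_or_ones is 0b11111 for the LoadStore64B forms and the Rs
// register number for Store64BV.
constexpr uint32_t encodeLoadStore64B(unsigned opc, unsigned Rn, unsigned Rt,
                                      unsigned Rs_or_ones = 0b11111) {
  return (0b11111000001u << 21) |       // Inst{31-21}
         ((Rs_or_ones & 0x1Fu) << 16) | // Inst{20-16}
         (1u << 15) |                   // Inst{15}
         ((opc & 0x7u) << 12) |         // Inst{14-12}
         // Inst{11-10} = 0b00
         ((Rn & 0x1Fu) << 5) |          // Inst{9-5}
         (Rt & 0x1Fu);                  // Inst{4-0}
}
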
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrGISel.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrGISel.td
index 25656fac1d..b7d5014166 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrGISel.td
@@ -88,29 +88,29 @@ def G_DUP: AArch64GenericInstruction {
let InOperandList = (ins type1:$lane);
let hasSideEffects = 0;
}
-
-// Represents a lane duplicate operation.
-def G_DUPLANE8 : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src, type1:$lane);
- let hasSideEffects = 0;
-}
-def G_DUPLANE16 : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src, type1:$lane);
- let hasSideEffects = 0;
-}
-def G_DUPLANE32 : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src, type1:$lane);
- let hasSideEffects = 0;
-}
-def G_DUPLANE64 : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src, type1:$lane);
- let hasSideEffects = 0;
-}
-
+
+// Represents a lane duplicate operation.
+def G_DUPLANE8 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src, type1:$lane);
+ let hasSideEffects = 0;
+}
+def G_DUPLANE16 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src, type1:$lane);
+ let hasSideEffects = 0;
+}
+def G_DUPLANE32 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src, type1:$lane);
+ let hasSideEffects = 0;
+}
+def G_DUPLANE64 : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src, type1:$lane);
+ let hasSideEffects = 0;
+}
+
// Represents a trn1 instruction. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks.
def G_TRN1 : AArch64GenericInstruction {
@@ -134,28 +134,28 @@ def G_EXT: AArch64GenericInstruction {
let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
}
-// Represents a vector G_ASHR with an immediate.
-def G_VASHR : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
-}
-
-// Represents a vector G_LSHR with an immediate.
-def G_VLSHR : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
-}
-
-// Represents an integer to FP conversion on the FPR bank.
-def G_SITOF : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
-}
-def G_UITOF : AArch64GenericInstruction {
- let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
-}
-
+// Represents a vector G_ASHR with an immediate.
+def G_VASHR : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
+}
+
+// Represents a vector G_LSHR with an immediate.
+def G_VLSHR : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
+}
+
+// Represents an integer to FP conversion on the FPR bank.
+def G_SITOF : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+}
+def G_UITOF : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+}
+
def : GINodeEquiv<G_REV16, AArch64rev16>;
def : GINodeEquiv<G_REV32, AArch64rev32>;
def : GINodeEquiv<G_REV64, AArch64rev64>;
@@ -164,21 +164,21 @@ def : GINodeEquiv<G_UZP2, AArch64uzp2>;
def : GINodeEquiv<G_ZIP1, AArch64zip1>;
def : GINodeEquiv<G_ZIP2, AArch64zip2>;
def : GINodeEquiv<G_DUP, AArch64dup>;
-def : GINodeEquiv<G_DUPLANE8, AArch64duplane8>;
-def : GINodeEquiv<G_DUPLANE16, AArch64duplane16>;
-def : GINodeEquiv<G_DUPLANE32, AArch64duplane32>;
-def : GINodeEquiv<G_DUPLANE64, AArch64duplane64>;
+def : GINodeEquiv<G_DUPLANE8, AArch64duplane8>;
+def : GINodeEquiv<G_DUPLANE16, AArch64duplane16>;
+def : GINodeEquiv<G_DUPLANE32, AArch64duplane32>;
+def : GINodeEquiv<G_DUPLANE64, AArch64duplane64>;
def : GINodeEquiv<G_TRN1, AArch64trn1>;
def : GINodeEquiv<G_TRN2, AArch64trn2>;
def : GINodeEquiv<G_EXT, AArch64ext>;
-def : GINodeEquiv<G_VASHR, AArch64vashr>;
-def : GINodeEquiv<G_VLSHR, AArch64vlshr>;
-def : GINodeEquiv<G_SITOF, AArch64sitof>;
-def : GINodeEquiv<G_UITOF, AArch64uitof>;
-
-def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
-
-// These are patterns that we only use for GlobalISel via the importer.
-def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
- (vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
- (f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>;
+def : GINodeEquiv<G_VASHR, AArch64vashr>;
+def : GINodeEquiv<G_VLSHR, AArch64vlshr>;
+def : GINodeEquiv<G_SITOF, AArch64sitof>;
+def : GINodeEquiv<G_UITOF, AArch64uitof>;
+
+def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
+
+// These are patterns that we only use for GlobalISel via the importer.
+def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
+ (vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
+ (f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>;
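
The importer-only pattern at the end of this hunk maps an fadd of the two lanes of a single v2f32 register onto FADDPv2i32p. Roughly, it targets reductions shaped like the following sketch (whether a particular frontend pipeline reaches this exact shape is not guaranteed):

// Clang/GCC vector-extension sketch of the shape the pattern matches: both
// fadd operands are lane extracts of the same 64-bit (v2f32) value.
typedef float v2f32 __attribute__((vector_size(8)));

float horizontalAdd(v2f32 V) {
  return V[0] + V[1]; // candidate for selection as 'faddp s0, v0.2s'
}
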
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp
index 6b38e216a8..fc3e238182 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -107,13 +107,13 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
break;
- case TargetOpcode::STATEPOINT:
- NumBytes = StatepointOpers(&MI).getNumPatchBytes();
- assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
- // No patch bytes means a normal call inst is emitted
- if (NumBytes == 0)
- NumBytes = 4;
- break;
+ case TargetOpcode::STATEPOINT:
+ NumBytes = StatepointOpers(&MI).getNumPatchBytes();
+ assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+ // No patch bytes means a normal call inst is emitted
+ if (NumBytes == 0)
+ NumBytes = 4;
+ break;
case AArch64::TLSDESC_CALLSEQ:
// This gets lowered to an instruction sequence which takes 16 bytes
NumBytes = 16;
@@ -294,31 +294,31 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
}
- // If we're allowed to modify and the block ends in a unconditional branch
- // which could simply fallthrough, remove the branch. (Note: This case only
- // matters when we can't understand the whole sequence, otherwise it's also
- // handled by BranchFolding.cpp.)
- if (AllowModify && isUncondBranchOpcode(LastOpc) &&
- MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
- LastInst->eraseFromParent();
- LastInst = SecondLastInst;
- LastOpc = LastInst->getOpcode();
- if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
- assert(!isUncondBranchOpcode(LastOpc) &&
- "unreachable unconditional branches removed above");
-
- if (isCondBranchOpcode(LastOpc)) {
- // Block ends with fall-through condbranch.
- parseCondBranch(LastInst, TBB, Cond);
- return false;
- }
- return true; // Can't handle indirect branch.
- } else {
- SecondLastInst = &*I;
- SecondLastOpc = SecondLastInst->getOpcode();
- }
- }
-
+ // If we're allowed to modify and the block ends in a unconditional branch
+ // which could simply fallthrough, remove the branch. (Note: This case only
+ // matters when we can't understand the whole sequence, otherwise it's also
+ // handled by BranchFolding.cpp.)
+ if (AllowModify && isUncondBranchOpcode(LastOpc) &&
+ MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
+ assert(!isUncondBranchOpcode(LastOpc) &&
+ "unreachable unconditional branches removed above");
+
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ parseCondBranch(LastInst, TBB, Cond);
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ } else {
+ SecondLastInst = &*I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
return true;
@@ -353,56 +353,56 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
}
-bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
- MachineBranchPredicate &MBP,
- bool AllowModify) const {
- // For the moment, handle only a block which ends with a cb(n)zx followed by
- // a fallthrough. Why this? Because it is a common form.
- // TODO: Should we handle b.cc?
-
- MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
- if (I == MBB.end())
- return true;
-
- // Skip over SpeculationBarrierEndBB terminators
- if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
- I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
- --I;
- }
-
- if (!isUnpredicatedTerminator(*I))
- return true;
-
- // Get the last instruction in the block.
- MachineInstr *LastInst = &*I;
- unsigned LastOpc = LastInst->getOpcode();
- if (!isCondBranchOpcode(LastOpc))
- return true;
-
- switch (LastOpc) {
- default:
- return true;
- case AArch64::CBZW:
- case AArch64::CBZX:
- case AArch64::CBNZW:
- case AArch64::CBNZX:
- break;
- };
-
- MBP.TrueDest = LastInst->getOperand(1).getMBB();
- assert(MBP.TrueDest && "expected!");
- MBP.FalseDest = MBB.getNextNode();
-
- MBP.ConditionDef = nullptr;
- MBP.SingleUseCondition = false;
-
- MBP.LHS = LastInst->getOperand(0);
- MBP.RHS = MachineOperand::CreateImm(0);
- MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
- : MachineBranchPredicate::PRED_EQ;
- return false;
-}
-
+bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
+ MachineBranchPredicate &MBP,
+ bool AllowModify) const {
+ // For the moment, handle only a block which ends with a cb(n)zx followed by
+ // a fallthrough. Why this? Because it is a common form.
+ // TODO: Should we handle b.cc?
+
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return true;
+
+ // Skip over SpeculationBarrierEndBB terminators
+ if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
+ I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
+ --I;
+ }
+
+ if (!isUnpredicatedTerminator(*I))
+ return true;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = &*I;
+ unsigned LastOpc = LastInst->getOpcode();
+ if (!isCondBranchOpcode(LastOpc))
+ return true;
+
+ switch (LastOpc) {
+ default:
+ return true;
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
+ break;
+ };
+
+ MBP.TrueDest = LastInst->getOperand(1).getMBB();
+ assert(MBP.TrueDest && "expected!");
+ MBP.FalseDest = MBB.getNextNode();
+
+ MBP.ConditionDef = nullptr;
+ MBP.SingleUseCondition = false;
+
+ MBP.LHS = LastInst->getOperand(0);
+ MBP.RHS = MachineOperand::CreateImm(0);
+ MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
+ : MachineBranchPredicate::PRED_EQ;
+ return false;
+}
+
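
Reading the restored analyzeBranchPredicate concretely: for a block whose last real terminator is `CBNZX $x0, %bb.true` and whose layout successor is `%bb.next`, the fields come out as follows (a trace of the code above, not a general contract):

  MBP.LHS          = $x0            // operand 0 of the terminator
  MBP.RHS          = immediate 0
  MBP.Predicate    = PRED_NE        // only CBNZX selects PRED_NE here;
                                    // CBZW/CBZX/CBNZW fall to PRED_EQ
  MBP.TrueDest     = %bb.true       // operand 1
  MBP.FalseDest    = %bb.next       // MBB.getNextNode()
  MBP.ConditionDef = nullptr, MBP.SingleUseCondition = false
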
bool AArch64InstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond[0].getImm() != -1) {
@@ -1119,13 +1119,13 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
switch (MI.getOpcode()) {
default:
break;
- case AArch64::PTEST_PP:
- SrcReg = MI.getOperand(0).getReg();
- SrcReg2 = MI.getOperand(1).getReg();
- // Not sure about the mask and value for now...
- CmpMask = ~0;
- CmpValue = 0;
- return true;
+ case AArch64::PTEST_PP:
+ SrcReg = MI.getOperand(0).getReg();
+ SrcReg2 = MI.getOperand(1).getReg();
+ // Not sure about the mask and value for now...
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
case AArch64::SUBSWrr:
case AArch64::SUBSWrs:
case AArch64::SUBSWrx:
@@ -1281,9 +1281,9 @@ static bool areCFlagsAccessedBetweenInstrs(
return true;
// From must be above To.
- assert(std::any_of(
- ++To.getReverse(), To->getParent()->rend(),
- [From](MachineInstr &MI) { return MI.getIterator() == From; }));
+ assert(std::any_of(
+ ++To.getReverse(), To->getParent()->rend(),
+ [From](MachineInstr &MI) { return MI.getIterator() == From; }));
// We iterate backward starting at \p To until we hit \p From.
for (const MachineInstr &Instr :
@@ -1296,127 +1296,127 @@ static bool areCFlagsAccessedBetweenInstrs(
return false;
}
-/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
-/// operation which could set the flags in an identical manner
-bool AArch64InstrInfo::optimizePTestInstr(
- MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
- const MachineRegisterInfo *MRI) const {
- auto *Mask = MRI->getUniqueVRegDef(MaskReg);
- auto *Pred = MRI->getUniqueVRegDef(PredReg);
- auto NewOp = Pred->getOpcode();
- bool OpChanged = false;
-
- unsigned MaskOpcode = Mask->getOpcode();
- unsigned PredOpcode = Pred->getOpcode();
- bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
- bool PredIsWhileLike = isWhileOpcode(PredOpcode);
-
- if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
- // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
- // deactivate any lanes OTHER_INST might set.
- uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
- uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
-
- // Must be an all active predicate of matching element size.
- if ((PredElementSize != MaskElementSize) ||
- (Mask->getOperand(1).getImm() != 31))
- return false;
-
- // Fallthough to simply remove the PTEST.
- } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
- // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
- // instruction that sets the flags as PTEST would.
-
- // Fallthough to simply remove the PTEST.
- } else if (PredIsPTestLike) {
- // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
- // instructions use the same predicate.
- auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
- if (Mask != PTestLikeMask)
- return false;
-
- // Fallthough to simply remove the PTEST.
- } else {
- switch (Pred->getOpcode()) {
- case AArch64::BRKB_PPzP:
- case AArch64::BRKPB_PPzPP: {
- // Op 0 is chain, 1 is the mask, 2 the previous predicate to
- // propagate, 3 the new predicate.
-
- // Check to see if our mask is the same as the brkpb's. If
- // not the resulting flag bits may be different and we
- // can't remove the ptest.
- auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
- if (Mask != PredMask)
- return false;
-
- // Switch to the new opcode
- NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
- : AArch64::BRKPBS_PPzPP;
- OpChanged = true;
- break;
- }
- case AArch64::BRKN_PPzP: {
- auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
- if (Mask != PredMask)
- return false;
-
- NewOp = AArch64::BRKNS_PPzP;
- OpChanged = true;
- break;
- }
- default:
- // Bail out if we don't recognize the input
- return false;
- }
- }
-
- const TargetRegisterInfo *TRI = &getRegisterInfo();
-
- // If the predicate is in a different block (possibly because its been
- // hoisted out), then assume the flags are set in between statements.
- if (Pred->getParent() != PTest->getParent())
- return false;
-
- // If another instruction between the propagation and test sets the
- // flags, don't remove the ptest.
- MachineBasicBlock::iterator I = Pred, E = PTest;
- ++I; // Skip past the predicate op itself.
- for (; I != E; ++I) {
- const MachineInstr &Inst = *I;
-
- // TODO: If the ptest flags are unused, we could still remove it.
- if (Inst.modifiesRegister(AArch64::NZCV, TRI))
- return false;
- }
-
- // If we pass all the checks, it's safe to remove the PTEST and use the flags
- // as they are prior to PTEST. Sometimes this requires the tested PTEST
- // operand to be replaced with an equivalent instruction that also sets the
- // flags.
- Pred->setDesc(get(NewOp));
- PTest->eraseFromParent();
- if (OpChanged) {
- bool succeeded = UpdateOperandRegClass(*Pred);
- (void)succeeded;
- assert(succeeded && "Operands have incompatible register classes!");
- Pred->addRegisterDefined(AArch64::NZCV, TRI);
- }
-
- // Ensure that the flags def is live.
- if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
- unsigned i = 0, e = Pred->getNumOperands();
- for (; i != e; ++i) {
- MachineOperand &MO = Pred->getOperand(i);
- if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
- MO.setIsDead(false);
- break;
- }
- }
- }
- return true;
-}
-
+/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
+/// operation which could set the flags in an identical manner
+bool AArch64InstrInfo::optimizePTestInstr(
+ MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
+ const MachineRegisterInfo *MRI) const {
+ auto *Mask = MRI->getUniqueVRegDef(MaskReg);
+ auto *Pred = MRI->getUniqueVRegDef(PredReg);
+ auto NewOp = Pred->getOpcode();
+ bool OpChanged = false;
+
+ unsigned MaskOpcode = Mask->getOpcode();
+ unsigned PredOpcode = Pred->getOpcode();
+ bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
+ bool PredIsWhileLike = isWhileOpcode(PredOpcode);
+
+ if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
+ // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
+ // deactivate any lanes OTHER_INST might set.
+ uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
+ uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
+
+ // Must be an all active predicate of matching element size.
+ if ((PredElementSize != MaskElementSize) ||
+ (Mask->getOperand(1).getImm() != 31))
+ return false;
+
+ // Fallthough to simply remove the PTEST.
+ } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
+ // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
+ // instruction that sets the flags as PTEST would.
+
+ // Fallthough to simply remove the PTEST.
+ } else if (PredIsPTestLike) {
+ // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
+ // instructions use the same predicate.
+ auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PTestLikeMask)
+ return false;
+
+ // Fallthough to simply remove the PTEST.
+ } else {
+ switch (Pred->getOpcode()) {
+ case AArch64::BRKB_PPzP:
+ case AArch64::BRKPB_PPzPP: {
+ // Op 0 is chain, 1 is the mask, 2 the previous predicate to
+ // propagate, 3 the new predicate.
+
+ // Check to see if our mask is the same as the brkpb's. If
+ // not the resulting flag bits may be different and we
+ // can't remove the ptest.
+ auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PredMask)
+ return false;
+
+ // Switch to the new opcode
+ NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
+ : AArch64::BRKPBS_PPzPP;
+ OpChanged = true;
+ break;
+ }
+ case AArch64::BRKN_PPzP: {
+ auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PredMask)
+ return false;
+
+ NewOp = AArch64::BRKNS_PPzP;
+ OpChanged = true;
+ break;
+ }
+ default:
+ // Bail out if we don't recognize the input
+ return false;
+ }
+ }
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ // If the predicate is in a different block (possibly because its been
+ // hoisted out), then assume the flags are set in between statements.
+ if (Pred->getParent() != PTest->getParent())
+ return false;
+
+ // If another instruction between the propagation and test sets the
+ // flags, don't remove the ptest.
+ MachineBasicBlock::iterator I = Pred, E = PTest;
+ ++I; // Skip past the predicate op itself.
+ for (; I != E; ++I) {
+ const MachineInstr &Inst = *I;
+
+ // TODO: If the ptest flags are unused, we could still remove it.
+ if (Inst.modifiesRegister(AArch64::NZCV, TRI))
+ return false;
+ }
+
+ // If we pass all the checks, it's safe to remove the PTEST and use the flags
+ // as they are prior to PTEST. Sometimes this requires the tested PTEST
+ // operand to be replaced with an equivalent instruction that also sets the
+ // flags.
+ Pred->setDesc(get(NewOp));
+ PTest->eraseFromParent();
+ if (OpChanged) {
+ bool succeeded = UpdateOperandRegClass(*Pred);
+ (void)succeeded;
+ assert(succeeded && "Operands have incompatible register classes!");
+ Pred->addRegisterDefined(AArch64::NZCV, TRI);
+ }
+
+ // Ensure that the flags def is live.
+ if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
+ unsigned i = 0, e = Pred->getNumOperands();
+ for (; i != e; ++i) {
+ MachineOperand &MO = Pred->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
+ MO.setIsDead(false);
+ break;
+ }
+ }
+ }
+ return true;
+}
+
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be truly compare
/// instruction
@@ -1455,9 +1455,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
return true;
}
- if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
- return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
-
+ if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
+ return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
+
// Continue only if we have a "ri" where immediate is zero.
// FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
// function.
@@ -2274,24 +2274,24 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
return true;
}
-Optional<ExtAddrMode>
-AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
- const TargetRegisterInfo *TRI) const {
- const MachineOperand *Base; // Filled with the base operand of MI.
- int64_t Offset; // Filled with the offset of MI.
- bool OffsetIsScalable;
- if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
- return None;
-
- if (!Base->isReg())
- return None;
- ExtAddrMode AM;
- AM.BaseReg = Base->getReg();
- AM.Displacement = Offset;
- AM.ScaledReg = 0;
- return AM;
-}
-
+Optional<ExtAddrMode>
+AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
+ const TargetRegisterInfo *TRI) const {
+ const MachineOperand *Base; // Filled with the base operand of MI.
+ int64_t Offset; // Filled with the offset of MI.
+ bool OffsetIsScalable;
+ if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
+ return None;
+
+ if (!Base->isReg())
+ return None;
+ ExtAddrMode AM;
+ AM.BaseReg = Base->getReg();
+ AM.Displacement = Offset;
+ AM.ScaledReg = 0;
+ return AM;
+}
+
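
A rough caller-side sketch of how the ExtAddrMode returned above can be consumed (MemMI, TII and TRI are assumed to already be in scope; only the base-plus-displacement form is produced, with ScaledReg left at 0):

if (Optional<ExtAddrMode> AM = TII->getAddrModeFromMemoryOp(MemMI, TRI)) {
  Register Base = AM->BaseReg;      // base register of the access
  int64_t Disp  = AM->Displacement; // immediate offset from that base
  // AM->ScaledReg is 0 here; feed Base/Disp into e.g. addressing-mode or
  // aliasing checks as needed.
  (void)Base;
  (void)Disp;
}
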
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
bool &OffsetIsScalable, unsigned &Width,
@@ -3290,7 +3290,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_PXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 4:
@@ -3334,7 +3334,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 24:
@@ -3356,7 +3356,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 48:
@@ -3367,7 +3367,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZZXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 64:
@@ -3378,7 +3378,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZZZXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
}
@@ -3444,7 +3444,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_PXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 4:
@@ -3488,7 +3488,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 24:
@@ -3510,7 +3510,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 48:
@@ -3521,7 +3521,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZZXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
case 64:
@@ -3532,7 +3532,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZZZXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalableVector;
}
break;
}
@@ -3559,47 +3559,47 @@ bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
});
}
-void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
- const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
- // The smallest scalable element supported by scaled SVE addressing
- // modes are predicates, which are 2 scalable bytes in size. So the scalable
- // byte offset must always be a multiple of 2.
- assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
-
- // VGSized offsets are divided by '2', because the VG register is the
- // the number of 64bit granules as opposed to 128bit vector chunks,
- // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
- // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
- // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
- ByteSized = Offset.getFixed();
- VGSized = Offset.getScalable() / 2;
-}
-
-/// Returns the offset in parts to which this frame offset can be
-/// decomposed for the purpose of describing a frame offset.
-/// For non-scalable offsets this is simply its byte size.
-void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
- const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
- int64_t &NumDataVectors) {
- // The smallest scalable element supported by scaled SVE addressing
- // modes are predicates, which are 2 scalable bytes in size. So the scalable
- // byte offset must always be a multiple of 2.
- assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
-
- NumBytes = Offset.getFixed();
- NumDataVectors = 0;
- NumPredicateVectors = Offset.getScalable() / 2;
- // This method is used to get the offsets to adjust the frame offset.
- // If the function requires ADDPL to be used and needs more than two ADDPL
- // instructions, part of the offset is folded into NumDataVectors so that it
- // uses ADDVL for part of it, reducing the number of ADDPL instructions.
- if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
- NumPredicateVectors > 62) {
- NumDataVectors = NumPredicateVectors / 8;
- NumPredicateVectors -= NumDataVectors * 8;
- }
-}
-
+void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
+ const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
+ // The smallest scalable element supported by scaled SVE addressing
+ // modes are predicates, which are 2 scalable bytes in size. So the scalable
+ // byte offset must always be a multiple of 2.
+ assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
+
+ // VGSized offsets are divided by '2', because the VG register is the
+ // the number of 64bit granules as opposed to 128bit vector chunks,
+ // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
+ // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
+ // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
+ ByteSized = Offset.getFixed();
+ VGSized = Offset.getScalable() / 2;
+}
+
+/// Returns the offset in parts to which this frame offset can be
+/// decomposed for the purpose of describing a frame offset.
+/// For non-scalable offsets this is simply its byte size.
+void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
+ const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
+ int64_t &NumDataVectors) {
+ // The smallest scalable element supported by scaled SVE addressing
+ // modes are predicates, which are 2 scalable bytes in size. So the scalable
+ // byte offset must always be a multiple of 2.
+ assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
+
+ NumBytes = Offset.getFixed();
+ NumDataVectors = 0;
+ NumPredicateVectors = Offset.getScalable() / 2;
+ // This method is used to get the offsets to adjust the frame offset.
+ // If the function requires ADDPL to be used and needs more than two ADDPL
+ // instructions, part of the offset is folded into NumDataVectors so that it
+ // uses ADDVL for part of it, reducing the number of ADDPL instructions.
+ if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
+ NumPredicateVectors > 62) {
+ NumDataVectors = NumPredicateVectors / 8;
+ NumPredicateVectors -= NumDataVectors * 8;
+ }
+}
+
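
A self-contained restatement of the decomposition above, with one worked example; the real helper operates on StackOffset, this is only for following the arithmetic:

#include <cassert>
#include <cstdint>

static void decomposeForFrameOffsets(int64_t FixedBytes, int64_t ScalableBytes,
                                     int64_t &NumBytes,
                                     int64_t &NumPredicateVectors,
                                     int64_t &NumDataVectors) {
  assert(ScalableBytes % 2 == 0 && "predicate granule is 2 scalable bytes");
  NumBytes = FixedBytes;
  NumDataVectors = 0;
  NumPredicateVectors = ScalableBytes / 2;
  // Fold whole data vectors out (handled by ADDVL) when the predicate count
  // is an exact multiple of 8 or needs more than two ADDPL instructions.
  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
      NumPredicateVectors > 62) {
    NumDataVectors = NumPredicateVectors / 8;
    NumPredicateVectors -= NumDataVectors * 8;
  }
}

// Worked example: 32 fixed bytes + 144 scalable bytes gives NumBytes = 32,
// NumPredicateVectors = 144 / 2 = 72, which is more than two ADDPLs can
// cover, so it folds to NumDataVectors = 9 (ADDVL #9) and 0 ADDPL steps.
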
// Helper function to emit a frame offset adjustment from a given
// pointer (SrcReg), stored into DestReg. This function is explicit
// in that it requires the opcode.
@@ -3709,13 +3709,13 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineInstr::MIFlag Flag, bool SetNZCV,
bool NeedsWinCFI, bool *HasWinCFI) {
int64_t Bytes, NumPredicateVectors, NumDataVectors;
- AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
- Offset, Bytes, NumPredicateVectors, NumDataVectors);
+ AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
+ Offset, Bytes, NumPredicateVectors, NumDataVectors);
// First emit non-scalable frame offsets, or a simple 'mov'.
if (Bytes || (!Offset && SrcReg != DestReg)) {
- assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
- "SP increment/decrement not 8-byte aligned");
+ assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
+ "SP increment/decrement not 8-byte aligned");
unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
if (Bytes < 0) {
Bytes = -Bytes;
@@ -3970,7 +3970,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
// Construct the complete offset.
bool IsMulVL = ScaleValue.isScalable();
unsigned Scale = ScaleValue.getKnownMinSize();
- int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
+ int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
const MachineOperand &ImmOpnd =
MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
@@ -4012,9 +4012,9 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
*OutUnscaledOp = *UnscaledOp;
if (IsMulVL)
- SOffset = StackOffset::get(SOffset.getFixed(), Offset);
+ SOffset = StackOffset::get(SOffset.getFixed(), Offset);
else
- SOffset = StackOffset::get(Offset, SOffset.getScalable());
+ SOffset = StackOffset::get(Offset, SOffset.getScalable());
return AArch64FrameOffsetCanUpdate |
(SOffset ? 0 : AArch64FrameOffsetIsLegal);
}
@@ -4026,7 +4026,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned ImmIdx = FrameRegIdx + 1;
if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
- Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
+ Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
MI.getOperand(0).getReg(), FrameReg, Offset, TII,
MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
@@ -4131,7 +4131,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
return false;
}
-// FP Opcodes that can be combined with a FMUL.
+// FP Opcodes that can be combined with a FMUL.
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
default:
@@ -4153,12 +4153,12 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
- // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
- // the target options or if FADD/FSUB has the contract fast-math flag.
- return Options.UnsafeFPMath ||
- Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Inst.getFlag(MachineInstr::FmContract);
- return true;
+ // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
+ // the target options or if FADD/FSUB has the contract fast-math flag.
+ return Options.UnsafeFPMath ||
+ Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Inst.getFlag(MachineInstr::FmContract);
+ return true;
}
return false;
}
@@ -4638,8 +4638,8 @@ bool AArch64InstrInfo::isThroughputPattern(
/// pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
- bool DoRegPressureReduce) const {
+ MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const {
// Integer patterns
if (getMaddPatterns(Root, Patterns))
return true;
@@ -4647,8 +4647,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
if (getFMAPatterns(Root, Patterns))
return true;
- return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
- DoRegPressureReduce);
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
+ DoRegPressureReduce);
}
enum class FMAInstKind { Default, Indexed, Accumulator };
@@ -4871,7 +4871,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- MachineInstr *MUL = nullptr;
+ MachineInstr *MUL = nullptr;
const TargetRegisterClass *RC;
unsigned Opc;
switch (Pattern) {
@@ -5692,9 +5692,9 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
- // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
- // CodeGen/AArch64/urem-seteq-nonzero.ll.
- // assert(MUL && "MUL was never set");
+ // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
+ // CodeGen/AArch64/urem-seteq-nonzero.ll.
+ // assert(MUL && "MUL was never set");
DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
}
@@ -6034,20 +6034,20 @@ AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
static bool
outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
const outliner::Candidate &b) {
- const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
- const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
+ const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
+ const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
- return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
- MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
+ return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
+ MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
}
static bool
outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
const outliner::Candidate &b) {
- const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
- const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
+ const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
+ const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
- return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
+ return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
}
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
@@ -6104,9 +6104,9 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// necessary. However, at this point we don't know if the outlined function
// will have a RET instruction so we assume the worst.
const TargetRegisterInfo &TRI = getRegisterInfo();
- if (FirstCand.getMF()
- ->getInfo<AArch64FunctionInfo>()
- ->shouldSignReturnAddress(true)) {
+ if (FirstCand.getMF()
+ ->getInfo<AArch64FunctionInfo>()
+ ->shouldSignReturnAddress(true)) {
// One PAC and one AUT instructions
NumBytesToCreateFrame += 8;
@@ -6163,7 +6163,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
return false;
};
// Remove candidates with illegal stack modifying instructions
- llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
+ llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
@@ -6206,7 +6206,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// Erase every candidate that violates the restrictions above. (It could be
// true that we have viable candidates, so it's not worth bailing out in
// the case that, say, 1 out of 20 candidates violate the restrictions.)
- llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
+ llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
@@ -6229,7 +6229,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
NumBytesToCreateFrame += 4;
bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
- return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
+ return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
});
// We check to see if CFI Instructions are present, and if they are
@@ -6398,60 +6398,60 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
FrameID = MachineOutlinerNoLRSave;
} else {
SetCandidateCallInfo(MachineOutlinerDefault, 12);
-
- // Bugzilla ID: 46767
- // TODO: Check if fixing up the stack more than once is safe so we can
- // outline these.
- //
- // An outline resulting in a caller that requires stack fixups at the
- // callsite to a callee that also requires stack fixups can happen when
- // there are no available registers at the candidate callsite for a
- // candidate that itself also has calls.
- //
- // In other words if function_containing_sequence in the following pseudo
- // assembly requires that we save LR at the point of the call, but there
- // are no available registers: in this case we save using SP and as a
- // result the SP offsets require stack fixups by multiples of 16.
- //
- // function_containing_sequence:
- // ...
- // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
- // call OUTLINED_FUNCTION_N
- // restore LR from SP
- // ...
- //
- // OUTLINED_FUNCTION_N:
- // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
- // ...
- // bl foo
- // restore LR from SP
- // ret
- //
- // Because the code to handle more than one stack fixup does not
- // currently have the proper checks for legality, these cases will assert
- // in the AArch64 MachineOutliner. This is because the code to do this
- // needs more hardening, testing, better checks that generated code is
- // legal, etc and because it is only verified to handle a single pass of
- // stack fixup.
- //
- // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
- // these cases until they are known to be handled. Bugzilla 46767 is
- // referenced in comments at the assert site.
- //
- // To avoid asserting (or generating non-legal code on noassert builds)
- // we remove all candidates which would need more than one stack fixup by
- // pruning the cases where the candidate has calls while also having no
- // available LR and having no available general purpose registers to copy
- // LR to (ie one extra stack save/restore).
- //
- if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
- erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) {
- return (std::any_of(
- C.front(), std::next(C.back()),
- [](const MachineInstr &MI) { return MI.isCall(); })) &&
- (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C));
- });
- }
+
+ // Bugzilla ID: 46767
+ // TODO: Check if fixing up the stack more than once is safe so we can
+ // outline these.
+ //
+ // An outline resulting in a caller that requires stack fixups at the
+ // callsite to a callee that also requires stack fixups can happen when
+ // there are no available registers at the candidate callsite for a
+ // candidate that itself also has calls.
+ //
+ // In other words if function_containing_sequence in the following pseudo
+ // assembly requires that we save LR at the point of the call, but there
+ // are no available registers: in this case we save using SP and as a
+ // result the SP offsets require stack fixups by multiples of 16.
+ //
+ // function_containing_sequence:
+ // ...
+ // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
+ // call OUTLINED_FUNCTION_N
+ // restore LR from SP
+ // ...
+ //
+ // OUTLINED_FUNCTION_N:
+ // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
+ // ...
+ // bl foo
+ // restore LR from SP
+ // ret
+ //
+ // Because the code to handle more than one stack fixup does not
+ // currently have the proper checks for legality, these cases will assert
+ // in the AArch64 MachineOutliner. This is because the code to do this
+ // needs more hardening, testing, better checks that generated code is
+ // legal, etc and because it is only verified to handle a single pass of
+ // stack fixup.
+ //
+ // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
+ // these cases until they are known to be handled. Bugzilla 46767 is
+ // referenced in comments at the assert site.
+ //
+ // To avoid asserting (or generating non-legal code on noassert builds)
+ // we remove all candidates which would need more than one stack fixup by
+ // pruning the cases where the candidate has calls while also having no
+ // available LR and having no available general purpose registers to copy
+ // LR to (ie one extra stack save/restore).
+ //
+ if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
+ erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) {
+ return (std::any_of(
+ C.front(), std::next(C.back()),
+ [](const MachineInstr &MI) { return MI.isCall(); })) &&
+ (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C));
+ });
+ }
}
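To illustrate the single-stack-fixup limitation described in the comment block above: once LR is saved to SP at the call site, every SP-relative offset inside the outlined body has to be rebased by the 16 bytes that save occupies, and doing that more than once per candidate is exactly what the pruning avoids. A minimal sketch of the rebasing, assuming a flat list of offsets rather than real MachineInstr operands (the actual work happens in fixupPostOutline):

#include <cstdint>
#include <vector>

// Rebase SP-relative offsets after an extra 16-byte LR save/restore was
// inserted below them in the stack.
void rebaseSPOffsets(std::vector<int64_t> &SPOffsets, int64_t SaveBytes = 16) {
  for (int64_t &Off : SPOffsets)
    Off += SaveBytes;
}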
// If we dropped all of the candidates, bail out here.
@@ -6820,7 +6820,7 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
// If v8.3a features are available we can replace a RET instruction by
// RETAA or RETAB and omit the AUT instructions
- if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
+ if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
MBBAUT->getOpcode() == AArch64::RET) {
BuildMI(MBB, MBBAUT, DL,
TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
@@ -6872,12 +6872,12 @@ void AArch64InstrInfo::buildOutlinedFrame(
return MI.isCall() && !MI.isReturn();
};
- if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
+ if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
// Fix up the instructions in the range, since we're going to modify the
// stack.
-
- // Bugzilla ID: 46767
- // TODO: Check if fixing up twice is safe so we can outline these.
+
+ // Bugzilla ID: 46767
+ // TODO: Check if fixing up twice is safe so we can outline these.
assert(OF.FrameConstructionID != MachineOutlinerDefault &&
"Can only fix up stack references once");
fixupPostOutline(MBB);
@@ -6934,11 +6934,11 @@ void AArch64InstrInfo::buildOutlinedFrame(
// If a bunch of candidates reach this point they must agree on their return
// address signing. It is therefore enough to just consider the signing
// behaviour of one of them
- const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>();
- bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction);
+ const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>();
+ bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction);
// a_key is the default
- bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey();
+ bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey();
// If this is a tail call outlined function, then there's already a return.
if (OF.FrameConstructionID == MachineOutlinerTailCall ||
@@ -7099,7 +7099,7 @@ Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
return None;
int Shift = MI.getOperand(3).getImm();
assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
- Offset = Sign * (MI.getOperand(2).getImm() << Shift);
+ Offset = Sign * (MI.getOperand(2).getImm() << Shift);
}
}
return RegImmPair{MI.getOperand(1).getReg(), Offset};
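The hunk above reconstructs the immediate of an ADD/SUB as Sign * (imm12 << Shift), where the shift is either 0 or 12. A small stand-alone sketch of that decoding (the function name is illustrative):

#include <cassert>
#include <cstdint>

int64_t decodeAddSubImm(uint64_t Imm12, int Shift, bool IsSub) {
  assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
  int64_t Sign = IsSub ? -1 : 1;
  // e.g. "sub x0, x1, #1, lsl #12" -> decodeAddSubImm(1, 12, true) == -4096
  return Sign * static_cast<int64_t>(Imm12 << Shift);
}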
@@ -7175,14 +7175,14 @@ uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::ElementSizeMask;
}
-bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
- return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
-}
-
-bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
- return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
-}
-
+bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
+ return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
+}
+
+bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
+ return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
+}
+
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
return AArch64::BLRNoIP;
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.h
index 7434987e06..9b924a8440 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.h
@@ -112,10 +112,10 @@ public:
/// Hint that pairing the given load or store is unprofitable.
static void suppressLdStPair(MachineInstr &MI);
- Optional<ExtAddrMode>
- getAddrModeFromMemoryOp(const MachineInstr &MemI,
- const TargetRegisterInfo *TRI) const override;
-
+ Optional<ExtAddrMode>
+ getAddrModeFromMemoryOp(const MachineInstr &MemI,
+ const TargetRegisterInfo *TRI) const override;
+
bool getMemOperandsWithOffsetWidth(
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
@@ -191,9 +191,9 @@ public:
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify = false) const override;
- bool analyzeBranchPredicate(MachineBasicBlock &MBB,
- MachineBranchPredicate &MBP,
- bool AllowModify) const override;
+ bool analyzeBranchPredicate(MachineBasicBlock &MBB,
+ MachineBranchPredicate &MBP,
+ bool AllowModify) const override;
unsigned removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved = nullptr) const override;
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
@@ -235,10 +235,10 @@ public:
/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in ``Root``. All potential patterns are
/// listed in the ``Patterns`` array.
- bool
- getMachineCombinerPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns,
- bool DoRegPressureReduce) const override;
+ bool
+ getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const override;
/// Return true when Inst is associative and commutative so that it can be
/// reassociated.
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
@@ -280,12 +280,12 @@ public:
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
/// Returns the vector element size (B, H, S or D) of an SVE opcode.
uint64_t getElementSizeForOpcode(unsigned Opc) const;
- /// Returns true if the opcode is for an SVE instruction that sets the
- /// condition codes as if its results had been fed to a PTEST instruction
- /// along with the same general predicate.
- bool isPTestLikeOpcode(unsigned Opc) const;
- /// Returns true if the opcode is for an SVE WHILE## instruction.
- bool isWhileOpcode(unsigned Opc) const;
+ /// Returns true if the opcode is for an SVE instruction that sets the
+ /// condition codes as if its results had been fed to a PTEST instruction
+ /// along with the same general predicate.
+ bool isPTestLikeOpcode(unsigned Opc) const;
+ /// Returns true if the opcode is for an SVE WHILE## instruction.
+ bool isWhileOpcode(unsigned Opc) const;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
static bool isFalkorShiftExtFast(const MachineInstr &MI);
@@ -299,13 +299,13 @@ public:
Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
Register Reg) const override;
- static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
- int64_t &NumBytes,
- int64_t &NumPredicateVectors,
- int64_t &NumDataVectors);
- static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
- int64_t &ByteSized,
- int64_t &VGSized);
+ static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
+ int64_t &NumBytes,
+ int64_t &NumPredicateVectors,
+ int64_t &NumDataVectors);
+ static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset,
+ int64_t &ByteSized,
+ int64_t &VGSized);
#define GET_INSTRINFO_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"
@@ -334,12 +334,12 @@ private:
/// Returns an unused general-purpose register which can be used for
/// constructing an outlined call if one exists. Returns 0 otherwise.
unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
-
- /// Remove a ptest of a predicate-generating operation that already sets, or
- /// can be made to set, the condition codes in an identical manner
- bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
- unsigned PredReg,
- const MachineRegisterInfo *MRI) const;
+
+ /// Remove a ptest of a predicate-generating operation that already sets, or
+ /// can be made to set, the condition codes in an identical manner
+ bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
+ unsigned PredReg,
+ const MachineRegisterInfo *MRI) const;
};
/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
@@ -423,18 +423,18 @@ static inline bool isIndirectBranchOpcode(int Opc) {
return false;
}
-static inline bool isPTrueOpcode(unsigned Opc) {
- switch (Opc) {
- case AArch64::PTRUE_B:
- case AArch64::PTRUE_H:
- case AArch64::PTRUE_S:
- case AArch64::PTRUE_D:
- return true;
- default:
- return false;
- }
-}
-
+static inline bool isPTrueOpcode(unsigned Opc) {
+ switch (Opc) {
+ case AArch64::PTRUE_B:
+ case AArch64::PTRUE_H:
+ case AArch64::PTRUE_S:
+ case AArch64::PTRUE_D:
+ return true;
+ default:
+ return false;
+ }
+}
+
/// Return opcode to be used for indirect calls.
unsigned getBLRCallOpcode(const MachineFunction &MF);
@@ -442,7 +442,7 @@ unsigned getBLRCallOpcode(const MachineFunction &MF);
#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits
#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bit
#define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits
-#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits
+#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits
// }
namespace AArch64 {
@@ -475,14 +475,14 @@ enum FalseLaneType {
FalseLanesUndef = TSFLAG_FALSE_LANE_TYPE(0x2),
};
-// NOTE: This is a bit field.
-static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1);
-static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2);
-
+// NOTE: This is a bit field.
+static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1);
+static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2);
+
#undef TSFLAG_ELEMENT_SIZE_TYPE
#undef TSFLAG_DESTRUCTIVE_INST_TYPE
#undef TSFLAG_FALSE_LANE_TYPE
-#undef TSFLAG_INSTR_FLAGS
+#undef TSFLAG_INSTR_FLAGS
int getSVEPseudoMap(uint16_t Opcode);
int getSVERevInstr(uint16_t Opcode);
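The TSFLAG_* macros restored above pack per-instruction SVE properties into TSFlags: element size in bits 0-2, destructive type in bits 3-6, false-lane behaviour in bits 7-8, and the While/PTest-like flags in bits 9-10. A minimal sketch of how such packed flags are queried; the constant values mirror the macros, but the names are illustrative rather than the exact LLVM enums:

#include <cstdint>

constexpr uint64_t ElementSizeMask    = 0x7;          // TSFLAG_ELEMENT_SIZE_TYPE, bits 0-2
constexpr uint64_t InstrFlagIsWhile   = 0x1ull << 9;  // TSFLAG_INSTR_FLAGS(0x1)
constexpr uint64_t InstrFlagPTestLike = 0x2ull << 9;  // TSFLAG_INSTR_FLAGS(0x2)

constexpr uint64_t elementSize(uint64_t TSFlags) { return TSFlags & ElementSizeMask; }
constexpr bool isWhileLike(uint64_t TSFlags)     { return (TSFlags & InstrFlagIsWhile) != 0; }
constexpr bool isPTestLike(uint64_t TSFlags)     { return (TSFlags & InstrFlagPTestLike) != 0; }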
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.td
index 171d3dbaa8..8051a6a937 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64InstrInfo.td
@@ -25,16 +25,16 @@ def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
-def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
- AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">;
+def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
+ AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">;
def HasVH : Predicate<"Subtarget->hasVH()">,
AssemblerPredicate<(all_of FeatureVH), "vh">;
def HasLOR : Predicate<"Subtarget->hasLOR()">,
AssemblerPredicate<(all_of FeatureLOR), "lor">;
-def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
- AssemblerPredicate<(all_of FeaturePAuth), "pauth">;
+def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
+ AssemblerPredicate<(all_of FeaturePAuth), "pauth">;
def HasJS : Predicate<"Subtarget->hasJS()">,
AssemblerPredicate<(all_of FeatureJS), "jsconv">;
@@ -69,8 +69,8 @@ def HasPMU : Predicate<"Subtarget->hasPMU()">,
def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
AssemblerPredicate<(all_of FeatureTLB_RMI), "tlb-rmi">;
-def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
- AssemblerPredicate<(all_of FeatureFlagM), "flagm">;
+def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
+ AssemblerPredicate<(all_of FeatureFlagM), "flagm">;
def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
AssemblerPredicate<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
@@ -151,16 +151,16 @@ def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
AssemblerPredicate<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
AssemblerPredicate<(all_of FeatureMatMulFP64), "f64mm">;
-def HasXS : Predicate<"Subtarget->hasXS()">,
- AssemblerPredicate<(all_of FeatureXS), "xs">;
-def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
- AssemblerPredicate<(all_of FeatureWFxT), "wfxt">;
-def HasLS64 : Predicate<"Subtarget->hasLS64()">,
- AssemblerPredicate<(all_of FeatureLS64), "ls64">;
-def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
- AssemblerPredicate<(all_of FeatureBRBE), "brbe">;
-def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
- AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">;
+def HasXS : Predicate<"Subtarget->hasXS()">,
+ AssemblerPredicate<(all_of FeatureXS), "xs">;
+def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
+ AssemblerPredicate<(all_of FeatureWFxT), "wfxt">;
+def HasLS64 : Predicate<"Subtarget->hasLS64()">,
+ AssemblerPredicate<(all_of FeatureLS64), "ls64">;
+def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
+ AssemblerPredicate<(all_of FeatureBRBE), "brbe">;
+def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
+ AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
@@ -411,12 +411,12 @@ def AArch64call : SDNode<"AArch64ISD::CALL",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-
-def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
- SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-
+
+def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
+ SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
[SDNPHasChain]>;
def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
@@ -518,7 +518,7 @@ def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
- (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
+ (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
@@ -570,19 +570,19 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
def AArch64srhadd : SDNode<"AArch64ISD::SRHADD", SDT_AArch64binvec>;
def AArch64urhadd : SDNode<"AArch64ISD::URHADD", SDT_AArch64binvec>;
-def AArch64shadd : SDNode<"AArch64ISD::SHADD", SDT_AArch64binvec>;
-def AArch64uhadd : SDNode<"AArch64ISD::UHADD", SDT_AArch64binvec>;
-
-def AArch64uabd_n : SDNode<"AArch64ISD::UABD", SDT_AArch64binvec>;
-def AArch64sabd_n : SDNode<"AArch64ISD::SABD", SDT_AArch64binvec>;
-
-def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
- [(AArch64uabd_n node:$lhs, node:$rhs),
- (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
-def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs),
- [(AArch64sabd_n node:$lhs, node:$rhs),
- (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;
-
+def AArch64shadd : SDNode<"AArch64ISD::SHADD", SDT_AArch64binvec>;
+def AArch64uhadd : SDNode<"AArch64ISD::UHADD", SDT_AArch64binvec>;
+
+def AArch64uabd_n : SDNode<"AArch64ISD::UABD", SDT_AArch64binvec>;
+def AArch64sabd_n : SDNode<"AArch64ISD::SABD", SDT_AArch64binvec>;
+
+def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
+ [(AArch64uabd_n node:$lhs, node:$rhs),
+ (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
+def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs),
+ [(AArch64sabd_n node:$lhs, node:$rhs),
+ (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;
+
def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -617,8 +617,8 @@ let RecomputePerFunction = 1 in {
// Avoid generating STRQro if it is slow, unless we're optimizing for code size.
def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;
- def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
- def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
+ def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
+ def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
@@ -716,8 +716,8 @@ def : Pat<(AArch64LOADgot tconstpool:$addr),
// 32-bit jump table destination is actually only 2 instructions since we can
// use the table itself as a PC-relative base. But optimization occurs after
// branch relaxation so be pessimistic.
-let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
- isNotDuplicable = 1 in {
+let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
+ isNotDuplicable = 1 in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
(ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
Sched<[]>;
@@ -801,34 +801,34 @@ def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
let Inst{12} = 0;
let Predicates = [HasTRACEV8_4];
}
-
-def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
- let CRm{1-0} = 0b11;
- let Inst{9-8} = 0b10;
- let Predicates = [HasXS];
-}
-
-let Predicates = [HasWFxT] in {
-def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
-def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
-}
-
-// Branch Record Buffer two-word mnemonic instructions
-class BRBEI<bits<3> op2, string keyword>
- : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
- let Inst{31-8} = 0b110101010000100101110010;
- let Inst{7-5} = op2;
- let Predicates = [HasBRBE];
-}
-def BRB_IALL: BRBEI<0b100, "\tiall">;
-def BRB_INJ: BRBEI<0b101, "\tinj">;
-
-}
-
-// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
-def : TokenAlias<"INJ", "inj">;
-def : TokenAlias<"IALL", "iall">;
-
+
+def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
+ let CRm{1-0} = 0b11;
+ let Inst{9-8} = 0b10;
+ let Predicates = [HasXS];
+}
+
+let Predicates = [HasWFxT] in {
+def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
+def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
+}
+
+// Branch Record Buffer two-word mnemonic instructions
+class BRBEI<bits<3> op2, string keyword>
+ : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
+ let Inst{31-8} = 0b110101010000100101110010;
+ let Inst{7-5} = op2;
+ let Predicates = [HasBRBE];
+}
+def BRB_IALL: BRBEI<0b100, "\tiall">;
+def BRB_INJ: BRBEI<0b101, "\tinj">;
+
+}
+
+// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
+def : TokenAlias<"INJ", "inj">;
+def : TokenAlias<"IALL", "iall">;
+
// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", int_aarch64_neon_sdot>;
@@ -849,23 +849,23 @@ def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN : SIMD_BFCVTN;
def BFCVTN2 : SIMD_BFCVTN2;
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
-
-// Vector-scalar BFDOT:
-// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
-// register (the instruction uses a single 32-bit lane from it), so the pattern
-// is a bit tricky.
-def : Pat<(v2f32 (int_aarch64_neon_bfdot
- (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
- (v4bf16 (bitconvert
- (v2i32 (AArch64duplane32
- (v4i32 (bitconvert
- (v8bf16 (insert_subvector undef,
- (v4bf16 V64:$Rm),
- (i64 0))))),
- VectorIndexS:$idx)))))),
- (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
- (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
- VectorIndexS:$idx)>;
+
+// Vector-scalar BFDOT:
+// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
+// register (the instruction uses a single 32-bit lane from it), so the pattern
+// is a bit tricky.
+def : Pat<(v2f32 (int_aarch64_neon_bfdot
+ (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
+ (v4bf16 (bitconvert
+ (v2i32 (AArch64duplane32
+ (v4i32 (bitconvert
+ (v8bf16 (insert_subvector undef,
+ (v4bf16 V64:$Rm),
+ (i64 0))))),
+ VectorIndexS:$idx)))))),
+ (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
+ VectorIndexS:$idx)>;
}
// ARMv8.6A AArch64 matrix multiplication
@@ -965,7 +965,7 @@ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
(FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
}
-
+
let Predicates = [HasComplxNum, HasNEON] in {
def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
(FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
@@ -979,47 +979,47 @@ let Predicates = [HasComplxNum, HasNEON] in {
}
}
-multiclass FCMLA_PATS<ValueType ty, RegisterClass Reg> {
- def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
- (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
- def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
- (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
- def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
- (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
- def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
- (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
-}
-
-multiclass FCMLA_LANE_PATS<ValueType ty, RegisterClass Reg, dag RHSDup> {
- def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
- (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
- def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
- (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
- def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
- (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
- def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
- (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
-}
-
-
-let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
- defm : FCMLA_PATS<v4f16, V64>;
- defm : FCMLA_PATS<v8f16, V128>;
-
- defm : FCMLA_LANE_PATS<v4f16, V64,
- (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
- defm : FCMLA_LANE_PATS<v8f16, V128,
- (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
-}
-let Predicates = [HasComplxNum, HasNEON] in {
- defm : FCMLA_PATS<v2f32, V64>;
- defm : FCMLA_PATS<v4f32, V128>;
- defm : FCMLA_PATS<v2f64, V128>;
-
- defm : FCMLA_LANE_PATS<v4f32, V128,
- (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
-}
-
+multiclass FCMLA_PATS<ValueType ty, RegisterClass Reg> {
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
+ (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
+ (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
+ (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
+ (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
+}
+
+multiclass FCMLA_LANE_PATS<ValueType ty, RegisterClass Reg, dag RHSDup> {
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
+ (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
+ (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
+ (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
+ def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
+ (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
+}
+
+
+let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
+ defm : FCMLA_PATS<v4f16, V64>;
+ defm : FCMLA_PATS<v8f16, V128>;
+
+ defm : FCMLA_LANE_PATS<v4f16, V64,
+ (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
+ defm : FCMLA_LANE_PATS<v8f16, V128,
+ (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
+}
+let Predicates = [HasComplxNum, HasNEON] in {
+ defm : FCMLA_PATS<v2f32, V64>;
+ defm : FCMLA_PATS<v4f32, V128>;
+ defm : FCMLA_PATS<v2f64, V128>;
+
+ defm : FCMLA_LANE_PATS<v4f32, V128,
+ (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
+}
+
// v8.3a Pointer Authentication
// These instructions inhabit part of the hint space and so can be used for
// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
@@ -1073,7 +1073,7 @@ def : InstAlias<"autib1716", (AUTIB1716), 0>;
def : InstAlias<"xpaclri", (XPACLRI), 0>;
// These pointer authentication instructions require armv8.3a
-let Predicates = [HasPAuth] in {
+let Predicates = [HasPAuth] in {
// When PA is enabled, a better mnemonic should be emitted.
def : InstAlias<"paciaz", (PACIAZ), 1>;
@@ -1104,8 +1104,8 @@ let Predicates = [HasPAuth] in {
defm PAC : SignAuth<0b000, 0b010, "pac">;
defm AUT : SignAuth<0b001, 0b011, "aut">;
- def XPACI : ClearAuth<0, "xpaci">;
- def XPACD : ClearAuth<1, "xpacd">;
+ def XPACI : ClearAuth<0, "xpaci">;
+ def XPACD : ClearAuth<1, "xpacd">;
def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>;
// Combined Instructions
@@ -1140,7 +1140,7 @@ let Predicates = [HasPAuth] in {
}
// v8.3a floating point conversion for javascript
-let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
+let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
"fjcvtzs",
[(set GPR32:$Rd,
@@ -1149,7 +1149,7 @@ def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
} // HasJS, HasFPARMv8
// v8.4 Flag manipulation instructions
-let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
+let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
let Inst{20-5} = 0b0000001000000000;
}
@@ -1157,7 +1157,7 @@ def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
"{\t$Rn, $imm, $mask}">;
-} // HasFlagM
+} // HasFlagM
// v8.5 flag manipulation instructions
let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
@@ -1206,12 +1206,12 @@ def HWASAN_CHECK_MEMACCESS : Pseudo<
(outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
[(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
Sched<[]>;
-}
-
-let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
+}
+
+let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
(outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
- [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
+ [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
Sched<[]>;
}
@@ -1558,16 +1558,16 @@ def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
-def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
- (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
-def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
- (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
+def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
+ (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
+ (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
(SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
-def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
- (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
-def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
- (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
+def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
+ (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
+def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
+ (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
(UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
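The patterns above let a 64-bit multiply whose operands are known to be sign- or zero-extended 32-bit values use the widening SMADDL/UMADDL forms with XZR as the addend. Roughly, these are the C-level idioms covered, assuming the usual instruction selection:

#include <cstdint>

int64_t mulWidenSigned(int32_t a, int32_t b) {
  return (int64_t)a * (int64_t)b;              // (mul (sext), (sext)) -> smull / SMADDLrrr ..., XZR
}

uint64_t mulWidenUnsigned(uint64_t a, uint64_t b) {
  return (a & 0xFFFFFFFF) * (b & 0xFFFFFFFF);  // (mul (and 0xFFFFFFFF), (and 0xFFFFFFFF)) -> umull
}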
@@ -2154,8 +2154,8 @@ let isCall = 1, Defs = [LR], Uses = [SP] in {
def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
Sched<[WriteBrReg]>,
PseudoInstExpansion<(BLR GPR64:$Rn)>;
- def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
- Sched<[WriteBrReg]>;
+ def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
+ Sched<[WriteBrReg]>;
} // isCall
def : Pat<(AArch64call GPR64:$Rn),
@@ -2165,10 +2165,10 @@ def : Pat<(AArch64call GPR64noip:$Rn),
(BLRNoIP GPR64noip:$Rn)>,
Requires<[SLSBLRMitigation]>;
-def : Pat<(AArch64call_rvmarker GPR64:$Rn),
- (BLR_RVMARKER GPR64:$Rn)>,
- Requires<[NoSLSBLRMitigation]>;
-
+def : Pat<(AArch64call_rvmarker GPR64:$Rn),
+ (BLR_RVMARKER GPR64:$Rn)>,
+ Requires<[NoSLSBLRMitigation]>;
+
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch
@@ -3900,7 +3900,7 @@ let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
// Floating point immediate move.
//===----------------------------------------------------------------------===//
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm FMOV : FPMoveImmediate<"fmov">;
}
@@ -3909,7 +3909,7 @@ defm FMOV : FPMoveImmediate<"fmov">;
//===----------------------------------------------------------------------===//
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
- AArch64uabd>;
+ AArch64uabd>;
// Match UABDL in log2-shuffle patterns.
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
(zext (v8i8 V64:$opB))))),
@@ -4041,7 +4041,7 @@ def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
@@ -4160,9 +4160,9 @@ defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
- TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
-defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
-defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", AArch64shadd>;
+ TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
+defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
+defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", AArch64shadd>;
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
@@ -4179,9 +4179,9 @@ defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
- TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
-defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
-defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", AArch64uhadd>;
+ TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
+defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
+defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", AArch64uhadd>;
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
@@ -4579,10 +4579,10 @@ def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
(FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
(FCVTPUv1i64 FPR64:$Rn)>;
-def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
- (FCVTZSv1i64 FPR64:$Rn)>;
-def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
- (FCVTZUv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
+ (FCVTZSv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
+ (FCVTZUv1i64 FPR64:$Rn)>;
def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
(FRECPEv1f16 FPR16:$Rn)>;
@@ -4754,9 +4754,9 @@ defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
- AArch64sabd>;
+ AArch64sabd>;
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
- AArch64sabd>;
+ AArch64sabd>;
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
@@ -4777,59 +4777,59 @@ defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
- AArch64uabd>;
+ AArch64uabd>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
- BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
+ BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
- BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
+ BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
- BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
+ BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
- BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
-
-// Additional patterns for [SU]ML[AS]L
-multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
- Instruction INST8B, Instruction INST4H, Instruction INST2S> {
- def : Pat<(v4i16 (opnode
- V64:$Ra,
- (v4i16 (extract_subvector
- (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
- (i64 0))))),
- (EXTRACT_SUBREG (v8i16 (INST8B
- (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
- V64:$Rn, V64:$Rm)), dsub)>;
- def : Pat<(v2i32 (opnode
- V64:$Ra,
- (v2i32 (extract_subvector
- (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
- (i64 0))))),
- (EXTRACT_SUBREG (v4i32 (INST4H
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
- V64:$Rn, V64:$Rm)), dsub)>;
- def : Pat<(v1i64 (opnode
- V64:$Ra,
- (v1i64 (extract_subvector
- (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
- (i64 0))))),
- (EXTRACT_SUBREG (v2i64 (INST2S
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
- V64:$Rn, V64:$Rm)), dsub)>;
-}
-
-defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_umull,
- UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
-defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_smull,
- SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
-defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_umull,
- UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
-defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_smull,
- SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
-
+ BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
+
+// Additional patterns for [SU]ML[AS]L
+multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
+ Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+ def : Pat<(v4i16 (opnode
+ V64:$Ra,
+ (v4i16 (extract_subvector
+ (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
+ (i64 0))))),
+ (EXTRACT_SUBREG (v8i16 (INST8B
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
+ V64:$Rn, V64:$Rm)), dsub)>;
+ def : Pat<(v2i32 (opnode
+ V64:$Ra,
+ (v2i32 (extract_subvector
+ (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
+ (i64 0))))),
+ (EXTRACT_SUBREG (v4i32 (INST4H
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
+ V64:$Rn, V64:$Rm)), dsub)>;
+ def : Pat<(v1i64 (opnode
+ V64:$Ra,
+ (v1i64 (extract_subvector
+ (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
+ (i64 0))))),
+ (EXTRACT_SUBREG (v2i64 (INST2S
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
+ V64:$Rn, V64:$Rm)), dsub)>;
+}
+
+defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_umull,
+ UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
+defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_smull,
+ SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
+defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_umull,
+ UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
+defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_smull,
+ SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
+
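Neon_mul_acc_widen_patterns above folds an accumulate of the low half of a widening multiply into a single UMLAL/SMLAL (or UMLSL/SMLSL) on the full vector, avoiding a separate multiply plus add. In scalar terms the shape being matched is roughly the following, where only the low four lanes of the byte inputs participate (a sketch only; whether a front end produces exactly this form depends on the vectorizer):

#include <cstdint>

void umlalLowHalf(uint16_t acc[4], const uint8_t a[8], const uint8_t b[8]) {
  for (int i = 0; i < 4; ++i)  // low half of the 8x8->16 widening products
    acc[i] = (uint16_t)(acc[i] + (uint16_t)a[i] * (uint16_t)b[i]);
}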
// Additional patterns for SMULL and UMULL
multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
Instruction INST8B, Instruction INST4H, Instruction INST2S> {
@@ -5041,26 +5041,26 @@ defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
-
-let Predicates = [HasFullFP16] in {
-def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
- (FADDPv2i16p
- (EXTRACT_SUBREG
- (FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))),
- dsub))>;
-def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
- (FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>;
-}
-def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
- (FADDPv2i32p
- (EXTRACT_SUBREG
- (FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))),
- dsub))>;
-def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
- (FADDPv2i32p V64:$Rn)>;
-def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
- (FADDPv2i64p V128:$Rn)>;
-
+
+let Predicates = [HasFullFP16] in {
+def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
+ (FADDPv2i16p
+ (EXTRACT_SUBREG
+ (FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))),
+ dsub))>;
+def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
+ (FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>;
+}
+def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
+ (FADDPv2i32p
+ (EXTRACT_SUBREG
+ (FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))),
+ dsub))>;
+def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
+ (FADDPv2i32p V64:$Rn)>;
+def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
+ (FADDPv2i64p V128:$Rn)>;
+
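The vecreduce_fadd patterns above lower a horizontal FP add to pairwise FADDP steps, which assumes the unordered (reassociable) reduction form. For v4f32, the evaluation order implied by the FADDPv4f32 + FADDPv2i32p pair is, in scalar terms:

float reduceFAddV4(const float v[4]) {
  // FADDPv4f32 folds adjacent lanes into {v0+v1, v2+v3, ...};
  // the scalar FADDP then adds the two surviving partial sums.
  return (v[0] + v[1]) + (v[2] + v[3]);
}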
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
@@ -5312,16 +5312,16 @@ def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
(i64 0)),
dsub)>;
-def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0),
- (i64 VectorIndexH:$imm)),
- (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
-def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0),
- (i64 VectorIndexS:$imm)),
- (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
-def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0),
- (i64 VectorIndexD:$imm)),
- (INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>;
-
+def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0),
+ (i64 VectorIndexH:$imm)),
+ (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
+def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0),
+ (i64 VectorIndexS:$imm)),
+ (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
+def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0),
+ (i64 VectorIndexD:$imm)),
+ (INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>;
+
def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
(f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
(INSvi16lane
@@ -6833,18 +6833,18 @@ def : Pat<(i32 (trunc GPR64sp:$src)),
// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
-def : Pat<(debugtrap), (BRK 0xF000)>;
-
-def ubsan_trap_xform : SDNodeXForm<timm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
-}]>;
-
-def ubsan_trap_imm : TImmLeaf<i32, [{
- return isUInt<8>(Imm);
-}], ubsan_trap_xform>;
-
-def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
-
+def : Pat<(debugtrap), (BRK 0xF000)>;
+
+def ubsan_trap_xform : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
+}]>;
+
+def ubsan_trap_imm : TImmLeaf<i32, [{
+ return isUInt<8>(Imm);
+}], ubsan_trap_xform>;
+
+def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
+
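ubsan_trap_xform above ORs the 8-bit UBSan check kind with 'U' << 8 before feeding it to BRK, so the trap immediate carries both a recognizable tag byte and the check kind. A one-line sketch of the resulting immediate:

#include <cstdint>

constexpr uint32_t ubsanBrkImmediate(uint8_t Kind) {
  return static_cast<uint32_t>(Kind) | ('U' << 8);  // 'U' == 0x55, so kind 0 encodes as 0x5500
}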
// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2. Then shuffle the high
// part of both results together.
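In scalar terms, the "multiply high" being described keeps only the upper half of each widening product, e.g. for 16-bit elements (a sketch; the actual patterns operate on whole vectors via smull/umull, smull2/umull2 and a lane shuffle):

#include <cstdint>

int16_t mulhiS16(int16_t a, int16_t b) {
  return (int16_t)(((int32_t)a * (int32_t)b) >> 16);  // upper 16 bits of the 32-bit product
}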
@@ -7639,9 +7639,9 @@ def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
(vector_extract (v4f32 FPR128:$Rn), (i64 1))),
(f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
-def : Pat<(fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
- (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
- (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
+def : Pat<(fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
+ (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
+ (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
@@ -7844,23 +7844,23 @@ let AddedComplexity = 10 in {
// FIXME: add SVE dot-product patterns.
}
-let Predicates = [HasLS64] in {
- def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
- (outs GPR64x8:$Rt)>;
- def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
- (outs)>;
- def ST64BV: Store64BV<0b011, "st64bv">;
- def ST64BV0: Store64BV<0b010, "st64bv0">;
-
- class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
- : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
- (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;
-
- def : ST64BPattern<int_aarch64_st64b, ST64B>;
- def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
- def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
-}
-
+let Predicates = [HasLS64] in {
+ def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
+ (outs GPR64x8:$Rt)>;
+ def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
+ (outs)>;
+ def ST64BV: Store64BV<0b011, "st64bv">;
+ def ST64BV0: Store64BV<0b010, "st64bv0">;
+
+ class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
+ : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
+ (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;
+
+ def : ST64BPattern<int_aarch64_st64b, ST64B>;
+ def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
+ def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
+}
+
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index ad180cb293..f87385ccd4 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1186,10 +1186,10 @@ bool AArch64LoadStoreOpt::findMatchingStore(
// store instruction writes and the stored value is not modified, we can
// promote the load. Since we do not handle stores with pre-/post-index,
// it's unnecessary to check if BaseReg is modified by the store itself.
- // Also we can't handle stores without an immediate offset operand,
- // since the operand might be the address of a global variable.
+ // Also we can't handle stores without an immediate offset operand,
+ // since the operand might be the address of a global variable.
if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
- BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() &&
+ BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() &&
isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
StoreI = MBBI;
@@ -1552,27 +1552,27 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
continue;
}
}
- // If the destination register of one load is the same register or a
- // sub/super register of the other load, bail and keep looking. A
- // load-pair instruction with both destination registers the same is
- // UNPREDICTABLE and will result in an exception.
- if (MayLoad &&
- TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
+ // If the destination register of one load is the same register or a
+ // sub/super register of the other load, bail and keep looking. A
+ // load-pair instruction with both destination registers the same is
+ // UNPREDICTABLE and will result in an exception.
+ if (MayLoad &&
+ TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
MemInsns.push_back(&MI);
continue;
}
- // If the BaseReg has been modified, then we cannot do the optimization.
- // For example, in the following pattern
- // ldr x1 [x2]
- // ldr x2 [x3]
- // ldr x4 [x2, #8],
- // the first and third ldr cannot be converted to ldp x1, x4, [x2]
- if (!ModifiedRegUnits.available(BaseReg))
- return E;
-
+ // If the BaseReg has been modified, then we cannot do the optimization.
+ // For example, in the following pattern
+ // ldr x1 [x2]
+ // ldr x2 [x3]
+ // ldr x4 [x2, #8],
+ // the first and third ldr cannot be converted to ldp x1, x4, [x2]
+ if (!ModifiedRegUnits.available(BaseReg))
+ return E;
+
// If the Rt of the second instruction was not modified or used between
// the two instructions and none of the instructions between the second
// and first alias with the second, we can combine the second into the
@@ -1763,11 +1763,11 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
return false;
}
-static bool needsWinCFI(const MachineFunction *MF) {
- return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
- MF->getFunction().needsUnwindTableEntry();
-}
-
+static bool needsWinCFI(const MachineFunction *MF) {
+ return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ MF->getFunction().needsUnwindTableEntry();
+}
+
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
@@ -1808,11 +1808,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
// the memory access (I) and the increment (MBBI) can access the memory
// region defined by [SP, MBBI].
const bool BaseRegSP = BaseReg == AArch64::SP;
- if (BaseRegSP && needsWinCFI(I->getMF())) {
+ if (BaseRegSP && needsWinCFI(I->getMF())) {
// FIXME: For now, we always block the optimization over SP in windows
// targets, as it requires adjusting the unwind/debug info; messing up
// the unwind info can actually cause a miscompile.
- return E;
+ return E;
}
for (unsigned Count = 0; MBBI != E && Count < Limit;
@@ -1868,14 +1868,14 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
}
}
- const bool BaseRegSP = BaseReg == AArch64::SP;
- if (BaseRegSP && needsWinCFI(I->getMF())) {
- // FIXME: For now, we always block the optimization over SP in windows
- // targets, as it requires adjusting the unwind/debug info; messing up
- // the unwind info can actually cause a miscompile.
- return E;
- }
-
+ const bool BaseRegSP = BaseReg == AArch64::SP;
+ if (BaseRegSP && needsWinCFI(I->getMF())) {
+ // FIXME: For now, we always block the optimization over SP in windows
+ // targets, as it requires adjusting the unwind/debug info; messing up
+ // the unwind info can actually cause a miscompile.
+ return E;
+ }
+
// Track which register units have been modified and used between the first
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MCInstLower.cpp
index 10e191ff44..c923f53281 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -203,12 +203,12 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO,
RefFlags |= AArch64MCExpr::VK_SABS;
} else {
RefFlags |= AArch64MCExpr::VK_ABS;
-
- if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
- RefFlags |= AArch64MCExpr::VK_PAGE;
- else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
- AArch64II::MO_PAGEOFF)
- RefFlags |= AArch64MCExpr::VK_PAGEOFF | AArch64MCExpr::VK_NC;
+
+ if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
+ RefFlags |= AArch64MCExpr::VK_PAGE;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
+ AArch64II::MO_PAGEOFF)
+ RefFlags |= AArch64MCExpr::VK_PAGEOFF | AArch64MCExpr::VK_NC;
}
if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G3)
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index 41343ba970..ebb501b779 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -14,9 +14,9 @@
//===----------------------------------------------------------------------===//
#include "AArch64MachineFunctionInfo.h"
-#include "AArch64InstrInfo.h"
-#include <llvm/IR/Metadata.h>
-#include <llvm/IR/Module.h>
+#include "AArch64InstrInfo.h"
+#include <llvm/IR/Metadata.h>
+#include <llvm/IR/Module.h>
using namespace llvm;
@@ -33,82 +33,82 @@ void AArch64FunctionInfo::initializeBaseYamlFields(
if (YamlMFI.HasRedZone.hasValue())
HasRedZone = YamlMFI.HasRedZone;
}
-
-static std::pair<bool, bool> GetSignReturnAddress(const Function &F) {
- // The function should be signed in the following situations:
- // - sign-return-address=all
- // - sign-return-address=non-leaf and the function spills the LR
- if (!F.hasFnAttribute("sign-return-address")) {
- const Module &M = *F.getParent();
- if (const auto *Sign = mdconst::extract_or_null<ConstantInt>(
- M.getModuleFlag("sign-return-address"))) {
- if (Sign->getZExtValue()) {
- if (const auto *All = mdconst::extract_or_null<ConstantInt>(
- M.getModuleFlag("sign-return-address-all")))
- return {true, All->getZExtValue()};
- return {true, false};
- }
- }
- return {false, false};
- }
-
- StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
- if (Scope.equals("none"))
- return {false, false};
-
- if (Scope.equals("all"))
- return {true, true};
-
- assert(Scope.equals("non-leaf"));
- return {true, false};
-}
-
-static bool ShouldSignWithBKey(const Function &F) {
- if (!F.hasFnAttribute("sign-return-address-key")) {
- if (const auto *BKey = mdconst::extract_or_null<ConstantInt>(
- F.getParent()->getModuleFlag("sign-return-address-with-bkey")))
- return BKey->getZExtValue();
- return false;
- }
-
- const StringRef Key =
- F.getFnAttribute("sign-return-address-key").getValueAsString();
- assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
- return Key.equals_lower("b_key");
-}
-
-AArch64FunctionInfo::AArch64FunctionInfo(MachineFunction &MF) : MF(MF) {
- // If we already know that the function doesn't have a redzone, set
- // HasRedZone here.
- if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
- HasRedZone = false;
-
- const Function &F = MF.getFunction();
- std::tie(SignReturnAddress, SignReturnAddressAll) = GetSignReturnAddress(F);
- SignWithBKey = ShouldSignWithBKey(F);
-
- if (!F.hasFnAttribute("branch-target-enforcement")) {
- if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
- F.getParent()->getModuleFlag("branch-target-enforcement")))
- BranchTargetEnforcement = BTE->getZExtValue();
- return;
- }
-
- const StringRef BTIEnable = F.getFnAttribute("branch-target-enforcement").getValueAsString();
- assert(BTIEnable.equals_lower("true") || BTIEnable.equals_lower("false"));
- BranchTargetEnforcement = BTIEnable.equals_lower("true");
-}
-
-bool AArch64FunctionInfo::shouldSignReturnAddress(bool SpillsLR) const {
- if (!SignReturnAddress)
- return false;
- if (SignReturnAddressAll)
- return true;
- return SpillsLR;
-}
-
-bool AArch64FunctionInfo::shouldSignReturnAddress() const {
- return shouldSignReturnAddress(llvm::any_of(
- MF.getFrameInfo().getCalleeSavedInfo(),
- [](const auto &Info) { return Info.getReg() == AArch64::LR; }));
-}
+
+static std::pair<bool, bool> GetSignReturnAddress(const Function &F) {
+ // The function should be signed in the following situations:
+ // - sign-return-address=all
+ // - sign-return-address=non-leaf and the function spills the LR
+ if (!F.hasFnAttribute("sign-return-address")) {
+ const Module &M = *F.getParent();
+ if (const auto *Sign = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("sign-return-address"))) {
+ if (Sign->getZExtValue()) {
+ if (const auto *All = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("sign-return-address-all")))
+ return {true, All->getZExtValue()};
+ return {true, false};
+ }
+ }
+ return {false, false};
+ }
+
+ StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
+ if (Scope.equals("none"))
+ return {false, false};
+
+ if (Scope.equals("all"))
+ return {true, true};
+
+ assert(Scope.equals("non-leaf"));
+ return {true, false};
+}
+
+static bool ShouldSignWithBKey(const Function &F) {
+ if (!F.hasFnAttribute("sign-return-address-key")) {
+ if (const auto *BKey = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag("sign-return-address-with-bkey")))
+ return BKey->getZExtValue();
+ return false;
+ }
+
+ const StringRef Key =
+ F.getFnAttribute("sign-return-address-key").getValueAsString();
+ assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
+ return Key.equals_lower("b_key");
+}
+
+AArch64FunctionInfo::AArch64FunctionInfo(MachineFunction &MF) : MF(MF) {
+ // If we already know that the function doesn't have a redzone, set
+ // HasRedZone here.
+ if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
+ HasRedZone = false;
+
+ const Function &F = MF.getFunction();
+ std::tie(SignReturnAddress, SignReturnAddressAll) = GetSignReturnAddress(F);
+ SignWithBKey = ShouldSignWithBKey(F);
+
+ if (!F.hasFnAttribute("branch-target-enforcement")) {
+ if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag("branch-target-enforcement")))
+ BranchTargetEnforcement = BTE->getZExtValue();
+ return;
+ }
+
+ const StringRef BTIEnable = F.getFnAttribute("branch-target-enforcement").getValueAsString();
+ assert(BTIEnable.equals_lower("true") || BTIEnable.equals_lower("false"));
+ BranchTargetEnforcement = BTIEnable.equals_lower("true");
+}
+
+bool AArch64FunctionInfo::shouldSignReturnAddress(bool SpillsLR) const {
+ if (!SignReturnAddress)
+ return false;
+ if (SignReturnAddressAll)
+ return true;
+ return SpillsLR;
+}
+
+bool AArch64FunctionInfo::shouldSignReturnAddress() const {
+ return shouldSignReturnAddress(llvm::any_of(
+ MF.getFrameInfo().getCalleeSavedInfo(),
+ [](const auto &Info) { return Info.getReg() == AArch64::LR; }));
+}
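
GetSignReturnAddress and shouldSignReturnAddress above reduce to a small decision table over the "sign-return-address" scope ("none", "non-leaf", "all") plus whether the function spills LR. Below is a minimal sketch of that table with plain strings, outside the IR attribute machinery; resolvePacRetScope and shouldSign are names invented for this example.

#include <cassert>
#include <cstdio>
#include <string>
#include <utility>

// {SignReturnAddress, SignReturnAddressAll} for a "sign-return-address"
// scope, mirroring GetSignReturnAddress above.
std::pair<bool, bool> resolvePacRetScope(const std::string &Scope) {
  if (Scope == "none")
    return {false, false};     // PAC-RET disabled
  if (Scope == "all")
    return {true, true};       // sign every function
  assert(Scope == "non-leaf" && "unknown sign-return-address scope");
  return {true, false};        // sign only functions that spill LR
}

// shouldSignReturnAddress(SpillsLR) then becomes:
bool shouldSign(std::pair<bool, bool> Mode, bool SpillsLR) {
  if (!Mode.first)
    return false;
  if (Mode.second)
    return true;
  return SpillsLR;
}

int main() {
  auto Mode = resolvePacRetScope("non-leaf");
  std::printf("leaf: %d, spills LR: %d\n", shouldSign(Mode, false),
              shouldSign(Mode, true)); // leaf: 0, spills LR: 1
}
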
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index f60e2b6c31..b3f35a46c7 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -35,9 +35,9 @@ class MachineInstr;
/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private AArch64-specific information for each MachineFunction.
class AArch64FunctionInfo final : public MachineFunctionInfo {
- /// Backreference to the machine function.
- MachineFunction &MF;
-
+ /// Backreference to the machine function.
+ MachineFunction &MF;
+
/// Number of bytes of arguments this function has on the stack. If the callee
/// is expected to restore the argument stack this should be a multiple of 16,
/// all usable during a tail call.
@@ -128,39 +128,39 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// that must be forwarded to every musttail call.
SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
- /// FrameIndex for the tagged base pointer.
- Optional<int> TaggedBasePointerIndex;
-
- /// Offset from SP-at-entry to the tagged base pointer.
- /// Tagged base pointer is set up to point to the first (lowest address)
- /// tagged stack slot.
- unsigned TaggedBasePointerOffset;
+ /// FrameIndex for the tagged base pointer.
+ Optional<int> TaggedBasePointerIndex;
+ /// Offset from SP-at-entry to the tagged base pointer.
+ /// Tagged base pointer is set up to point to the first (lowest address)
+ /// tagged stack slot.
+ unsigned TaggedBasePointerOffset;
+
  /// OutliningStyle denotes, if a function was outlined, how it was outlined,
/// e.g. Tail Call, Thunk, or Function if none apply.
Optional<std::string> OutliningStyle;
- // Offset from SP-after-callee-saved-spills (i.e. SP-at-entry minus
- // CalleeSavedStackSize) to the address of the frame record.
- int CalleeSaveBaseToFrameRecordOffset = 0;
-
- /// SignReturnAddress is true if PAC-RET is enabled for the function with
- /// defaults being sign non-leaf functions only, with the B key.
- bool SignReturnAddress = false;
-
- /// SignReturnAddressAll modifies the default PAC-RET mode to signing leaf
- /// functions as well.
- bool SignReturnAddressAll = false;
-
- /// SignWithBKey modifies the default PAC-RET mode to signing with the B key.
- bool SignWithBKey = false;
-
- /// BranchTargetEnforcement enables placing BTI instructions at potential
- /// indirect branch destinations.
- bool BranchTargetEnforcement = false;
-
+ // Offset from SP-after-callee-saved-spills (i.e. SP-at-entry minus
+ // CalleeSavedStackSize) to the address of the frame record.
+ int CalleeSaveBaseToFrameRecordOffset = 0;
+
+ /// SignReturnAddress is true if PAC-RET is enabled for the function with
+ /// defaults being sign non-leaf functions only, with the B key.
+ bool SignReturnAddress = false;
+
+ /// SignReturnAddressAll modifies the default PAC-RET mode to signing leaf
+ /// functions as well.
+ bool SignReturnAddressAll = false;
+
+ /// SignWithBKey modifies the default PAC-RET mode to signing with the B key.
+ bool SignWithBKey = false;
+
+ /// BranchTargetEnforcement enables placing BTI instructions at potential
+ /// indirect branch destinations.
+ bool BranchTargetEnforcement = false;
+
public:
- explicit AArch64FunctionInfo(MachineFunction &MF);
+ explicit AArch64FunctionInfo(MachineFunction &MF);
void initializeBaseYamlFields(const yaml::AArch64FunctionInfo &YamlMFI);
@@ -297,14 +297,14 @@ public:
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
unsigned getJumpTableEntrySize(int Idx) const {
- return JumpTableEntryInfo[Idx].first;
+ return JumpTableEntryInfo[Idx].first;
}
MCSymbol *getJumpTableEntryPCRelSymbol(int Idx) const {
- return JumpTableEntryInfo[Idx].second;
+ return JumpTableEntryInfo[Idx].second;
}
void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym) {
- if ((unsigned)Idx >= JumpTableEntryInfo.size())
- JumpTableEntryInfo.resize(Idx+1);
+ if ((unsigned)Idx >= JumpTableEntryInfo.size())
+ JumpTableEntryInfo.resize(Idx+1);
JumpTableEntryInfo[Idx] = std::make_pair(Size, PCRelSym);
}
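
setJumpTableEntryInfo above grows the per-jump-table vector on demand before indexing it, so entries can be registered in any order. The same pattern in isolation, with std::vector standing in for SmallVector and all names invented for the sketch:

#include <utility>
#include <vector>

struct JumpTableInfo {
  // (entry size in bytes, PC-relative label), indexed by jump-table number.
  std::vector<std::pair<unsigned, const char *>> Entries;

  void set(int Idx, unsigned Size, const char *Label) {
    if ((unsigned)Idx >= Entries.size())
      Entries.resize(Idx + 1);          // grow on demand, default-filled
    Entries[Idx] = {Size, Label};
  }
  unsigned sizeOf(int Idx) const { return Entries[Idx].first; }
};

int main() {
  JumpTableInfo JTI;
  JTI.set(3, 4, ".LJTI0_3");            // indices 0..2 get default values
  return JTI.sizeOf(3) == 4 ? 0 : 1;
}
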
@@ -346,11 +346,11 @@ public:
return ForwardedMustTailRegParms;
}
- Optional<int> getTaggedBasePointerIndex() const {
- return TaggedBasePointerIndex;
- }
- void setTaggedBasePointerIndex(int Index) { TaggedBasePointerIndex = Index; }
-
+ Optional<int> getTaggedBasePointerIndex() const {
+ return TaggedBasePointerIndex;
+ }
+ void setTaggedBasePointerIndex(int Index) { TaggedBasePointerIndex = Index; }
+
unsigned getTaggedBasePointerOffset() const {
return TaggedBasePointerOffset;
}
@@ -358,26 +358,26 @@ public:
TaggedBasePointerOffset = Offset;
}
- int getCalleeSaveBaseToFrameRecordOffset() const {
- return CalleeSaveBaseToFrameRecordOffset;
- }
- void setCalleeSaveBaseToFrameRecordOffset(int Offset) {
- CalleeSaveBaseToFrameRecordOffset = Offset;
- }
-
- bool shouldSignReturnAddress() const;
- bool shouldSignReturnAddress(bool SpillsLR) const;
-
- bool shouldSignWithBKey() const { return SignWithBKey; }
-
- bool branchTargetEnforcement() const { return BranchTargetEnforcement; }
-
+ int getCalleeSaveBaseToFrameRecordOffset() const {
+ return CalleeSaveBaseToFrameRecordOffset;
+ }
+ void setCalleeSaveBaseToFrameRecordOffset(int Offset) {
+ CalleeSaveBaseToFrameRecordOffset = Offset;
+ }
+
+ bool shouldSignReturnAddress() const;
+ bool shouldSignReturnAddress(bool SpillsLR) const;
+
+ bool shouldSignWithBKey() const { return SignWithBKey; }
+
+ bool branchTargetEnforcement() const { return BranchTargetEnforcement; }
+
private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
- SmallVector<std::pair<unsigned, MCSymbol *>, 2> JumpTableEntryInfo;
+ SmallVector<std::pair<unsigned, MCSymbol *>, 2> JumpTableEntryInfo;
};
namespace yaml {
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MacroFusion.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MacroFusion.cpp
index f3b8ef16d6..0e9cb143f2 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -21,7 +21,7 @@ namespace {
/// CMN, CMP, TST followed by Bcc
static bool isArithmeticBccPair(const MachineInstr *FirstMI,
- const MachineInstr &SecondMI, bool CmpOnly) {
+ const MachineInstr &SecondMI, bool CmpOnly) {
if (SecondMI.getOpcode() != AArch64::Bcc)
return false;
@@ -29,13 +29,13 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI,
if (FirstMI == nullptr)
return true;
- // If we're in CmpOnly mode, we only fuse arithmetic instructions that
- // discard their result.
- if (CmpOnly && !(FirstMI->getOperand(0).getReg() == AArch64::XZR ||
- FirstMI->getOperand(0).getReg() == AArch64::WZR)) {
- return false;
- }
-
+ // If we're in CmpOnly mode, we only fuse arithmetic instructions that
+ // discard their result.
+ if (CmpOnly && !(FirstMI->getOperand(0).getReg() == AArch64::XZR ||
+ FirstMI->getOperand(0).getReg() == AArch64::WZR)) {
+ return false;
+ }
+
switch (FirstMI->getOpcode()) {
case AArch64::ADDSWri:
case AArch64::ADDSWrr:
@@ -387,11 +387,11 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
// All checking functions assume that the 1st instr is a wildcard if it is
// unspecified.
- if (ST.hasCmpBccFusion() || ST.hasArithmeticBccFusion()) {
- bool CmpOnly = !ST.hasArithmeticBccFusion();
- if (isArithmeticBccPair(FirstMI, SecondMI, CmpOnly))
- return true;
- }
+ if (ST.hasCmpBccFusion() || ST.hasArithmeticBccFusion()) {
+ bool CmpOnly = !ST.hasArithmeticBccFusion();
+ if (isArithmeticBccPair(FirstMI, SecondMI, CmpOnly))
+ return true;
+ }
if (ST.hasArithmeticCbzFusion() && isArithmeticCbzPair(FirstMI, SecondMI))
return true;
if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
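
The CmpOnly path restored above only fuses a flag-setting arithmetic instruction with a following Bcc when the instruction discards its result, i.e. its destination is WZR or XZR, which is how cmp/cmn/tst are encoded. A compact restatement of that predicate outside the pass; the Reg enum, FusionCand struct and opcode constants are stand-ins invented for the sketch.

#include <cstdio>

// Minimal stand-ins for what the pass reads off a MachineInstr.
enum Reg { W0, X0, WZR, XZR };
struct FusionCand { unsigned Opcode; Reg Dst; };
constexpr unsigned SUBSWrr = 1, ADDSXri = 2;

// In compare+Bcc-only mode, fuse the arithmetic instruction with a trailing
// Bcc only when it discards its result, i.e. it is really a cmp/cmn/tst
// spelled as SUBS/ADDS/ANDS writing the zero register.
bool fusesWithBcc(const FusionCand &First, bool CmpOnly) {
  bool DiscardsResult = First.Dst == WZR || First.Dst == XZR;
  if (CmpOnly && !DiscardsResult)
    return false;
  switch (First.Opcode) {
  case SUBSWrr:
  case ADDSXri:
    return true;                 // flag-setting arithmetic: fusible
  default:
    return false;
  }
}

int main() {
  std::printf("%d %d\n",
              fusesWithBcc({SUBSWrr, WZR}, /*CmpOnly=*/true),  // 1: cmp w0, w1
              fusesWithBcc({SUBSWrr, W0}, /*CmpOnly=*/true));  // 0: keeps its result
}
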
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index 019220e3a5..82b610f995 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -408,11 +408,11 @@ bool AArch64RedundantCopyElimination::optimizeBlock(MachineBasicBlock *MBB) {
O.getReg() != CmpReg;
}))
continue;
-
- // Don't remove a move immediate that implicitly defines the upper
- // bits as different.
- if (TRI->isSuperRegister(DefReg, KnownReg.Reg) && KnownReg.Imm < 0)
- continue;
+
+ // Don't remove a move immediate that implicitly defines the upper
+ // bits as different.
+ if (TRI->isSuperRegister(DefReg, KnownReg.Reg) && KnownReg.Imm < 0)
+ continue;
}
if (IsCopy)
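
The guard above keeps a 32-bit move-immediate alive when the 64-bit register is already known to hold a negative value: any write to a W register zeroes the upper 32 bits of the corresponding X register, so the redundant-looking move actually changes the 64-bit value. The architectural rule it protects, shown in plain C++ (only the zero-extension semantics, not the pass):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t x0 = ~0ull;          // suppose x0 is known to be 0xFFFFFFFFFFFFFFFF
  uint32_t w0 = 0xFFFFFFFFu;    // "mov w0, #-1" writes the same low 32 bits...
  x0 = w0;                      // ...but, like every W-register write, it zeroes
                                // the upper half of x0
  std::printf("%016llx\n", (unsigned long long)x0); // 00000000ffffffff: not redundant
}
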
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.cpp
index f90856d14b..2aeea84ae2 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -24,7 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
@@ -240,14 +240,14 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask;
}
-const uint32_t *AArch64RegisterInfo::getCustomEHPadPreservedMask(
- const MachineFunction &MF) const {
- if (MF.getSubtarget<AArch64Subtarget>().isTargetLinux())
- return CSR_AArch64_AAPCS_RegMask;
-
- return nullptr;
-}
-
+const uint32_t *AArch64RegisterInfo::getCustomEHPadPreservedMask(
+ const MachineFunction &MF) const {
+ if (MF.getSubtarget<AArch64Subtarget>().isTargetLinux())
+ return CSR_AArch64_AAPCS_RegMask;
+
+ return nullptr;
+}
+
const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
if (TT.isOSDarwin())
return CSR_Darwin_AArch64_TLS_RegMask;
@@ -334,16 +334,16 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
}
bool AArch64RegisterInfo::isAnyArgRegReserved(const MachineFunction &MF) const {
- return llvm::any_of(*AArch64::GPR64argRegClass.MC, [this, &MF](MCPhysReg r) {
- return isReservedReg(MF, r);
- });
+ return llvm::any_of(*AArch64::GPR64argRegClass.MC, [this, &MF](MCPhysReg r) {
+ return isReservedReg(MF, r);
+ });
}
void AArch64RegisterInfo::emitReservedArgRegCallError(
const MachineFunction &MF) const {
const Function &F = MF.getFunction();
- F.getContext().diagnose(DiagnosticInfoUnsupported{F, ("AArch64 doesn't support"
- " function calls if any of the argument registers is reserved.")});
+ F.getContext().diagnose(DiagnosticInfoUnsupported{F, ("AArch64 doesn't support"
+ " function calls if any of the argument registers is reserved.")});
}
bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
@@ -525,16 +525,16 @@ bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
Register BaseReg,
int64_t Offset) const {
assert(MI && "Unable to get the legal offset for nil instruction.");
- StackOffset SaveOffset = StackOffset::getFixed(Offset);
+ StackOffset SaveOffset = StackOffset::getFixed(Offset);
return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal;
}
/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
/// at the beginning of the basic block.
-Register
-AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
- int FrameIdx,
- int64_t Offset) const {
+Register
+AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ int FrameIdx,
+ int64_t Offset) const {
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
if (Ins != MBB->end())
@@ -544,7 +544,7 @@ AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
+ Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
@@ -552,21 +552,21 @@ AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addFrameIndex(FrameIdx)
.addImm(Offset)
.addImm(Shifter);
-
- return BaseReg;
+
+ return BaseReg;
}
void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const {
  // AArch64 doesn't need the general 64-bit offsets
- StackOffset Off = StackOffset::getFixed(Offset);
+ StackOffset Off = StackOffset::getFixed(Offset);
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
-
+
const MachineFunction *MF = MI.getParent()->getParent();
const AArch64InstrInfo *TII =
MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
@@ -596,33 +596,33 @@ createScratchRegisterForInstruction(MachineInstr &MI,
}
}
-void AArch64RegisterInfo::getOffsetOpcodes(
- const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const {
- // The smallest scalable elements supported by scaled SVE addressing
- // modes are predicates, which are 2 scalable bytes in size, so the
- // scalable byte offset must always be a multiple of 2.
- assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
-
- // Add fixed-sized offset using existing DIExpression interface.
- DIExpression::appendOffset(Ops, Offset.getFixed());
-
- unsigned VG = getDwarfRegNum(AArch64::VG, true);
- int64_t VGSized = Offset.getScalable() / 2;
- if (VGSized > 0) {
- Ops.push_back(dwarf::DW_OP_constu);
- Ops.push_back(VGSized);
- Ops.append({dwarf::DW_OP_bregx, VG, 0ULL});
- Ops.push_back(dwarf::DW_OP_mul);
- Ops.push_back(dwarf::DW_OP_plus);
- } else if (VGSized < 0) {
- Ops.push_back(dwarf::DW_OP_constu);
- Ops.push_back(-VGSized);
- Ops.append({dwarf::DW_OP_bregx, VG, 0ULL});
- Ops.push_back(dwarf::DW_OP_mul);
- Ops.push_back(dwarf::DW_OP_minus);
- }
-}
-
+void AArch64RegisterInfo::getOffsetOpcodes(
+ const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const {
+ // The smallest scalable elements supported by scaled SVE addressing
+ // modes are predicates, which are 2 scalable bytes in size, so the
+ // scalable byte offset must always be a multiple of 2.
+ assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
+
+ // Add fixed-sized offset using existing DIExpression interface.
+ DIExpression::appendOffset(Ops, Offset.getFixed());
+
+ unsigned VG = getDwarfRegNum(AArch64::VG, true);
+ int64_t VGSized = Offset.getScalable() / 2;
+ if (VGSized > 0) {
+ Ops.push_back(dwarf::DW_OP_constu);
+ Ops.push_back(VGSized);
+ Ops.append({dwarf::DW_OP_bregx, VG, 0ULL});
+ Ops.push_back(dwarf::DW_OP_mul);
+ Ops.push_back(dwarf::DW_OP_plus);
+ } else if (VGSized < 0) {
+ Ops.push_back(dwarf::DW_OP_constu);
+ Ops.push_back(-VGSized);
+ Ops.append({dwarf::DW_OP_bregx, VG, 0ULL});
+ Ops.push_back(dwarf::DW_OP_mul);
+ Ops.push_back(dwarf::DW_OP_minus);
+ }
+}
+
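
getOffsetOpcodes above lowers a StackOffset with a fixed and a scalable part into the DWARF expression fixed + (scalable/2) * VG; dividing the scalable byte count by 2 turns it into a multiple of VG because the smallest scalable unit is a 2-byte predicate. A standalone sketch that emits the same operator sequence symbolically (it only handles a non-negative fixed part, and the helper name is invented for the example):

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// Append a symbolic DWARF expression computing Fixed + (Scalable / 2) * VG.
// 'Scalable' is in scalable bytes; VG is the number of 64-bit granules.
void appendOffsetExpr(std::vector<std::string> &Ops, int64_t Fixed,
                      int64_t Scalable) {
  if (Fixed > 0)
    Ops.push_back("DW_OP_plus_uconst " + std::to_string(Fixed));
  int64_t VGSized = Scalable / 2;
  if (VGSized == 0)
    return;
  Ops.push_back("DW_OP_constu " +
                std::to_string(VGSized < 0 ? -VGSized : VGSized));
  Ops.push_back("DW_OP_bregx VG, 0");  // read the runtime value of VG
  Ops.push_back("DW_OP_mul");
  Ops.push_back(VGSized > 0 ? "DW_OP_plus" : "DW_OP_minus");
}

int main() {
  std::vector<std::string> Ops;
  appendOffsetExpr(Ops, /*Fixed=*/16, /*Scalable=*/32); // 16 + 16 * VG bytes
  for (const std::string &Op : Ops)
    std::printf("%s\n", Op.c_str());
}
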
void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
@@ -640,26 +640,26 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED;
Register FrameReg;
- // Special handling of dbg_value, stackmap, patchpoint and statepoint instructions.
- if (MI.getOpcode() == TargetOpcode::STACKMAP ||
- MI.getOpcode() == TargetOpcode::PATCHPOINT ||
- MI.getOpcode() == TargetOpcode::STATEPOINT) {
+ // Special handling of dbg_value, stackmap, patchpoint and statepoint instructions.
+ if (MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT) {
StackOffset Offset =
TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
/*PreferFP=*/true,
/*ForSimm=*/false);
- Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
+ Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
return;
}
if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
MachineOperand &FI = MI.getOperand(FIOperandNum);
- StackOffset Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex);
- assert(!Offset.getScalable() &&
- "Frame offsets with a scalable component are not supported");
- FI.ChangeToImmediate(Offset.getFixed());
+ StackOffset Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex);
+ assert(!Offset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+ FI.ChangeToImmediate(Offset.getFixed());
return;
}
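
For STACKMAP/PATCHPOINT/STATEPOINT the hunk above resolves the frame index to a frame register plus a fixed offset, then rewrites the frame-index operand to that register and folds the resolved offset into the following immediate. A schematic version of that two-operand rewrite; the Operand model and rewriteFrameIndex helper are invented for the sketch.

#include <cassert>
#include <cstdio>
#include <vector>

// Toy operand model: either a frame index, a register, or an immediate.
struct Operand {
  enum Kind { FrameIndex, Register, Immediate } K;
  long long Val;
};

// Replace operand I (a frame index) with the resolved frame register and
// fold the resolved fixed offset into the following immediate operand,
// mirroring the STACKMAP/PATCHPOINT/STATEPOINT path above.
void rewriteFrameIndex(std::vector<Operand> &Ops, unsigned I,
                       long long FrameReg, long long FixedOffset) {
  assert(Ops[I].K == Operand::FrameIndex && Ops[I + 1].K == Operand::Immediate);
  Ops[I] = {Operand::Register, FrameReg};
  Ops[I + 1].Val += FixedOffset;
}

int main() {
  std::vector<Operand> Ops = {{Operand::FrameIndex, 2}, {Operand::Immediate, 8}};
  rewriteFrameIndex(Ops, 0, /*FrameReg=*/29, /*FixedOffset=*/-16); // x29 - 16
  std::printf("reg x%lld, imm %lld\n", Ops[0].Val, Ops[1].Val);    // reg x29, imm -8
}
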
@@ -668,11 +668,11 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// TAGPstack must use the virtual frame register in its 3rd operand.
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
FrameReg = MI.getOperand(3).getReg();
- Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
- AFI->getTaggedBasePointerOffset());
+ Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
+ AFI->getTaggedBasePointerOffset());
} else if (Tagged) {
- StackOffset SPOffset = StackOffset::getFixed(
- MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize());
+ StackOffset SPOffset = StackOffset::getFixed(
+ MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize());
if (MFI.hasVarSizedObjects() ||
isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) !=
(AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) {
@@ -693,8 +693,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
FrameReg = AArch64::SP;
- Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
- (int64_t)MFI.getStackSize());
+ Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
+ (int64_t)MFI.getStackSize());
} else {
Offset = TFI->resolveFrameIndexReference(
MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
@@ -765,19 +765,19 @@ unsigned AArch64RegisterInfo::getLocalAddressRegister(
return getBaseRegister();
return getFrameRegister(MF);
}
-
-/// SrcRC and DstRC will be morphed into NewRC if this returns true
-bool AArch64RegisterInfo::shouldCoalesce(
- MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
- const TargetRegisterClass *DstRC, unsigned DstSubReg,
- const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
- if (MI->isCopy() &&
- ((DstRC->getID() == AArch64::GPR64RegClassID) ||
- (DstRC->getID() == AArch64::GPR64commonRegClassID)) &&
- MI->getOperand(0).getSubReg() && MI->getOperand(1).getSubReg())
- // Do not coalesce in the case of a 32-bit subregister copy that
- // implements a 32-to-64-bit zero extension and therefore relies on
- // the upper 32 bits being zeroed.
- return false;
- return true;
-}
+
+/// SrcRC and DstRC will be morphed into NewRC if this returns true
+bool AArch64RegisterInfo::shouldCoalesce(
+ MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
+ const TargetRegisterClass *DstRC, unsigned DstSubReg,
+ const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
+ if (MI->isCopy() &&
+ ((DstRC->getID() == AArch64::GPR64RegClassID) ||
+ (DstRC->getID() == AArch64::GPR64commonRegClassID)) &&
+ MI->getOperand(0).getSubReg() && MI->getOperand(1).getSubReg())
+ // Do not coalesce in the case of a 32-bit subregister copy that
+ // implements a 32-to-64-bit zero extension and therefore relies on
+ // the upper 32 bits being zeroed.
+ return false;
+ return true;
+}
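
shouldCoalesce above refuses to coalesce a 32-bit subregister copy into a 64-bit register class because that copy is the lowering of a 32-to-64-bit zero extension: the W-register write it performs is what guarantees the upper 32 bits end up zero. The semantics the copy stands for, as a short C++ analogy (an illustration, not the coalescer logic):

#include <cstdint>
#include <cstdio>

// The copy the hook keeps is the lowering of exactly this operation: the
// result's upper 32 bits must be zero.
uint64_t zext32to64(uint32_t W) { return W; }

int main() {
  // If the copy were coalesced into a plain renaming of a 64-bit register
  // whose upper half still held stale bits, that guarantee would be lost.
  std::printf("%016llx\n", (unsigned long long)zext32to64(0xDEADBEEFu));
  // prints 00000000deadbeef
}
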
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.h
index 0c871ac089..b9a4e6ac16 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -72,10 +72,10 @@ public:
// Funclets on ARM64 Windows don't preserve any registers.
const uint32_t *getNoPreservedMask() const override;
- // Unwinders may not preserve all Neon and SVE registers.
- const uint32_t *
- getCustomEHPadPreservedMask(const MachineFunction &MF) const override;
-
+ // Unwinders may not preserve all Neon and SVE registers.
+ const uint32_t *
+ getCustomEHPadPreservedMask(const MachineFunction &MF) const override;
+
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i64 first argument if the calling convention
/// is one that can (partially) model this attribute with a preserved mask
@@ -107,8 +107,8 @@ public:
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
int64_t Offset) const override;
- Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
- int64_t Offset) const override;
+ Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
+ int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
int64_t Offset) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
@@ -128,15 +128,15 @@ public:
unsigned getLocalAddressRegister(const MachineFunction &MF) const;
bool regNeedsCFI(unsigned Reg, unsigned &RegToUseForCFI) const;
-
- /// SrcRC and DstRC will be morphed into NewRC if this returns true
- bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC,
- unsigned SubReg, const TargetRegisterClass *DstRC,
- unsigned DstSubReg, const TargetRegisterClass *NewRC,
- LiveIntervals &LIS) const override;
-
- void getOffsetOpcodes(const StackOffset &Offset,
- SmallVectorImpl<uint64_t> &Ops) const override;
+
+ /// SrcRC and DstRC will be morphed into NewRC if this returns true
+ bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC,
+ unsigned SubReg, const TargetRegisterClass *DstRC,
+ unsigned DstSubReg, const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const override;
+
+ void getOffsetOpcodes(const StackOffset &Offset,
+ SmallVectorImpl<uint64_t> &Ops) const override;
};
} // end namespace llvm
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.td
index 28d1988b8a..17ad5b997c 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -711,32 +711,32 @@ def XSeqPairClassOperand :
//===----- END: v8.1a atomic CASP register operands -----------------------===//
-//===----------------------------------------------------------------------===//
-// Armv8.7a accelerator extension register operands: 8 consecutive GPRs
-// starting with an even one
-
-let Namespace = "AArch64" in {
- foreach i = 0-7 in
- def "x8sub_"#i : SubRegIndex<64, !mul(64, i)>;
-}
-
-def Tuples8X : RegisterTuples<
- !foreach(i, [0,1,2,3,4,5,6,7], !cast<SubRegIndex>("x8sub_"#i)),
- !foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>;
-
-def GPR64x8Class : RegisterClass<"AArch64", [i64], 64, (trunc Tuples8X, 12)>;
-def GPR64x8AsmOp : AsmOperandClass {
- let Name = "GPR64x8";
- let ParserMethod = "tryParseGPR64x8";
- let RenderMethod = "addRegOperands";
-}
-def GPR64x8 : RegisterOperand<GPR64x8Class, "printGPR64x8"> {
- let ParserMatchClass = GPR64x8AsmOp;
- let PrintMethod = "printGPR64x8";
-}
-
-//===----- END: v8.7a accelerator extension register operands -------------===//
-
+//===----------------------------------------------------------------------===//
+// Armv8.7a accelerator extension register operands: 8 consecutive GPRs
+// starting with an even one
+
+let Namespace = "AArch64" in {
+ foreach i = 0-7 in
+ def "x8sub_"#i : SubRegIndex<64, !mul(64, i)>;
+}
+
+def Tuples8X : RegisterTuples<
+ !foreach(i, [0,1,2,3,4,5,6,7], !cast<SubRegIndex>("x8sub_"#i)),
+ !foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>;
+
+def GPR64x8Class : RegisterClass<"AArch64", [i64], 64, (trunc Tuples8X, 12)>;
+def GPR64x8AsmOp : AsmOperandClass {
+ let Name = "GPR64x8";
+ let ParserMethod = "tryParseGPR64x8";
+ let RenderMethod = "addRegOperands";
+}
+def GPR64x8 : RegisterOperand<GPR64x8Class, "printGPR64x8"> {
+ let ParserMatchClass = GPR64x8AsmOp;
+ let PrintMethod = "printGPR64x8";
+}
+
+//===----- END: v8.7a accelerator extension register operands -------------===//
+
// SVE predicate registers
def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>;
def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>;
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
index 03b32967a2..84e6327550 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
@@ -221,9 +221,9 @@ shouldReplaceInst(MachineFunction *MF, const MCInstrDesc *InstDesc,
// if so, return it.
std::string Subtarget = std::string(SchedModel.getSubtargetInfo()->getCPU());
auto InstID = std::make_pair(InstDesc->getOpcode(), Subtarget);
- auto It = SIMDInstrTable.find(InstID);
- if (It != SIMDInstrTable.end())
- return It->second;
+ auto It = SIMDInstrTable.find(InstID);
+ if (It != SIMDInstrTable.end())
+ return It->second;
unsigned SCIdx = InstDesc->getSchedClass();
const MCSchedClassDesc *SCDesc =
@@ -291,9 +291,9 @@ bool AArch64SIMDInstrOpt::shouldExitEarly(MachineFunction *MF, Subpass SP) {
case Interleave:
std::string Subtarget =
std::string(SchedModel.getSubtargetInfo()->getCPU());
- auto It = InterlEarlyExit.find(Subtarget);
- if (It != InterlEarlyExit.end())
- return It->second;
+ auto It = InterlEarlyExit.find(Subtarget);
+ if (It != InterlEarlyExit.end())
+ return It->second;
for (auto &I : IRT) {
OriginalMCID = &TII->get(I.OrigOpc);
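
Both lookups restored in this file are plain memoization: the answer to "should this SIMD instruction be rewritten on this subtarget?" is computed once and cached by (opcode, CPU). The caching pattern in isolation, with std::map standing in for the pass's tables and the query itself a stand-in:

#include <map>
#include <string>
#include <utility>

// Stand-in for the expensive scheduling-model query the pass performs.
static bool computeShouldReplace(unsigned Opcode, const std::string &CPU) {
  return Opcode % 2 == 0 && CPU == "generic"; // illustrative only
}

// Cache keyed by (opcode, subtarget CPU), mirroring SIMDInstrTable above.
static bool shouldReplace(unsigned Opcode, const std::string &CPU) {
  static std::map<std::pair<unsigned, std::string>, bool> Table;
  auto Key = std::make_pair(Opcode, CPU);
  auto It = Table.find(Key);
  if (It != Table.end())
    return It->second;               // hit: reuse the earlier answer
  bool Result = computeShouldReplace(Opcode, CPU);
  Table[Key] = Result;               // miss: compute once and remember
  return Result;
}

int main() { return shouldReplace(42, "generic") ? 0 : 1; }
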
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SVEInstrInfo.td
index e09b8401c0..19a71f606b 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -152,8 +152,8 @@ def AArch64fmaxv_p : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>;
def AArch64fmaxnmv_p : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>;
def AArch64fminv_p : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>;
def AArch64fminnmv_p : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>;
-def AArch64saddv_p : SDNode<"AArch64ISD::SADDV_PRED", SDT_AArch64Reduce>;
-def AArch64uaddv_p : SDNode<"AArch64ISD::UADDV_PRED", SDT_AArch64Reduce>;
+def AArch64saddv_p : SDNode<"AArch64ISD::SADDV_PRED", SDT_AArch64Reduce>;
+def AArch64uaddv_p : SDNode<"AArch64ISD::UADDV_PRED", SDT_AArch64Reduce>;
def AArch64smaxv_p : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;
def AArch64umaxv_p : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;
def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
@@ -166,84 +166,84 @@ def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;
def SDT_AArch64Arith : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
- SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>
+ SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>
]>;
def SDT_AArch64FMA : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>,
- SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>
+ SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>
]>;
// Predicated operations with the result of inactive lanes being unspecified.
def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
-def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>;
+def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>;
def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
-def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
+def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>;
-def AArch64fmaxnm_p : SDNode<"AArch64ISD::FMAXNM_PRED", SDT_AArch64Arith>;
-def AArch64fminnm_p : SDNode<"AArch64ISD::FMINNM_PRED", SDT_AArch64Arith>;
-def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;
-def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
-def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>;
-def AArch64lsr_p : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>;
-def AArch64mul_p : SDNode<"AArch64ISD::MUL_PRED", SDT_AArch64Arith>;
+def AArch64fmaxnm_p : SDNode<"AArch64ISD::FMAXNM_PRED", SDT_AArch64Arith>;
+def AArch64fminnm_p : SDNode<"AArch64ISD::FMINNM_PRED", SDT_AArch64Arith>;
+def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;
+def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
+def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>;
+def AArch64lsr_p : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>;
+def AArch64mul_p : SDNode<"AArch64ISD::MUL_PRED", SDT_AArch64Arith>;
def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
-def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
-def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
-def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>;
+def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
+def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
+def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>;
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
-def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
-def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
-
-def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [
- SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>,
- SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4>
-]>;
-
-// Predicated operations with the result of inactive lanes provided by the last operand.
-def AArch64clz_mt : SDNode<"AArch64ISD::CTLZ_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64cnt_mt : SDNode<"AArch64ISD::CTPOP_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64fabs_mt : SDNode<"AArch64ISD::FABS_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64abs_mt : SDNode<"AArch64ISD::ABS_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64neg_mt : SDNode<"AArch64ISD::NEG_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
-def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
-def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64frintm_mt : SDNode<"AArch64ISD::FFLOOR_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64frinti_mt : SDNode<"AArch64ISD::FNEARBYINT_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64frintx_mt : SDNode<"AArch64ISD::FRINT_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64frinta_mt : SDNode<"AArch64ISD::FROUND_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64frecpx_mt : SDNode<"AArch64ISD::FRECPX_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64rbit_mt : SDNode<"AArch64ISD::BITREVERSE_MERGE_PASSTHRU", SDT_AArch64Arith>;
-def AArch64revb_mt : SDNode<"AArch64ISD::BSWAP_MERGE_PASSTHRU", SDT_AArch64Arith>;
-
-// These are like the above but we don't yet have need for ISD nodes. They allow
-// a single pattern to match intrinsic and ISD operand layouts.
-def AArch64cls_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cls node:$pt, node:$pg, node:$op)]>;
-def AArch64cnot_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cnot node:$pt, node:$pg, node:$op)]>;
-def AArch64not_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_not node:$pt, node:$pg, node:$op)]>;
-
-def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
- SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
- SDTCVecEltisVT<1,i1>
-]>;
-
-def SDT_AArch64FCVTR : SDTypeProfile<1, 4, [
- SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVec<4>,
- SDTCVecEltisVT<1,i1>
-]>;
-
-def AArch64fcvtr_mt : SDNode<"AArch64ISD::FP_ROUND_MERGE_PASSTHRU", SDT_AArch64FCVTR>;
-def AArch64fcvte_mt : SDNode<"AArch64ISD::FP_EXTEND_MERGE_PASSTHRU", SDT_AArch64FCVT>;
-def AArch64ucvtf_mt : SDNode<"AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>;
-def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>;
-def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>;
-def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>;
-
+def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
+def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
+
+def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>,
+ SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4>
+]>;
+
+// Predicated operations with the result of inactive lanes provided by the last operand.
+def AArch64clz_mt : SDNode<"AArch64ISD::CTLZ_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64cnt_mt : SDNode<"AArch64ISD::CTPOP_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64fabs_mt : SDNode<"AArch64ISD::FABS_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64abs_mt : SDNode<"AArch64ISD::ABS_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64neg_mt : SDNode<"AArch64ISD::NEG_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
+def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
+def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frintm_mt : SDNode<"AArch64ISD::FFLOOR_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frinti_mt : SDNode<"AArch64ISD::FNEARBYINT_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frintx_mt : SDNode<"AArch64ISD::FRINT_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frinta_mt : SDNode<"AArch64ISD::FROUND_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64fsqrt_mt : SDNode<"AArch64ISD::FSQRT_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frecpx_mt : SDNode<"AArch64ISD::FRECPX_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64rbit_mt : SDNode<"AArch64ISD::BITREVERSE_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64revb_mt : SDNode<"AArch64ISD::BSWAP_MERGE_PASSTHRU", SDT_AArch64Arith>;
+
+// These are like the above but we don't yet have need for ISD nodes. They allow
+// a single pattern to match intrinsic and ISD operand layouts.
+def AArch64cls_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cls node:$pt, node:$pg, node:$op)]>;
+def AArch64cnot_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cnot node:$pt, node:$pg, node:$op)]>;
+def AArch64not_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_not node:$pt, node:$pg, node:$op)]>;
+
+def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
+ SDTCVecEltisVT<1,i1>
+]>;
+
+def SDT_AArch64FCVTR : SDTypeProfile<1, 4, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVec<4>,
+ SDTCVecEltisVT<1,i1>
+]>;
+
+def AArch64fcvtr_mt : SDNode<"AArch64ISD::FP_ROUND_MERGE_PASSTHRU", SDT_AArch64FCVTR>;
+def AArch64fcvte_mt : SDNode<"AArch64ISD::FP_EXTEND_MERGE_PASSTHRU", SDT_AArch64FCVT>;
+def AArch64ucvtf_mt : SDNode<"AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>;
+def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>;
+def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>;
+def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>;
+
def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
@@ -263,24 +263,24 @@ def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
-def setoge_or_setge : PatFrags<(ops node:$lhs, node:$rhs),
- [(setoge node:$lhs, node:$rhs),
- (setge node:$lhs, node:$rhs)]>;
-def setogt_or_setgt : PatFrags<(ops node:$lhs, node:$rhs),
- [(setogt node:$lhs, node:$rhs),
- (setgt node:$lhs, node:$rhs)]>;
-def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs),
- [(setoeq node:$lhs, node:$rhs),
- (seteq node:$lhs, node:$rhs)]>;
-def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs),
- [(setone node:$lhs, node:$rhs),
- (setne node:$lhs, node:$rhs)]>;
-def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
- (AArch64mul_p node:$pred, node:$src1, node:$src2), [{
- return N->hasOneUse();
-}]>;
-
-
+def setoge_or_setge : PatFrags<(ops node:$lhs, node:$rhs),
+ [(setoge node:$lhs, node:$rhs),
+ (setge node:$lhs, node:$rhs)]>;
+def setogt_or_setgt : PatFrags<(ops node:$lhs, node:$rhs),
+ [(setogt node:$lhs, node:$rhs),
+ (setgt node:$lhs, node:$rhs)]>;
+def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs),
+ [(setoeq node:$lhs, node:$rhs),
+ (seteq node:$lhs, node:$rhs)]>;
+def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs),
+ [(setone node:$lhs, node:$rhs),
+ (setne node:$lhs, node:$rhs)]>;
+def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
+ (AArch64mul_p node:$pred, node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+
+
let Predicates = [HasSVE] in {
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
@@ -305,7 +305,7 @@ let Predicates = [HasSVE] in {
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
defm ADD_ZPZZ : sve_int_bin_pred_bhsd<AArch64add_p>;
- defm SUB_ZPZZ : sve_int_bin_pred_bhsd<AArch64sub_p>;
+ defm SUB_ZPZZ : sve_int_bin_pred_bhsd<AArch64sub_p>;
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>;
@@ -328,12 +328,12 @@ let Predicates = [HasSVE] in {
defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", int_aarch64_sve_mad>;
defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", int_aarch64_sve_msb>;
- defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla, add, AArch64mul_p_oneuse>;
- defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls, sub, AArch64mul_p_oneuse>;
+ defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla, add, AArch64mul_p_oneuse>;
+ defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls, sub, AArch64mul_p_oneuse>;
// SVE predicated integer reductions.
- defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", AArch64saddv_p>;
- defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", AArch64uaddv_p>;
+ defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", AArch64saddv_p>;
+ defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", AArch64uaddv_p>;
defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_p>;
defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_p>;
defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_p>;
@@ -346,17 +346,17 @@ let Predicates = [HasSVE] in {
defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>;
defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>;
- defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>;
- defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>;
- defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_p>;
- defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>;
+ defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>;
+ defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>;
+ defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_p>;
+ defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>;
- defm MUL_ZI : sve_int_arith_imm2<"mul", AArch64mul_p>;
- defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", int_aarch64_sve_mul, DestructiveBinaryComm>;
- defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", "SMULH_ZPZZ", int_aarch64_sve_smulh, DestructiveBinaryComm>;
- defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>;
+ defm MUL_ZI : sve_int_arith_imm2<"mul", AArch64mul_p>;
+ defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", int_aarch64_sve_mul, DestructiveBinaryComm>;
+ defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", "SMULH_ZPZZ", int_aarch64_sve_smulh, DestructiveBinaryComm>;
+ defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>;
- defm MUL_ZPZZ : sve_int_bin_pred_bhsd<AArch64mul_p>;
+ defm MUL_ZPZZ : sve_int_bin_pred_bhsd<AArch64mul_p>;
defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ">;
defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ">;
@@ -372,34 +372,34 @@ let Predicates = [HasSVE] in {
defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>;
defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>;
- defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", AArch64sxt_mt>;
- defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb", AArch64uxt_mt>;
- defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth", AArch64sxt_mt>;
- defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", AArch64uxt_mt>;
- defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", AArch64sxt_mt>;
- defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", AArch64uxt_mt>;
- defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", AArch64abs_mt>;
- defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", AArch64neg_mt>;
-
- defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", AArch64cls_mt>;
- defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", AArch64clz_mt>;
- defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", AArch64cnt_mt>;
- defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", AArch64cnot_mt>;
- defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", AArch64not_mt>;
- defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>;
- defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>;
-
- defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>;
- defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>;
- defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>;
- defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", int_aarch64_sve_umin, DestructiveBinaryComm>;
- defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", int_aarch64_sve_sabd, DestructiveBinaryComm>;
- defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", int_aarch64_sve_uabd, DestructiveBinaryComm>;
-
- defm SMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64smax_p>;
- defm UMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64umax_p>;
- defm SMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64smin_p>;
- defm UMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64umin_p>;
+ defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", AArch64sxt_mt>;
+ defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb", AArch64uxt_mt>;
+ defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth", AArch64sxt_mt>;
+ defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", AArch64uxt_mt>;
+ defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", AArch64sxt_mt>;
+ defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", AArch64uxt_mt>;
+ defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", AArch64abs_mt>;
+ defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", AArch64neg_mt>;
+
+ defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", AArch64cls_mt>;
+ defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", AArch64clz_mt>;
+ defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", AArch64cnt_mt>;
+ defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", AArch64cnot_mt>;
+ defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", AArch64not_mt>;
+ defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>;
+ defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>;
+
+ defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>;
+ defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>;
+ defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>;
+ defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", int_aarch64_sve_umin, DestructiveBinaryComm>;
+ defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", int_aarch64_sve_sabd, DestructiveBinaryComm>;
+ defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", int_aarch64_sve_uabd, DestructiveBinaryComm>;
+
+ defm SMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64smax_p>;
+ defm UMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64umax_p>;
+ defm SMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64smin_p>;
+ defm UMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64umin_p>;
defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", int_aarch64_sve_frecpe_x>;
defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>;
@@ -428,11 +428,11 @@ let Predicates = [HasSVE] in {
defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ">;
defm FADD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fadd_p>;
- defm FSUB_ZPZZ : sve_fp_bin_pred_hfd<AArch64fsub_p>;
- defm FMUL_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmul_p>;
- defm FMAXNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmaxnm_p>;
- defm FMINNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fminnm_p>;
- defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
+ defm FSUB_ZPZZ : sve_fp_bin_pred_hfd<AArch64fsub_p>;
+ defm FMUL_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmul_p>;
+ defm FMAXNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmaxnm_p>;
+ defm FMINNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fminnm_p>;
+ defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
@@ -449,10 +449,10 @@ let Predicates = [HasSVE] in {
defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
}
- defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>;
- defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>;
- defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>;
- defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
+ defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>;
+ defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>;
+ defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>;
+ defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;
@@ -476,14 +476,14 @@ let Predicates = [HasSVE] in {
// regalloc.
def : Pat<(nxv8f16 (AArch64fma_p nxv8i1:$P, nxv8f16:$Op1, nxv8f16:$Op2, nxv8f16:$Op3)),
(FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
- def : Pat<(nxv4f16 (AArch64fma_p nxv4i1:$P, nxv4f16:$Op1, nxv4f16:$Op2, nxv4f16:$Op3)),
- (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
- def : Pat<(nxv2f16 (AArch64fma_p nxv2i1:$P, nxv2f16:$Op1, nxv2f16:$Op2, nxv2f16:$Op3)),
- (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
+ def : Pat<(nxv4f16 (AArch64fma_p nxv4i1:$P, nxv4f16:$Op1, nxv4f16:$Op2, nxv4f16:$Op3)),
+ (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
+ def : Pat<(nxv2f16 (AArch64fma_p nxv2i1:$P, nxv2f16:$Op1, nxv2f16:$Op2, nxv2f16:$Op3)),
+ (FMLA_ZPmZZ_H $P, $Op3, $Op1, $Op2)>;
def : Pat<(nxv4f32 (AArch64fma_p nxv4i1:$P, nxv4f32:$Op1, nxv4f32:$Op2, nxv4f32:$Op3)),
(FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>;
- def : Pat<(nxv2f32 (AArch64fma_p nxv2i1:$P, nxv2f32:$Op1, nxv2f32:$Op2, nxv2f32:$Op3)),
- (FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>;
+ def : Pat<(nxv2f32 (AArch64fma_p nxv2i1:$P, nxv2f32:$Op1, nxv2f32:$Op2, nxv2f32:$Op3)),
+ (FMLA_ZPmZZ_S $P, $Op3, $Op1, $Op2)>;
def : Pat<(nxv2f64 (AArch64fma_p nxv2i1:$P, nxv2f64:$Op1, nxv2f64:$Op2, nxv2f64:$Op3)),
(FMLA_ZPmZZ_D $P, $Op3, $Op1, $Op2)>;
@@ -534,8 +534,8 @@ let Predicates = [HasSVE] in {
(DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))),
(DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>;
- def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
- (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
+ def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
+ (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
// Duplicate +0.0 into all vector elements
def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
@@ -544,7 +544,7 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>;
- def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
+ def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
// Duplicate Int immediate into all vector elements
def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
@@ -573,7 +573,7 @@ let Predicates = [HasSVE] in {
}
// Select elements from either vector (predicated)
- defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>;
+ defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>;
defm SPLICE_ZPZ : sve_int_perm_splice<"splice", int_aarch64_sve_splice>;
@@ -582,8 +582,8 @@ let Predicates = [HasSVE] in {
defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>;
- defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", AArch64rbit_mt>;
- defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", AArch64revb_mt>;
+ defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", AArch64rbit_mt>;
+ defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", AArch64revb_mt>;
defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>;
@@ -1035,7 +1035,7 @@ let Predicates = [HasSVE] in {
def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>;
def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>;
- multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, int scale, ComplexPattern AddrCP> {
+ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, int scale, ComplexPattern AddrCP> {
// reg + imm
let AddedComplexity = 2 in {
def _reg_imm : Pat<(prefetch (PredTy PPR_3b:$gp), (am_sve_indexed_s6 GPR64sp:$base, simm6s1:$offset), (i32 sve_prfop:$prfop)),
@@ -1145,29 +1145,29 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
(ZIP2_PPP_B PPR:$Ps, (PFALSE))>;
- // Extract subvectors from FP SVE vectors
- def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))),
- (UUNPKLO_ZZ_D ZPR:$Zs)>;
- def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 2))),
- (UUNPKHI_ZZ_D ZPR:$Zs)>;
- def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))),
- (UUNPKLO_ZZ_S ZPR:$Zs)>;
- def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))),
- (UUNPKHI_ZZ_S ZPR:$Zs)>;
- def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 0))),
- (UUNPKLO_ZZ_D ZPR:$Zs)>;
- def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 2))),
- (UUNPKHI_ZZ_D ZPR:$Zs)>;
-
- def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 0))),
- (UUNPKLO_ZZ_D ZPR:$Zs)>;
- def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 2))),
- (UUNPKHI_ZZ_D ZPR:$Zs)>;
- def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))),
- (UUNPKLO_ZZ_S ZPR:$Zs)>;
- def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
- (UUNPKHI_ZZ_S ZPR:$Zs)>;
-
+ // Extract subvectors from FP SVE vectors
+ def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_D ZPR:$Zs)>;
+ def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 2))),
+ (UUNPKHI_ZZ_D ZPR:$Zs)>;
+ def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_S ZPR:$Zs)>;
+ def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))),
+ (UUNPKHI_ZZ_S ZPR:$Zs)>;
+ def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_D ZPR:$Zs)>;
+ def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 2))),
+ (UUNPKHI_ZZ_D ZPR:$Zs)>;
+
+ def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_D ZPR:$Zs)>;
+ def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 2))),
+ (UUNPKHI_ZZ_D ZPR:$Zs)>;
+ def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_S ZPR:$Zs)>;
+ def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
+ (UUNPKHI_ZZ_S ZPR:$Zs)>;
+
// Concatenate two predicates.
def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
(UZP1_PPP_S $p1, $p2)>;
@@ -1176,18 +1176,18 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)),
(UZP1_PPP_B $p1, $p2)>;
- // Concatenate two floating point vectors.
- def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)),
- (UZP1_ZZZ_S $v1, $v2)>;
- def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)),
- (UZP1_ZZZ_H $v1, $v2)>;
- def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)),
- (UZP1_ZZZ_S $v1, $v2)>;
- def : Pat<(nxv4bf16 (concat_vectors nxv2bf16:$v1, nxv2bf16:$v2)),
- (UZP1_ZZZ_S $v1, $v2)>;
- def : Pat<(nxv8bf16 (concat_vectors nxv4bf16:$v1, nxv4bf16:$v2)),
- (UZP1_ZZZ_H $v1, $v2)>;
-
+ // Concatenate two floating point vectors.
+ def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)),
+ (UZP1_ZZZ_S $v1, $v2)>;
+ def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)),
+ (UZP1_ZZZ_H $v1, $v2)>;
+ def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)),
+ (UZP1_ZZZ_S $v1, $v2)>;
+ def : Pat<(nxv4bf16 (concat_vectors nxv2bf16:$v1, nxv2bf16:$v2)),
+ (UZP1_ZZZ_S $v1, $v2)>;
+ def : Pat<(nxv8bf16 (concat_vectors nxv4bf16:$v1, nxv4bf16:$v2)),
+ (UZP1_ZZZ_H $v1, $v2)>;
+
defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;
@@ -1217,10 +1217,10 @@ let Predicates = [HasSVE] in {
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;
- defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge_or_setge>;
- defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt_or_setgt>;
- defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq_or_seteq>;
- defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone_or_setne>;
+ defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge_or_setge>;
+ defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt_or_setgt>;
+ defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq_or_seteq>;
+ defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone_or_setne>;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
@@ -1345,146 +1345,146 @@ let Predicates = [HasSVE] in {
defm INDEX_II : sve_int_index_ii<"index", index_vector>;
// Unpredicated shifts
- defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>;
- defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_p>;
- defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_p>;
+ defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>;
+ defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_p>;
+ defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_p>;
defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">;
defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">;
defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">;
// Predicated shifts
- defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>;
- defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>;
- defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>;
- defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
-
- defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
- defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
- defm LSL_ZPZI : sve_int_shift_pred_bhsd<AArch64lsl_p, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
-
+ defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>;
+ defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>;
+ defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>;
+ defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
+
+ defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+ defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+ defm LSL_ZPZI : sve_int_shift_pred_bhsd<AArch64lsl_p, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
+
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
- defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
- defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
- defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
+ defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
+ defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
+ defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
}
- defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">;
- defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">;
- defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">;
+ defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">;
+ defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">;
+ defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">;
defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", /*isReverseInstr*/ 1>;
defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", /*isReverseInstr*/ 1>;
defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", /*isReverseInstr*/ 1>;
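// An illustrative note on the reversed forms above: asrr/lsrr/lslr perform the
// same shifts with the vector operands swapped, which lets the expansion of
// the _ZPZZ pseudos keep the destructive operand in the destination register
// whichever source it happens to be. Roughly:
//   asr  z0.s, p0/m, z0.s, z1.s   // z0 = z0 >> z1 (arithmetic)
//   asrr z0.s, p0/m, z0.s, z1.s   // z0 = z1 >> z0 (operands reversed)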
- defm ASR_ZPZZ : sve_int_bin_pred_bhsd<AArch64asr_p>;
- defm LSR_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsr_p>;
- defm LSL_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsl_p>;
-
+ defm ASR_ZPZZ : sve_int_bin_pred_bhsd<AArch64asr_p>;
+ defm LSR_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsr_p>;
+ defm LSL_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsl_p>;
+
defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
- defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zdr<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, AArch64fcvtr_mt, nxv4f16, nxv4i1, nxv4f32, ElementSizeS>;
- defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, AArch64fcvte_mt, nxv4f32, nxv4i1, nxv4f16, ElementSizeS>;
- defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
- defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
- defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
- defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
- defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
- defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
- defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
- defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
- defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zdr<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, AArch64fcvtr_mt, nxv2f16, nxv2i1, nxv2f64, ElementSizeD>;
- defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>;
- defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>;
- defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>;
- defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
- defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
- defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
- defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>;
- defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
- defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>;
- defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>;
- defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>;
- defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
- defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
- defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
- defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
- defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
- defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
- defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
- defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
- defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
- defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
- defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
- defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
-
- def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),
- (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
-
- // FP_ROUND has an additional 'precise' flag which indicates the type of rounding.
- // This is ignored by the pattern below where it is matched by (i64 timm0_1)
- def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),
- (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
-
- // Floating-point -> signed integer
- def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
- (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))),
- (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
-
- def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg),
- (sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))),
- (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
-
- def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
- (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))),
- (SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
-
- def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
- (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))),
- (SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
-
- def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
- (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))),
- (SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
-
- // Floating-point -> unsigned integer
- def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
- (and (nxv2i64 ZPR:$Zs),
- (nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))),
- (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
-
- def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
- (and (nxv2i64 ZPR:$Zs),
- (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))),
- (UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
-
- def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg),
- (and (nxv4i32 ZPR:$Zs),
- (nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))),
- (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
-
- def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
- (and (nxv2i64 ZPR:$Zs),
- (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))),
- (UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
-
- def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
- (and (nxv2i64 ZPR:$Zs),
- (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))),
- (UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
-
- defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>;
- defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>;
- defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", AArch64frintm_mt>;
- defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", AArch64frintz_mt>;
- defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", AArch64frinta_mt>;
- defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", AArch64frintx_mt>;
- defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>;
- defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", AArch64frecpx_mt>;
- defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>;
-
+ defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zdr<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, AArch64fcvtr_mt, nxv4f16, nxv4i1, nxv4f32, ElementSizeS>;
+ defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, AArch64fcvte_mt, nxv4f32, nxv4i1, nxv4f16, ElementSizeS>;
+ defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
+ defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
+ defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>;
+ defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>;
+ defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
+ defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
+ defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>;
+ defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>;
+ defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zdr<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, AArch64fcvtr_mt, nxv2f16, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>;
+ defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>;
+ defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
+ defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>;
+ defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
+ defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>;
+ defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>;
+ defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>;
+ defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>;
+ defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>;
+ defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
+ defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>;
+ defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
+ defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
+ defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
+ defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>;
+ defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>;
+ defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>;
+ defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
+ defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
+
+ def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),
+ (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ // FP_ROUND has an additional 'precise' flag which indicates the type of rounding.
+ // This is ignored by the pattern below where it is matched by (i64 timm0_1)
+ def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),
+ (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
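// An illustration of the timm0_1 operand above: FP_ROUND's second operand is a
// constant 0/1 flag recording whether the narrowing is known not to lose
// precision, and timm0_1 matches either value, so a pattern pinned to one flag
// value would just be a special case of the pattern above, e.g. (sketch only):
//   def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs),
//                                       (i64 0), (nxv2f16 ZPR:$Zd))),
//             (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;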
+ // Floating-point -> signed integer
+ def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+ (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))),
+ (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg),
+ (sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))),
+ (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+ (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))),
+ (SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+ (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))),
+ (SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
+ (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))),
+ (SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ // Floating-point -> unsigned integer
+ def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+ (and (nxv2i64 ZPR:$Zs),
+ (nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))),
+ (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+ (and (nxv2i64 ZPR:$Zs),
+ (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))),
+ (UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg),
+ (and (nxv4i32 ZPR:$Zs),
+ (nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))),
+ (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+ (and (nxv2i64 ZPR:$Zs),
+ (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))),
+ (UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
+ (and (nxv2i64 ZPR:$Zs),
+ (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))),
+ (UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+
+ defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>;
+ defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>;
+ defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", AArch64frintm_mt>;
+ defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", AArch64frintz_mt>;
+ defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", AArch64frinta_mt>;
+ defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", AArch64frintx_mt>;
+ defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>;
+ defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", AArch64frecpx_mt>;
+ defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>;
+
let Predicates = [HasBF16, HasSVE] in {
defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>;
@@ -1648,9 +1648,9 @@ let Predicates = [HasSVE] in {
def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>;
}
- def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
- (ADDVL_XXI GPR64:$op, $imm)>;
-
+ def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
+ (ADDVL_XXI GPR64:$op, $imm)>;
+
// FIXME: BigEndian requires an additional REV instruction to satisfy the
// constraint that none of the bits change when stored to memory as one
 // type, and reloaded as another type.
@@ -1721,7 +1721,7 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
}
- // These allow casting from/to unpacked predicate types.
+ // These allow casting from/to unpacked predicate types.
def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
@@ -1736,18 +1736,18 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
- // These allow casting from/to unpacked floating-point types.
- def : Pat<(nxv2f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
- def : Pat<(nxv8f16 (reinterpret_cast (nxv2f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
- def : Pat<(nxv4f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
- def : Pat<(nxv8f16 (reinterpret_cast (nxv4f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
- def : Pat<(nxv2f32 (reinterpret_cast (nxv4f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
- def : Pat<(nxv4f32 (reinterpret_cast (nxv2f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
- def : Pat<(nxv2bf16 (reinterpret_cast (nxv8bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
- def : Pat<(nxv8bf16 (reinterpret_cast (nxv2bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
- def : Pat<(nxv4bf16 (reinterpret_cast (nxv8bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
- def : Pat<(nxv8bf16 (reinterpret_cast (nxv4bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
-
+ // These allow casting from/to unpacked floating-point types.
+ def : Pat<(nxv2f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv8f16 (reinterpret_cast (nxv2f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv4f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv8f16 (reinterpret_cast (nxv4f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv2f32 (reinterpret_cast (nxv4f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv4f32 (reinterpret_cast (nxv2f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv2bf16 (reinterpret_cast (nxv8bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv8bf16 (reinterpret_cast (nxv2bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv4bf16 (reinterpret_cast (nxv8bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+ def : Pat<(nxv8bf16 (reinterpret_cast (nxv4bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+
def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)),
(AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>;
def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)),
@@ -1800,7 +1800,7 @@ let Predicates = [HasSVE] in {
defm : pred_load<nxv8i16, nxv8i1, asext_masked_load_i8, LD1SB_H, LD1SB_H_IMM, am_sve_regreg_lsl0>;
defm : pred_load<nxv8i16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
defm : pred_load<nxv8f16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv8bf16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv8bf16, nxv8i1, nonext_masked_load, LD1H, LD1H_IMM, am_sve_regreg_lsl1>;
// 16-element contiguous loads
defm : pred_load<nxv16i8, nxv16i1, nonext_masked_load, LD1B, LD1B_IMM, am_sve_regreg_lsl0>;
@@ -1838,10 +1838,10 @@ let Predicates = [HasSVE] in {
defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>;
// 8-element contiguous stores
- defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H, ST1B_H_IMM, am_sve_regreg_lsl0>;
- defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
- defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
- defm : pred_store<nxv8bf16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H, ST1B_H_IMM, am_sve_regreg_lsl0>;
+ defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv8bf16, nxv8i1, nontrunc_masked_store, ST1H, ST1H_IMM, am_sve_regreg_lsl1>;
// 16-element contiguous stores
defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B, ST1B_IMM, am_sve_regreg_lsl0>;
@@ -2003,7 +2003,7 @@ let Predicates = [HasSVE] in {
defm : ld1<LD1SB_H, LD1SB_H_IMM, nxv8i16, AArch64ld1s_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
defm : ld1<LD1H, LD1H_IMM, nxv8i16, AArch64ld1_z, nxv8i1, nxv8i16, am_sve_regreg_lsl1>;
defm : ld1<LD1H, LD1H_IMM, nxv8f16, AArch64ld1_z, nxv8i1, nxv8f16, am_sve_regreg_lsl1>;
- defm : ld1<LD1H, LD1H_IMM, nxv8bf16, AArch64ld1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
+ defm : ld1<LD1H, LD1H_IMM, nxv8bf16, AArch64ld1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
// 16-element contiguous loads
defm : ld1<LD1B, LD1B_IMM, nxv16i8, AArch64ld1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
@@ -2043,7 +2043,7 @@ let Predicates = [HasSVE] in {
defm : ldnf1<LDNF1SB_H_IMM, nxv8i16, AArch64ldnf1s_z, nxv8i1, nxv8i8>;
defm : ldnf1<LDNF1H_IMM, nxv8i16, AArch64ldnf1_z, nxv8i1, nxv8i16>;
defm : ldnf1<LDNF1H_IMM, nxv8f16, AArch64ldnf1_z, nxv8i1, nxv8f16>;
- defm : ldnf1<LDNF1H_IMM, nxv8bf16, AArch64ldnf1_z, nxv8i1, nxv8bf16>;
+ defm : ldnf1<LDNF1H_IMM, nxv8bf16, AArch64ldnf1_z, nxv8i1, nxv8bf16>;
// 16-element contiguous non-faulting loads
defm : ldnf1<LDNF1B_IMM, nxv16i8, AArch64ldnf1_z, nxv16i1, nxv16i8>;
@@ -2084,7 +2084,7 @@ let Predicates = [HasSVE] in {
defm : ldff1<LDFF1SB_H, nxv8i16, AArch64ldff1s_z, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
defm : ldff1<LDFF1H, nxv8i16, AArch64ldff1_z, nxv8i1, nxv8i16, am_sve_regreg_lsl1>;
defm : ldff1<LDFF1H, nxv8f16, AArch64ldff1_z, nxv8i1, nxv8f16, am_sve_regreg_lsl1>;
- defm : ldff1<LDFF1H, nxv8bf16, AArch64ldff1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
+ defm : ldff1<LDFF1H, nxv8bf16, AArch64ldff1_z, nxv8i1, nxv8bf16, am_sve_regreg_lsl1>;
// 16-element contiguous first faulting loads
defm : ldff1<LDFF1B, nxv16i8, AArch64ldff1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
@@ -2135,19 +2135,19 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv2i64 (vector_insert (nxv2i64 (undef)), (i64 FPR64:$src), 0)),
(INSERT_SUBREG (nxv2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
- def : Pat<(nxv8f16 (vector_insert (nxv8f16 (undef)), (f16 FPR16:$src), 0)),
- (INSERT_SUBREG (nxv8f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
- def : Pat<(nxv4f16 (vector_insert (nxv4f16 (undef)), (f16 FPR16:$src), 0)),
- (INSERT_SUBREG (nxv4f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
- def : Pat<(nxv2f16 (vector_insert (nxv2f16 (undef)), (f16 FPR16:$src), 0)),
- (INSERT_SUBREG (nxv2f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
- def : Pat<(nxv4f32 (vector_insert (nxv4f32 (undef)), (f32 FPR32:$src), 0)),
- (INSERT_SUBREG (nxv4f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
- def : Pat<(nxv2f32 (vector_insert (nxv2f32 (undef)), (f32 FPR32:$src), 0)),
- (INSERT_SUBREG (nxv2f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
- def : Pat<(nxv2f64 (vector_insert (nxv2f64 (undef)), (f64 FPR64:$src), 0)),
- (INSERT_SUBREG (nxv2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
-
+ def : Pat<(nxv8f16 (vector_insert (nxv8f16 (undef)), (f16 FPR16:$src), 0)),
+ (INSERT_SUBREG (nxv8f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
+ def : Pat<(nxv4f16 (vector_insert (nxv4f16 (undef)), (f16 FPR16:$src), 0)),
+ (INSERT_SUBREG (nxv4f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
+ def : Pat<(nxv2f16 (vector_insert (nxv2f16 (undef)), (f16 FPR16:$src), 0)),
+ (INSERT_SUBREG (nxv2f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
+ def : Pat<(nxv4f32 (vector_insert (nxv4f32 (undef)), (f32 FPR32:$src), 0)),
+ (INSERT_SUBREG (nxv4f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+ def : Pat<(nxv2f32 (vector_insert (nxv2f32 (undef)), (f32 FPR32:$src), 0)),
+ (INSERT_SUBREG (nxv2f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+ def : Pat<(nxv2f64 (vector_insert (nxv2f64 (undef)), (f64 FPR64:$src), 0)),
+ (INSERT_SUBREG (nxv2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+
// Insert scalar into vector[0]
def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), (i32 GPR32:$src), 0)),
(CPY_ZPmR_B ZPR:$vec, (PTRUE_B 1), GPR32:$src)>;
@@ -2211,28 +2211,28 @@ let Predicates = [HasSVE] in {
(DUP_ZR_D $index)),
$src)>;
- // Extract element from vector with scalar index
- def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)),
- (LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index), ZPR:$vec)>;
- def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)),
- (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
- def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)),
- (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
- def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)),
- (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
- def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)),
- (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
- def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), GPR64:$index)),
- (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
- def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), GPR64:$index)),
- (LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
- def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
- (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
- def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), GPR64:$index)),
- (LASTB_VPZ_S (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
- def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)),
- (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
-
+ // Extract element from vector with scalar index
+ def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)),
+ (LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_S (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+
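// A short note on the lowering above: WHILELS starting from XZR builds a
// predicate whose lanes 0..index are active (capped at the vector length), and
// LASTB then returns the last active lane, i.e. the requested element. For a
// byte vector this is roughly:
//   whilels p0.b, xzr, x1   // p0 active for lanes 0..x1
//   lastb   w0, p0, z0.b    // w0 = z0.b[x1]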
// Extract element from vector with immediate index
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)),
(EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_b:$index), ssub)>;
@@ -2244,54 +2244,54 @@ let Predicates = [HasSVE] in {
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
(EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
- def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
- (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
- def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
- (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>;
- def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
- (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>;
+ def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
+ def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>;
+ def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>;
def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
(EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
- def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
- (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), ssub)>;
+ def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), ssub)>;
def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
- // Extract element from vector with immediate index that's within the bottom 128-bits.
- let AddedComplexity = 1 in {
- def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)),
- (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
- def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)),
- (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
- def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
- (i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
- def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)),
- (i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
- }
-
- // Extract first element from vector.
- let AddedComplexity = 2 in {
- def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)),
- (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
- def : Pat<(vector_extract (nxv8i16 ZPR:$Zs), (i64 0)),
- (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
- def : Pat<(vector_extract (nxv4i32 ZPR:$Zs), (i64 0)),
- (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
- def : Pat<(vector_extract (nxv2i64 ZPR:$Zs), (i64 0)),
- (i64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
- def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)),
- (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
- def : Pat<(vector_extract (nxv4f16 ZPR:$Zs), (i64 0)),
- (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
- def : Pat<(vector_extract (nxv2f16 ZPR:$Zs), (i64 0)),
- (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
- def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
- (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
- def : Pat<(vector_extract (nxv2f32 ZPR:$Zs), (i64 0)),
- (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
- def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
- (f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
- }
+ // Extract element from vector with immediate index that's within the bottom 128-bits.
+ let AddedComplexity = 1 in {
+ def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)),
+ (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
+ def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)),
+ (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
+ def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
+ (i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
+ def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)),
+ (i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
+ }
+
+ // Extract first element from vector.
+ let AddedComplexity = 2 in {
+ def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)),
+ (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv8i16 ZPR:$Zs), (i64 0)),
+ (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv4i32 ZPR:$Zs), (i64 0)),
+ (i32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv2i64 ZPR:$Zs), (i64 0)),
+ (i64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
+ def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)),
+ (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+ def : Pat<(vector_extract (nxv4f16 ZPR:$Zs), (i64 0)),
+ (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+ def : Pat<(vector_extract (nxv2f16 ZPR:$Zs), (i64 0)),
+ (f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+ def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
+ (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv2f32 ZPR:$Zs), (i64 0)),
+ (f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
+ def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
+ (f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
+ }
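// A note on the AddedComplexity values above: when several patterns match the
// same node, the generated selector prefers the higher complexity, so an
// extract of element 0 selects the plain sub-register read here rather than
// the DUP_ZZI-based patterns earlier; e.g. (vector_extract nxv4i32:$Z, 0)
// becomes a copy of the ssub sub-register of $Z.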
}
let Predicates = [HasSVE, HasMatMulInt8] in {
@@ -2350,10 +2350,10 @@ let Predicates = [HasSVE2] in {
defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh", int_aarch64_sve_sqrdmulh>;
// SVE2 integer multiply vectors (unpredicated)
- defm MUL_ZZZ : sve2_int_mul<0b000, "mul", null_frag, AArch64mul_p>;
+ defm MUL_ZZZ : sve2_int_mul<0b000, "mul", null_frag, AArch64mul_p>;
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag>;
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag>;
- defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
+ defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
// Add patterns for unpredicated version of smulh and umulh.
def : Pat<(nxv16i8 (int_aarch64_sve_smulh (nxv16i1 (AArch64ptrue 31)), nxv16i8:$Op1, nxv16i8:$Op2)),
@@ -2372,7 +2372,7 @@ let Predicates = [HasSVE2] in {
(UMULH_ZZZ_S $Op1, $Op2)>;
def : Pat<(nxv2i64 (int_aarch64_sve_umulh (nxv2i1 (AArch64ptrue 31)), nxv2i64:$Op1, nxv2i64:$Op2)),
(UMULH_ZZZ_D $Op1, $Op2)>;
-
+
// SVE2 complex integer dot product (indexed)
defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot", int_aarch64_sve_cdot_lane>;
@@ -2494,11 +2494,11 @@ let Predicates = [HasSVE2] in {
}
// SVE2 predicated shifts
- defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">;
- defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">;
- defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>;
- defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>;
- defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
+ defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">;
+ defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">;
+ defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>;
+ defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>;
+ defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
// SVE2 integer add/subtract long
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>;
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA55.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA55.td
index 50911fd22b..0b45a3ba09 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA55.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA55.td
@@ -1,339 +1,339 @@
-//==- AArch64SchedCortexA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the machine model for the ARM Cortex-A55 processors.
-//
-//===----------------------------------------------------------------------===//
-
-// ===---------------------------------------------------------------------===//
-// The following definitions describe the per-operand machine model.
-// This works with MachineScheduler. See MCSchedModel.h for details.
-
-// Cortex-A55 machine model for scheduling and other instruction cost heuristics.
-def CortexA55Model : SchedMachineModel {
- let MicroOpBufferSize = 0; // The Cortex-A55 is an in-order processor
- let IssueWidth = 2; // It dual-issues under most circumstances
- let LoadLatency = 4; // Cycles for loads to access the cache. The
- // optimisation guide shows that most loads have
- // a latency of 3, but some have a latency of 4
-  // or 5. Setting it to 4 looked to be a good trade-off.
- let MispredictPenalty = 8; // A branch direction mispredict.
- let PostRAScheduler = 1; // Enable PostRA scheduler pass.
- let CompleteModel = 0; // Covers instructions applicable to Cortex-A55.
-
- list<Predicate> UnsupportedFeatures = [HasSVE];
-
- // FIXME: Remove when all errors have been fixed.
- let FullInstRWOverlapCheck = 0;
-}
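// A rough sketch of how a machine model such as this is attached to a CPU in
// the target's processor definitions; the feature name below is illustrative
// rather than taken from this patch:
//   def : ProcessorModel<"cortex-a55", CortexA55Model, [ProcA55]>;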
-
-//===----------------------------------------------------------------------===//
-// Define each kind of processor resource and number available.
-
-// Modeling each pipeline as a ProcResource using the BufferSize = 0 since the
-// Cortex-A55 is in-order.
-
-def CortexA55UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
-def CortexA55UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bit wide
-def CortexA55UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division, not pipelined
-def CortexA55UnitLd : ProcResource<1> { let BufferSize = 0; } // Load pipe
-def CortexA55UnitSt : ProcResource<1> { let BufferSize = 0; } // Store pipe
-def CortexA55UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
-
-// The FP DIV/SQRT instructions execute totally differently from the FP ALU
-// instructions, which can mostly be dual-issued; that's why for now we model
-// them with 2 resources.
-def CortexA55UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU
-def CortexA55UnitFPMAC : ProcResource<2> { let BufferSize = 0; } // FP MAC
-def CortexA55UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP Div/SQRT, 64/128
-
-//===----------------------------------------------------------------------===//
-// Subtarget-specific SchedWrite types
-
-let SchedModel = CortexA55Model in {
-
-// These latencies are modeled without taking into account forwarding paths
-// (the software optimisation guide lists latencies taking into account
-// typical forwarding paths).
-def : WriteRes<WriteImm, [CortexA55UnitALU]> { let Latency = 3; } // MOVN, MOVZ
-def : WriteRes<WriteI, [CortexA55UnitALU]> { let Latency = 3; } // ALU
-def : WriteRes<WriteISReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Shifted-Reg
-def : WriteRes<WriteIEReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Extended-Reg
-def : WriteRes<WriteExtr, [CortexA55UnitALU]> { let Latency = 3; } // EXTR from a reg pair
-def : WriteRes<WriteIS, [CortexA55UnitALU]> { let Latency = 3; } // Shift/Scale
-
-// MAC
-def : WriteRes<WriteIM32, [CortexA55UnitMAC]> { let Latency = 4; } // 32-bit Multiply
-def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; } // 64-bit Multiply
-
-// Div
-def : WriteRes<WriteID32, [CortexA55UnitDiv]> {
- let Latency = 8; let ResourceCycles = [8];
-}
-def : WriteRes<WriteID64, [CortexA55UnitDiv]> {
- let Latency = 8; let ResourceCycles = [8];
-}
-
-// Load
-def : WriteRes<WriteLD, [CortexA55UnitLd]> { let Latency = 3; }
-def : WriteRes<WriteLDIdx, [CortexA55UnitLd]> { let Latency = 4; }
-def : WriteRes<WriteLDHi, [CortexA55UnitLd]> { let Latency = 5; }
-
-// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVLD
-// below, we choose the median of 3, which makes the latency 6.
-// An extra cycle is needed to get the swizzling right.
-def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6;
- let ResourceCycles = [3]; }
-def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; }
-def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5;
- let ResourceCycles = [2]; }
-def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6;
- let ResourceCycles = [3]; }
-def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7;
- let ResourceCycles = [4]; }
-def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8;
- let ResourceCycles = [5]; }
-def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9;
- let ResourceCycles = [6]; }
-def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10;
- let ResourceCycles = [7]; }
-def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11;
- let ResourceCycles = [8]; }
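// A summary of the pattern in the defs above: a vector load needing N issue
// cycles on the load pipe is modeled with ResourceCycles = [N] and
// Latency = 3 + N, so e.g. CortexA55WriteVLD4 occupies CortexA55UnitLd for 4
// cycles and completes with latency 7.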
-
-// Pre/Post Indexing - Performed as part of address generation
-def : WriteRes<WriteAdr, []> { let Latency = 0; }
-
-// Store
-def : WriteRes<WriteST, [CortexA55UnitSt]> { let Latency = 4; }
-def : WriteRes<WriteSTP, [CortexA55UnitSt]> { let Latency = 4; }
-def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 4; }
-def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; }
-
-// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
-def : WriteRes<WriteVST, [CortexA55UnitSt]> { let Latency = 5;
- let ResourceCycles = [2];}
-def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; }
-def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
- let ResourceCycles = [2]; }
-def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6;
- let ResourceCycles = [3]; }
-def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
- let ResourceCycles = [4]; }
-
-def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
-
-// Branch
-def : WriteRes<WriteBr, [CortexA55UnitB]>;
-def : WriteRes<WriteBrReg, [CortexA55UnitB]>;
-def : WriteRes<WriteSys, [CortexA55UnitB]>;
-def : WriteRes<WriteBarrier, [CortexA55UnitB]>;
-def : WriteRes<WriteHint, [CortexA55UnitB]>;
-
-// FP ALU
-// As the WriteF result is produced in F5 and can mostly be forwarded
-// to the consumer at F1, the effective latency is set to 4.
-def : WriteRes<WriteF, [CortexA55UnitFPALU]> { let Latency = 4; }
-def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
-def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
-def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
-def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
-def : WriteRes<WriteV, [CortexA55UnitFPALU]> { let Latency = 4; }
-
-// FP ALU specific new schedwrite definitions
-def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
-def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
-def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;}
-
-// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined
-def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; }
-def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22;
- let ResourceCycles = [29]; }
-def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; }
-def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
- let ResourceCycles = [5]; }
-def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13;
- let ResourceCycles = [10]; }
-def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
- let ResourceCycles = [19]; }
-def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
- let ResourceCycles = [5]; }
-def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12;
- let ResourceCycles = [9]; }
-def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
- let ResourceCycles = [19]; }
-
-//===----------------------------------------------------------------------===//
-// Subtarget-specific SchedRead types.
-
-def : ReadAdvance<ReadVLD, 0>;
-def : ReadAdvance<ReadExtrHi, 1>;
-def : ReadAdvance<ReadAdrBase, 1>;
-
-// ALU - ALU input operands are generally needed in EX1. An operand produced
-// in, say, EX2 can be forwarded for consumption to the ALU in EX1, thereby
-// allowing back-to-back ALU operations such as add. If an operand requires
-// a shift, it will, however, be required in ISS stage.
-def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
- WriteISReg, WriteIEReg,WriteIS,
- WriteID32,WriteID64,
- WriteIM32,WriteIM64]>;
-// Shifted operand
-def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
- WriteISReg, WriteIEReg,WriteIS,
- WriteID32,WriteID64,
- WriteIM32,WriteIM64]>;
-def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
- WriteISReg, WriteIEReg,WriteIS,
- WriteID32,WriteID64,
- WriteIM32,WriteIM64]>;
-def CortexA55ReadISReg : SchedReadVariant<[
- SchedVar<RegShiftedPred, [CortexA55ReadShifted]>,
- SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
-def : SchedAlias<ReadISReg, CortexA55ReadISReg>;
-
-def CortexA55ReadIEReg : SchedReadVariant<[
- SchedVar<RegExtendedPred, [CortexA55ReadShifted]>,
- SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
-def : SchedAlias<ReadIEReg, CortexA55ReadIEReg>;
-
-// MUL
-def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
- WriteISReg, WriteIEReg,WriteIS,
- WriteID32,WriteID64,
- WriteIM32,WriteIM64]>;
-def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI,
- WriteISReg, WriteIEReg,WriteIS,
- WriteID32,WriteID64,
- WriteIM32,WriteIM64]>;
-
-// Div
-def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
- WriteISReg, WriteIEReg,WriteIS,
- WriteID32,WriteID64,
- WriteIM32,WriteIM64]>;
-
-//===----------------------------------------------------------------------===//
-// Subtarget-specific InstRWs.
-
-//---
-// Miscellaneous
-//---
-def : InstRW<[CortexA55WriteVLD2,CortexA55WriteVLD1], (instregex "LDP.*")>;
-def : InstRW<[WriteI], (instrs COPY)>;
-//---
-// Vector Loads - 64-bit per cycle
-//---
-// 1-element structures
-def : InstRW<[CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; // single element
-def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate
-def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
-def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)$")>; // multiple structures
-def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
-def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
-
-def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
-
-// 2-element structures
-def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
-def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
-
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
-def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
-
-// 3-element structures
-def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
-def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
-def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
-
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
-
-// 4-element structures
-def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs.
-def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
-def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs.
-def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
-
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
-def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
-
-//---
-// Vector Stores
-//---
-def : InstRW<[CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
-def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
-def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
-def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
-def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
-def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
-
-def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
-def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
-def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-//---
-// Floating Point Conversions, MAC, DIV, SQRT
-//---
-def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
-def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;
-
-def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>;
-def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>;
-def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTFv")>;
-
-def : InstRW<[CortexA55WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
-def : InstRW<[CortexA55WriteFMAC], (instregex "^FML(A|S).*")>;
-def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>;
-def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>;
-def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>;
-def : InstRW<[CortexA55WriteFDivHP], (instregex "^FDIVv.*16$")>;
-def : InstRW<[CortexA55WriteFDivSP], (instregex "^FDIVv.*32$")>;
-def : InstRW<[CortexA55WriteFDivDP], (instregex "^FDIVv.*64$")>;
-def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
-def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
-def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
-}
+//==- AArch64SchedCortexA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the ARM Cortex-A55 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the per-operand machine model.
+// This works with MachineScheduler. See MCSchedModel.h for details.
+
+// Cortex-A55 machine model for scheduling and other instruction cost heuristics.
+def CortexA55Model : SchedMachineModel {
+ let MicroOpBufferSize = 0; // The Cortex-A55 is an in-order processor
+ let IssueWidth = 2; // It dual-issues under most circumstances
+ let LoadLatency = 4; // Cycles for loads to access the cache. The
+ // optimisation guide shows that most loads have
+ // a latency of 3, but some have a latency of 4
+                                // or 5. Setting it to 4 looked to be a good trade-off.
+ let MispredictPenalty = 8; // A branch direction mispredict.
+ let PostRAScheduler = 1; // Enable PostRA scheduler pass.
+ let CompleteModel = 0; // Covers instructions applicable to Cortex-A55.
+
+ list<Predicate> UnsupportedFeatures = [HasSVE];
+
+ // FIXME: Remove when all errors have been fixed.
+ let FullInstRWOverlapCheck = 0;
+}
+
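+// For illustration: with MicroOpBufferSize = 0 the scheduler treats the core
+// as fully in-order, so a consumer scheduled closer to its producer than the
+// producer's Latency simply stalls rather than being buffered. LoadLatency is,
+// roughly speaking, a fallback for loads without an explicit WriteRes; the
+// WriteLD/WriteVLD definitions below take precedence. One way to sanity-check
+// the numbers is an llvm-mca timeline, e.g.
+//   llvm-mca -mtriple=aarch64 -mcpu=cortex-a55 -timeline input.s
+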
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Each pipeline is modeled as a ProcResource with BufferSize = 0, since the
+// Cortex-A55 is in-order.
+
+def CortexA55UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
+def CortexA55UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bit wide
+def CortexA55UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division, not pipelined
+def CortexA55UnitLd : ProcResource<1> { let BufferSize = 0; } // Load pipe
+def CortexA55UnitSt : ProcResource<1> { let BufferSize = 0; } // Store pipe
+def CortexA55UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
+
+// The FP DIV/SQRT instructions execute totally differently from the FP ALU
+// instructions, which can mostly be dual-issued; that's why for now we model
+// them with 2 resources.
+def CortexA55UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU
+def CortexA55UnitFPMAC : ProcResource<2> { let BufferSize = 0; } // FP MAC
+def CortexA55UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP Div/SQRT, 64/128
+
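+// For illustration: a write claims its unit for ResourceCycles (1 by default)
+// starting at issue, and because BufferSize = 0 there is no reservation queue;
+// a micro-op that cannot grab a free unit in its issue cycle stalls the
+// in-order pipeline. A hypothetical extra unit would be declared the same way,
+// e.g.  def CortexA55UnitFoo : ProcResource<1> { let BufferSize = 0; }
+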
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types
+
+let SchedModel = CortexA55Model in {
+
+// These latencies are modeled without taking into account forwarding paths
+// (the software optimisation guide lists latencies taking into account
+// typical forwarding paths).
+def : WriteRes<WriteImm, [CortexA55UnitALU]> { let Latency = 3; } // MOVN, MOVZ
+def : WriteRes<WriteI, [CortexA55UnitALU]> { let Latency = 3; } // ALU
+def : WriteRes<WriteISReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Shifted-Reg
+def : WriteRes<WriteIEReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Extended-Reg
+def : WriteRes<WriteExtr, [CortexA55UnitALU]> { let Latency = 3; } // EXTR from a reg pair
+def : WriteRes<WriteIS, [CortexA55UnitALU]> { let Latency = 3; } // Shift/Scale
+
+// MAC
+def : WriteRes<WriteIM32, [CortexA55UnitMAC]> { let Latency = 4; } // 32-bit Multiply
+def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; } // 64-bit Multiply
+
+// Div
+def : WriteRes<WriteID32, [CortexA55UnitDiv]> {
+ let Latency = 8; let ResourceCycles = [8];
+}
+def : WriteRes<WriteID64, [CortexA55UnitDiv]> {
+ let Latency = 8; let ResourceCycles = [8];
+}
+
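+// Worked example: with Latency = 8 and ResourceCycles = [8], an SDIVWr result
+// becomes available 8 cycles after issue, and since CortexA55UnitDiv is
+// occupied for all 8 of those cycles a second, independent divide cannot start
+// until the first has left the (non-pipelined) divider.
+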
+// Load
+def : WriteRes<WriteLD, [CortexA55UnitLd]> { let Latency = 3; }
+def : WriteRes<WriteLDIdx, [CortexA55UnitLd]> { let Latency = 4; }
+def : WriteRes<WriteLDHi, [CortexA55UnitLd]> { let Latency = 5; }
+
+// Vector Load - Vector loads take 1-5 cycles to issue. For the generic
+// WriteVLD below we choose the median of 3 issue cycles, which makes the
+// latency 6. An extra cycle is needed to get the swizzling right.
+def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6;
+ let ResourceCycles = [3]; }
+def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; }
+def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5;
+ let ResourceCycles = [2]; }
+def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6;
+ let ResourceCycles = [3]; }
+def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7;
+ let ResourceCycles = [4]; }
+def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8;
+ let ResourceCycles = [5]; }
+def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9;
+ let ResourceCycles = [6]; }
+def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10;
+ let ResourceCycles = [7]; }
+def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11;
+ let ResourceCycles = [8]; }
+
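+// Worked example for the ladder above: the load pipe moves 64 bits per cycle,
+// so a Q-form two-register LD1 (32 bytes) needs 4 transfer cycles and is
+// mapped to CortexA55WriteVLD4 further down, i.e. 4 ResourceCycles on
+// CortexA55UnitLd and a latency of 7 (the pattern is latency = 3 + n for
+// n transfer cycles).
+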
+// Pre/Post Indexing - Performed as part of address generation
+def : WriteRes<WriteAdr, []> { let Latency = 0; }
+
+// Store
+def : WriteRes<WriteST, [CortexA55UnitSt]> { let Latency = 4; }
+def : WriteRes<WriteSTP, [CortexA55UnitSt]> { let Latency = 4; }
+def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 4; }
+def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; }
+
+// Vector Store - Similar to vector loads, stores can take 1-3 cycles to issue.
+def : WriteRes<WriteVST, [CortexA55UnitSt]> { let Latency = 5;
+ let ResourceCycles = [2];}
+def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; }
+def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
+ let ResourceCycles = [2]; }
+def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6;
+ let ResourceCycles = [3]; }
+def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
+ let ResourceCycles = [4]; }
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Branch
+def : WriteRes<WriteBr, [CortexA55UnitB]>;
+def : WriteRes<WriteBrReg, [CortexA55UnitB]>;
+def : WriteRes<WriteSys, [CortexA55UnitB]>;
+def : WriteRes<WriteBarrier, [CortexA55UnitB]>;
+def : WriteRes<WriteHint, [CortexA55UnitB]>;
+
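+// None of the branch writes above specify a Latency, so they use the WriteRes
+// default of one cycle on the single branch pipe.
+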
+// FP ALU
+// As the WriteF result is produced in F5 and can mostly be forwarded
+// to the consumer at F1, the effective latency is set to 4.
+def : WriteRes<WriteF, [CortexA55UnitFPALU]> { let Latency = 4; }
+def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
+def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
+def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
+def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
+def : WriteRes<WriteV, [CortexA55UnitFPALU]> { let Latency = 4; }
+
+// FP ALU specific new schedwrite definitions
+def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
+def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
+def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;}
+
+// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined
+def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; }
+def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22;
+ let ResourceCycles = [29]; }
+def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; }
+def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
+ let ResourceCycles = [5]; }
+def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13;
+ let ResourceCycles = [10]; }
+def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
+ let ResourceCycles = [19]; }
+def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
+ let ResourceCycles = [5]; }
+def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12;
+ let ResourceCycles = [9]; }
+def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
+ let ResourceCycles = [19]; }
+
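+// Worked example: FDIVSrr is remapped to CortexA55WriteFDivSP below, so a
+// dependent consumer waits 13 cycles while an independent divide only has to
+// wait the 10 ResourceCycles for the divider to free up. The generic WriteFDiv
+// above keeps the more conservative 22-cycle/29-cycle numbers for anything the
+// per-precision InstRWs at the end of the file do not match.
+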
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types.
+
+def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadExtrHi, 1>;
+def : ReadAdvance<ReadAdrBase, 1>;
+
+// ALU - ALU input operands are generally needed in EX1. An operand produced
+// in, say, EX2 can be forwarded for consumption to the ALU in EX1, thereby
+// allowing back-to-back ALU operations such as add. If an operand requires
+// a shift, it will, however, be required in ISS stage.
+def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
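+
+// Worked example: a result written as WriteI (Latency = 3 above) feeding a
+// ReadI operand is advanced by 2, so the dependent ALU op sees an effective
+// latency of 3 - 2 = 1 cycle and e.g. two dependent adds can run back to back.
+// Shifted and extended operands are read earlier, in ISS, so they only get the
+// smaller advance defined next.
+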
+// Shifted operand
+def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def CortexA55ReadISReg : SchedReadVariant<[
+ SchedVar<RegShiftedPred, [CortexA55ReadShifted]>,
+ SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
+def : SchedAlias<ReadISReg, CortexA55ReadISReg>;
+
+def CortexA55ReadIEReg : SchedReadVariant<[
+ SchedVar<RegExtendedPred, [CortexA55ReadShifted]>,
+ SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
+def : SchedAlias<ReadIEReg, CortexA55ReadIEReg>;
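+
+// Note: the variants resolve per instruction; whenever RegShiftedPred or
+// RegExtendedPred does not hold, NoSchedPred selects CortexA55ReadNotShifted,
+// i.e. the same advance of 2 that plain ALU operands get.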
+
+// MUL
+def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+
+// Div
+def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific InstRWs.
+
+//---
+// Miscellaneous
+//---
+def : InstRW<[CortexA55WriteVLD2,CortexA55WriteVLD1], (instregex "LDP.*")>;
+def : InstRW<[WriteI], (instrs COPY)>;
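+
+// The writes in an InstRW list are matched to an instruction's defs in order,
+// so for a plain LDP matched above the two loaded registers get latencies of
+// 5 and 4 cycles respectively; the WriteAdr entries used throughout the _POST
+// patterns below account for the base-register writeback, which is modelled
+// as free (see WriteAdr above).
+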
+//---
+// Vector Loads - 64-bit per cycle
+//---
+// 1-element structures
+def : InstRW<[CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; // single element
+def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate
+def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)$")>; // multiple structures
+def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// 2-element structures
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
+def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
+
+// 3-element structures
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+// 4-element structures
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs.
+def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
+def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs.
+def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+//---
+// Vector Stores
+//---
+def : InstRW<[CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
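+// As with the loads, the store mappings above scale with the amount of data
+// moved: a Q-form ST2, for example, writes 32 bytes and therefore uses
+// CortexA55WriteVST4, i.e. 4 ResourceCycles on the single store pipe.
+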
+//---
+// Floating Point Conversions, MAC, DIV, SQRT
+//---
+def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
+def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;
+
+def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>;
+def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>;
+def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTFv")>;
+
+def : InstRW<[CortexA55WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
+def : InstRW<[CortexA55WriteFMAC], (instregex "^FML(A|S).*")>;
+def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>;
+def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[CortexA55WriteFDivHP], (instregex "^FDIVv.*16$")>;
+def : InstRW<[CortexA55WriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[CortexA55WriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
+def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+}
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57.td
index aa5bec8088..0ee50541c0 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57.td
@@ -93,7 +93,7 @@ def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>;
def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
-def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;}
+def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;}
def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>;
def : SchedAlias<WriteV, A57Write_3cyc_1V>;
def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
@@ -350,16 +350,16 @@ def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")
// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
-// Cortex A57 Software Optimization Guide Sec 3.14
-// Advance for absolute diff accum, pairwise add and accumulate, shift accumulate
-def A57ReadIVA3 : SchedReadAdvance<3, [A57Write_4cyc_1X_NonMul_Forward, A57Write_5cyc_2X_NonMul_Forward]>;
-
+// Cortex A57 Software Optimization Guide Sec 3.14
+// Advance for absolute diff accum, pairwise add and accumulate, shift accumulate
+def A57ReadIVA3 : SchedReadAdvance<3, [A57Write_4cyc_1X_NonMul_Forward, A57Write_5cyc_2X_NonMul_Forward]>;
+
// ASIMD absolute diff accum, D-form
-def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
// ASIMD absolute diff accum, Q-form
-def : InstRW<[A57Write_5cyc_2X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+def : InstRW<[A57Write_5cyc_2X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
// ASIMD absolute diff accum long
-def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABAL")>;
+def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABAL")>;
// ASIMD arith, reduce, 4H/4S
def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
@@ -376,41 +376,41 @@ def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>
def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
// ASIMD multiply, D-form
-// MUL
-def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^MUL(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
-// PMUL, SQDMULH, SQRDMULH
-def : InstRW<[A57Write_5cyc_1W], (instregex "^(PMUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
-
+// MUL
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^MUL(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
+// PMUL, SQDMULH, SQRDMULH
+def : InstRW<[A57Write_5cyc_1W], (instregex "^(PMUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>;
+
// ASIMD multiply, Q-form
-// MUL
-def : InstRW<[A57Write_6cyc_2W_Mul_Forward], (instregex "^MUL(v16i8|v8i16|v4i32)(_indexed)?$")>;
-// PMUL, SQDMULH, SQRDMULH
-def : InstRW<[A57Write_6cyc_2W], (instregex "^(PMUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
-
-// Cortex A57 Software Optimization Guide Sec 3.14
-def A57ReadIVMA4 : SchedReadAdvance<4 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>;
-def A57ReadIVMA3 : SchedReadAdvance<3 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>;
-
+// MUL
+def : InstRW<[A57Write_6cyc_2W_Mul_Forward], (instregex "^MUL(v16i8|v8i16|v4i32)(_indexed)?$")>;
+// PMUL, SQDMULH, SQRDMULH
+def : InstRW<[A57Write_6cyc_2W], (instregex "^(PMUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// Cortex A57 Software Optimization Guide Sec 3.14
+def A57ReadIVMA4 : SchedReadAdvance<4 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>;
+def A57ReadIVMA3 : SchedReadAdvance<3 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>;
+
// ASIMD multiply accumulate, D-form
-def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
// ASIMD multiply accumulate, Q-form
-def : InstRW<[A57Write_6cyc_2W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+def : InstRW<[A57Write_6cyc_2W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
// ASIMD multiply accumulate long
// ASIMD multiply accumulate saturating long
-def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^(S|U)ML[AS]L")>;
-def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA3], (instregex "^SQDML[AS]L")>;
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^(S|U)ML[AS]L")>;
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA3], (instregex "^SQDML[AS]L")>;
// ASIMD multiply long
-def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^(S|U)MULL")>;
-def : InstRW<[A57Write_5cyc_1W], (instregex "^SQDMULL")>;
+def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^(S|U)MULL")>;
+def : InstRW<[A57Write_5cyc_1W], (instregex "^SQDMULL")>;
def : InstRW<[A57Write_5cyc_1W], (instregex "^PMULL(v8i8|v16i8)")>;
def : InstRW<[A57Write_3cyc_1W], (instregex "^PMULL(v1i64|v2i64)")>;
// ASIMD pairwise add and accumulate
// ASIMD shift accumulate
-def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ADALP")>;
-def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;
+def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ADALP")>;
+def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^(S|SR|U|UR)SRA")>;
// ASIMD shift by immed, complex
def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?(Q|R){1,2}SHR")>;
@@ -487,22 +487,22 @@ def : InstRW<[A57Write_9cyc_3V], (instregex "^(FMAX|FMIN)(NM)?P(v4f32|v2f64|v2i6
def : InstRW<[A57Write_10cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv")>;
// ASIMD FP multiply, D-form, FZ
-def : InstRW<[A57Write_5cyc_1V_FP_Forward], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+def : InstRW<[A57Write_5cyc_1V_FP_Forward], (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
// ASIMD FP multiply, Q-form, FZ
-def : InstRW<[A57Write_5cyc_2V_FP_Forward], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
+def : InstRW<[A57Write_5cyc_2V_FP_Forward], (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
// ASIMD FP multiply accumulate, D-form, FZ
// ASIMD FP multiply accumulate, Q-form, FZ
def A57WriteFPVMAD : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
def A57WriteFPVMAQ : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 10; }
-
-// Cortex A57 Software Optimization Guide Sec 3.15
-// Advances from FP mul and mul-accum to mul-accum
-def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>;
-def A57ReadFPVMA6 : SchedReadAdvance<6, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>;
-
+
+// Cortex A57 Software Optimization Guide Sec 3.15
+// Advances from FP mul and mul-accum to mul-accum
+def A57ReadFPVMA5 : SchedReadAdvance<5, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>;
+def A57ReadFPVMA6 : SchedReadAdvance<6, [A57WriteFPVMAD, A57WriteFPVMAQ, A57Write_5cyc_1V_FP_Forward, A57Write_5cyc_2V_FP_Forward]>;
+
def : InstRW<[A57WriteFPVMAD, A57ReadFPVMA5], (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
-def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA6], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
+def : InstRW<[A57WriteFPVMAQ, A57ReadFPVMA6], (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
// ASIMD FP round, D-form
def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT[AIMNPXZ](v2f32)")>;
@@ -565,9 +565,9 @@ def : InstRW<[A57Write_6cyc_3V], (instregex "^(UZP|ZIP)(1|2)(v16i8|v8i16|v4i32|v
def : InstRW<[A57Write_5cyc_1V], (instregex "^F(ADD|SUB)[DS]rr")>;
-// Cortex A57 Software Optimization Guide Sec 3.10
+// Cortex A57 Software Optimization Guide Sec 3.10
def A57WriteFPMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
-def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA, WriteFMul]>;
+def A57ReadFPMA5 : SchedReadAdvance<5, [A57WriteFPMA, WriteFMul]>;
def A57ReadFPM : SchedReadAdvance<0>;
def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57WriteRes.td
index a4c090d439..2ec3233887 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57WriteRes.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA57WriteRes.td
@@ -13,11 +13,11 @@
// Prefix: A57Write
// Latency: #cyc
// MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
-// Postfix (optional): (XYZ)_Forward
-//
-// The postfix is added to differentiate SchedWriteRes that are used in
-// subsequent SchedReadAdvances.
+// Postfix (optional): (XYZ)_Forward
//
+// The postfix is added to differentiate SchedWriteRes that are used in
+// subsequent SchedReadAdvances.
+//
// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes.
//
@@ -29,9 +29,9 @@
def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; }
def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
-def A57Write_5cyc_1V_FP_Forward : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def A57Write_5cyc_1V_FP_Forward : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
-def A57Write_5cyc_1W_Mul_Forward : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57Write_5cyc_1W_Mul_Forward : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
let ResourceCycles = [17]; }
@@ -51,7 +51,7 @@ def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; }
def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; }
def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
-def A57Write_4cyc_1X_NonMul_Forward : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57Write_4cyc_1X_NonMul_Forward : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; }
def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
@@ -100,10 +100,10 @@ def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
let Latency = 6;
let NumMicroOps = 2;
}
-def A57Write_6cyc_2W_Mul_Forward : SchedWriteRes<[A57UnitW, A57UnitW]> {
- let Latency = 6;
- let NumMicroOps = 2;
-}
+def A57Write_6cyc_2W_Mul_Forward : SchedWriteRes<[A57UnitW, A57UnitW]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI,
A57UnitL]> {
let Latency = 5;
@@ -113,18 +113,18 @@ def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
let Latency = 5;
let NumMicroOps = 2;
}
-def A57Write_5cyc_2V_FP_Forward : SchedWriteRes<[A57UnitV, A57UnitV]> {
- let Latency = 5;
- let NumMicroOps = 2;
-}
+def A57Write_5cyc_2V_FP_Forward : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
let Latency = 5;
let NumMicroOps = 2;
}
-def A57Write_5cyc_2X_NonMul_Forward : SchedWriteRes<[A57UnitX, A57UnitX]> {
- let Latency = 5;
- let NumMicroOps = 2;
-}
+def A57Write_5cyc_2X_NonMul_Forward : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
A57UnitV]> {
let Latency = 10;
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA64FX.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA64FX.td
index b6741d418e..3c5a8d033d 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedA64FX.td
@@ -1,3890 +1,3890 @@
-//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the scheduling model for the Fujitsu A64FX processors.
-//
-//===----------------------------------------------------------------------===//
-
-def A64FXModel : SchedMachineModel {
- let IssueWidth = 6; // 6 micro-ops dispatched at a time.
- let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer.
- let LoadLatency = 5; // Optimistic load latency.
- let MispredictPenalty = 12; // Extra cycles for mispredicted branch.
- // Determined via a mix of micro-arch details and experimentation.
- let LoopMicroOpBufferSize = 128;
- let PostRAScheduler = 1; // Using PostRA sched.
- let CompleteModel = 1;
-
- list<Predicate> UnsupportedFeatures =
- [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth];
-
- let FullInstRWOverlapCheck = 0;
-}
-
-let SchedModel = A64FXModel in {
-
-// Define the issue ports.
-
-// A64FXIP*
-
-// Port 0
-def A64FXIPFLA : ProcResource<1>;
-
-// Port 1
-def A64FXIPPR : ProcResource<1>;
-
-// Port 2
-def A64FXIPEXA : ProcResource<1>;
-
-// Port 3
-def A64FXIPFLB : ProcResource<1>;
-
-// Port 4
-def A64FXIPEXB : ProcResource<1>;
-
-// Port 5
-def A64FXIPEAGA : ProcResource<1>;
-
-// Port 6
-def A64FXIPEAGB : ProcResource<1>;
-
-// Port 7
-def A64FXIPBR : ProcResource<1>;
-
-// Define groups for the functional units on each issue port. Each group
-// created will be used by a WriteRes later on.
-
-def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;
-
-def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;
-
-def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;
-
-def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;
-
-def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;
-
-def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;
-
-def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;
-
-def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;
-
-def A64FXGI03 : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;
-
-def A64FXGI01 : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;
-
-def A64FXGI02 : ProcResGroup<[A64FXIPFLA, A64FXIPEXA]>;
-
-def A64FXGI12 : ProcResGroup<[A64FXIPEXA, A64FXIPPR]>;
-
-def A64FXGI15 : ProcResGroup<[A64FXIPEAGA, A64FXIPPR]>;
-
-def A64FXGI05 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA]>;
-
-def A64FXGI24 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;
-
-def A64FXGI124 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPPR]>;
-
-def A64FXGI056 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;
-
-def A64FXGI0256 : ProcResGroup<[A64FXIPFLA, A64FXIPEXA, A64FXIPEAGA, A64FXIPEAGB]>;
-
-def A64FXGI56 : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;
-
-def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB]>;
-
-def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
- A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB, A64FXIPBR]> {
- let BufferSize = 60;
-}
-
-def A64FXWrite_6Cyc : SchedWriteRes<[]> {
- let Latency = 6;
-}
-
-def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> {
- let Latency = 1;
-}
-
-def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 2;
-}
-
-def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 4;
-}
-
-def A64FXWrite_5Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 5;
-}
-
-def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 6;
-}
-
-def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 8;
-}
-
-def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 9;
-}
-
-def A64FXWrite_13Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 13;
-}
-
-def A64FXWrite_37Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 37;
-}
-
-def A64FXWrite_98Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 98;
-}
-
-def A64FXWrite_134Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 134;
-}
-
-def A64FXWrite_154Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 154;
-}
-
-def A64FXWrite_4Cyc_GI01 : SchedWriteRes<[A64FXGI01]> {
- let Latency = 4;
-}
-
-def A64FXWrite_6Cyc_GI01 : SchedWriteRes<[A64FXGI01]> {
- let Latency = 6;
-}
-
-def A64FXWrite_8Cyc_GI01 : SchedWriteRes<[A64FXGI01]> {
- let Latency = 8;
-}
-
-def A64FXWrite_12Cyc_GI01 : SchedWriteRes<[A64FXGI01]> {
- let Latency = 12;
-}
-
-def A64FXWrite_10Cyc_GI02 : SchedWriteRes<[A64FXGI02]> {
- let Latency = 10;
-}
-
-def A64FXWrite_17Cyc_GI02 : SchedWriteRes<[A64FXGI02]> {
- let Latency = 17;
-}
-
-def A64FXWrite_21Cyc_GI02 : SchedWriteRes<[A64FXGI02]> {
- let Latency = 21;
-}
-
-def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> {
- let Latency = 3;
-}
-
-def A64FXWrite_6Cyc_NGI1 : SchedWriteRes<[A64FXGI1]> {
- let Latency = 3;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_4Cyc_GI12 : SchedWriteRes<[A64FXGI12]> {
- let Latency = 4;
-}
-
-def A64FXWrite_3Cyc_GI2 : SchedWriteRes<[A64FXGI2]> {
- let Latency = 3;
-}
-
-def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> {
- let Latency = 5;
-}
-
-def A64FXWrite_6Cyc_GI2 : SchedWriteRes<[A64FXGI2]> {
- let Latency = 6;
-}
-
-def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> {
- let Latency = 4;
-}
-
-def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> {
- let Latency = 6;
-}
-
-def A64FXWrite_6Cyc_GI15 : SchedWriteRes<[A64FXGI15]> {
- let Latency = 6;
-}
-
-def A64FXWrite_3Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 3;
-}
-
-def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 4;
-}
-
-def A64FXWrite_6Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 6;
-}
-
-def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 8;
-}
-
-def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 9;
-}
-
-def A64FXWrite_10Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
-}
-
-def A64FXWrite_12Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 12;
-}
-
-def A64FXWrite_14Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 14;
-}
-
-def A64FXWrite_15Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 15;
-}
-
-def A64FXWrite_15Cyc_NGI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 15;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_18Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 18;
-}
-
-def A64FXWrite_45Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 45;
-}
-
-def A64FXWrite_60Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 60;
-}
-
-def A64FXWrite_75Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 75;
-}
-
-def A64FXWrite_6Cyc_GI05 : SchedWriteRes<[A64FXGI05]> {
- let Latency = 6;
-}
-
-def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
- let Latency = 10;
-}
-
-def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
- let Latency = 12;
-}
-
-def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
- let Latency = 20;
-}
-
-def A64FXWrite_5Cyc_GI5 : SchedWriteRes<[A64FXGI5]> {
- let Latency = 5;
-}
-
-def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> {
- let Latency = 11;
-}
-
-def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> {
- let Latency = 5;
-}
-
-def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> {
- let Latency = 1;
-}
-
-def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> {
- let Latency = 2;
-}
-
-def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> {
- let Latency = 4;
- let NumMicroOps = 4;
-}
-
-def A64FXWrite_6Cyc_GI124: SchedWriteRes<[A64FXGI124]> {
- let Latency = 6;
-}
-
-def A64FXWrite_8Cyc_GI124 : SchedWriteRes<[A64FXGI124]> {
- let Latency = 8;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_6Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
- let Latency = 0;
-}
-
-def A64FXWrite_1Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
- let Latency = 1;
-}
-
-def A64FXWrite_5Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
- let Latency = 5;
-}
-
-def A64FXWrite_8Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
- let Latency = 8;
-}
-
-def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
- let Latency = 11;
-}
-
-def A64FXWrite_44Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
- let Latency = 44;
-}
-
-def A64FXWrite_10Cyc_GI056 : SchedWriteRes<[A64FXGI056]> {
- let Latency = 10;
-}
-
-def A64FXWrite_15Cyc_GI056 : SchedWriteRes<[A64FXGI056]> {
- let Latency = 15;
-}
-
-def A64FXWrite_19Cyc_GI056 : SchedWriteRes<[A64FXGI056]> {
- let Latency = 19;
-}
-
-def A64FXWrite_25Cyc_GI056 : SchedWriteRes<[A64FXGI056]> {
- let Latency = 25;
-}
-
-def A64FXWrite_14Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> {
- let Latency = 14;
-}
-
-def A64FXWrite_19Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> {
- let Latency = 19;
-}
-
-def A64FXWrite_29Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> {
- let Latency = 29;
-}
-
-def A64FXWrite_LDNP: SchedWriteRes<[A64FXGI56]> {
- let Latency = 5;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_LDP01: SchedWriteRes<[A64FXGI2456]> {
- let Latency = 5;
- let NumMicroOps = 3;
-}
-
-def A64FXWrite_LDR01: SchedWriteRes<[A64FXGI2456]> {
- let Latency = 5;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_LD102: SchedWriteRes<[A64FXGI56]> {
- let Latency = 8;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_LD103: SchedWriteRes<[A64FXGI56]> {
- let Latency = 11;
- let NumMicroOps = 2;
-
-}
-
-def A64FXWrite_LD104: SchedWriteRes<[A64FXGI56]> {
- let Latency = 8;
- let NumMicroOps = 3;
-}
-
-def A64FXWrite_LD105: SchedWriteRes<[A64FXGI56]> {
- let Latency = 11;
- let NumMicroOps = 3;
-}
-
-def A64FXWrite_LD106: SchedWriteRes<[A64FXGI56]> {
- let Latency = 8;
- let NumMicroOps = 4;
-}
-
-def A64FXWrite_LD107: SchedWriteRes<[A64FXGI56]> {
- let Latency = 11;
- let NumMicroOps = 4;
-}
-
-def A64FXWrite_LD108: SchedWriteRes<[A64FXGI56]> {
- let Latency = 8;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_LD109: SchedWriteRes<[A64FXGI56]> {
- let Latency = 11;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_LD110: SchedWriteRes<[A64FXGI56]> {
- let Latency = 8;
- let NumMicroOps = 3;
-}
-
-def A64FXWrite_LD111: SchedWriteRes<[A64FXGI56]> {
- let Latency = 11;
- let NumMicroOps = 3;
-}
-
-def A64FXWrite_LD112: SchedWriteRes<[A64FXGI56]> {
- let Latency = 8;
- let NumMicroOps = 4;
-}
-
-def A64FXWrite_LD113: SchedWriteRes<[A64FXGI56]> {
- let Latency = 11;
- let NumMicroOps = 4;
-}
-
-def A64FXWrite_LD114: SchedWriteRes<[A64FXGI56]> {
- let Latency = 8;
- let NumMicroOps = 5;
-}
-
-def A64FXWrite_LD115: SchedWriteRes<[A64FXGI56]> {
- let Latency = 11;
- let NumMicroOps = 5;
-}
-
-def A64FXWrite_LD1I0: SchedWriteRes<[A64FXGI056]> {
- let Latency = 8;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_LD1I1: SchedWriteRes<[A64FXGI056]> {
- let Latency = 8;
- let NumMicroOps = 3;
-}
-
-def A64FXWrite_LD2I0: SchedWriteRes<[A64FXGI056]> {
- let Latency = 8;
- let NumMicroOps = 4;
-}
-
-def A64FXWrite_LD2I1: SchedWriteRes<[A64FXGI056]> {
- let Latency = 8;
- let NumMicroOps = 5;
-}
-
-def A64FXWrite_LD3I0: SchedWriteRes<[A64FXGI056]> {
- let Latency = 8;
- let NumMicroOps = 6;
-}
-
-def A64FXWrite_LD3I1: SchedWriteRes<[A64FXGI056]> {
- let Latency = 8;
- let NumMicroOps = 7;
-}
-
-def A64FXWrite_LD4I0: SchedWriteRes<[A64FXGI056]> {
- let Latency = 8;
- let NumMicroOps = 8;
-}
-
-def A64FXWrite_LD4I1: SchedWriteRes<[A64FXGI056]> {
- let Latency = 8;
- let NumMicroOps = 9;
-}
-
-def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> {
- let Latency = 1;
-}
-
-def A64FXWrite_FMOV_GV : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
-}
-
-def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 14;
-}
-
-def A64FXWrite_FMOV_VG : SchedWriteRes<[A64FXGI03]> {
- let Latency = 25;
-}
-
-def A64FXWrite_ADDLV : SchedWriteRes<[A64FXGI03]> {
- let Latency = 12;
-}
-
-def A64FXWrite_MULLE : SchedWriteRes<[A64FXGI03]> {
- let Latency = 14;
-}
-
-def A64FXWrite_MULLV : SchedWriteRes<[A64FXGI03]> {
- let Latency = 14;
-}
-
-def A64FXWrite_MADDL : SchedWriteRes<[A64FXGI03]> {
- let Latency = 6;
-}
-
-def A64FXWrite_ABA : SchedWriteRes<[A64FXGI03]> {
- let Latency = 8;
-}
-
-def A64FXWrite_ABAL : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
-}
-
-def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 12;
- let NumMicroOps = 6;
-}
-
-def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> {
- let Latency = 14;
- let NumMicroOps = 6;
-}
-
-def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> {
- let Latency = 9;
-}
-
-def A64FXWrite_PMUL : SchedWriteRes<[A64FXGI03]> {
- let Latency = 8;
-}
-
-
-def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> {
- let Latency = 8;
- let NumMicroOps = 3;
-}
-
-def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> {
- let Latency = 8;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
- let NumMicroOps = 3;
-}
-
-def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
- let NumMicroOps = 2;
-}
-
-
-def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
- let NumMicroOps = 3;
-}
-
-def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> {
- let Latency = 15;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> {
- let Latency = 15;
- let NumMicroOps = 3;
-}
-
-def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
- let NumMicroOps = 3;
-}
-
-def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> {
- let Latency = 15;
- let NumMicroOps = 2;
-}
-
-def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> {
- let Latency = 14;
- let NumMicroOps = 7;
-}
-
-def A64FXWrite_FMAXVVS : SchedWriteRes<[A64FXGI03]> {
- let Latency = 14;
-}
-
-def A64FXWrite_BIF : SchedWriteRes<[A64FXGI03]> {
- let Latency = 5;
-}
-
-def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
-}
-
-def A64FXWrite_SHA00 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 9;
-}
-
-def A64FXWrite_SHA01 : SchedWriteRes<[A64FXGI0]> {
- let Latency = 12;
-}
-
-def A64FXWrite_SMOV : SchedWriteRes<[A64FXGI03]> {
- let Latency = 25;
-}
-
-def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
- let NumMicroOps = 3;
-}
-
-def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
- let NumMicroOps = 5;
-}
-
-def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
- let NumMicroOps = 7;
-}
-
-def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> {
- let Latency = 10;
- let NumMicroOps = 9;
-}
-
-def A64FXWrite_PREF0: SchedWriteRes<[A64FXGI56]> {
- let Latency = 0;
-}
-
-def A64FXWrite_PREF1: SchedWriteRes<[A64FXGI56]> {
- let Latency = 0;
-}
-
-def A64FXWrite_SWP: SchedWriteRes<[A64FXGI56]> {
- let Latency = 0;
-}
-
-def A64FXWrite_STUR: SchedWriteRes<[A64FXGI56]> {
- let Latency = 0;
-}
-
-def A64FXWrite_STNP: SchedWriteRes<[A64FXGI56]> {
- let Latency = 0;
-}
-
-def A64FXWrite_STP01: SchedWriteRes<[A64FXGI56]> {
- let Latency = 0;
-}
-
-def A64FXWrite_ST10: SchedWriteRes<[A64FXGI56]> {
- let Latency = 0;
-}
-
-def A64FXWrite_ST11: SchedWriteRes<[A64FXGI56]> {
- let Latency = 0;
-}
-
-def A64FXWrite_ST12: SchedWriteRes<[A64FXGI56]> {
- let Latency = 0;
-}
-
-def A64FXWrite_ST13: SchedWriteRes<[A64FXGI56]> {
- let Latency = 0;
-}
-
-def A64FXWrite_ST14: SchedWriteRes<[A64FXGI56]> {
- let Latency = 1;
-}
-
-def A64FXWrite_ST15: SchedWriteRes<[A64FXGI56]> {
- let Latency = 1;
-}
-
-def A64FXWrite_ST16: SchedWriteRes<[A64FXGI56]> {
- let Latency = 1;
-}
-
-def A64FXWrite_ST17: SchedWriteRes<[A64FXGI56]> {
- let Latency = 1;
-}
-
-def A64FXWrite_ST1W_6: SchedWriteRes<[A64FXGI056]> {
- let Latency = 6;
-}
-
-def A64FXWrite_ST2W_7: SchedWriteRes<[A64FXGI056]> {
- let Latency = 7;
-}
-
-def A64FXWrite_ST3W_8: SchedWriteRes<[A64FXGI056]> {
- let Latency = 8;
-}
-
-def A64FXWrite_ST4W_9: SchedWriteRes<[A64FXGI056]> {
- let Latency = 9;
-}
-
-def A64FXWrite_ST1W_15: SchedWriteRes<[A64FXGI056]> {
- let Latency = 15;
-}
-
-def A64FXWrite_ST1W_19: SchedWriteRes<[A64FXGI056]> {
- let Latency = 19;
-}
-
-def A64FXWrite_CAS: SchedWriteRes<[A64FXGI56]> {
- let Latency = 7;
-}
-
-// Define commonly used read types.
-
-// No forwarding is provided for these types.
-def : ReadAdvance<ReadI, 0>;
-def : ReadAdvance<ReadISReg, 0>;
-def : ReadAdvance<ReadIEReg, 0>;
-def : ReadAdvance<ReadIM, 0>;
-def : ReadAdvance<ReadIMA, 0>;
-def : ReadAdvance<ReadID, 0>;
-def : ReadAdvance<ReadExtrHi, 0>;
-def : ReadAdvance<ReadAdrBase, 0>;
-def : ReadAdvance<ReadVLD, 0>;
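-
-// A nonzero advance would model operand forwarding. As an illustrative
-// sketch only (A64FX defines no such bypass, hence the zeros above), an
-// accumulator bypass for multiply-accumulate would be written as:
-//   def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;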
-
-//===----------------------------------------------------------------------===//
-// 3. Instruction Tables.
-
-//---
-// 3.1 Branch Instructions
-//---
-
-// Branch, immed
-// Branch and link, immed
-// Compare and branch
-def : WriteRes<WriteBr, [A64FXGI7]> {
- let Latency = 1;
-}
-
-// Branch, register
-// Branch and link, register != LR
-// Branch and link, register = LR
-def : WriteRes<WriteBrReg, [A64FXGI7]> {
- let Latency = 1;
-}
-
-def : WriteRes<WriteSys, []> { let Latency = 1; }
-def : WriteRes<WriteBarrier, []> { let Latency = 1; }
-def : WriteRes<WriteHint, []> { let Latency = 1; }
-
-def : WriteRes<WriteAtomic, []> {
- let Latency = 4;
-}
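-
-// A WriteRes with an empty resource list (WriteSys, WriteBarrier, WriteHint,
-// WriteAtomic above) reserves no pipeline; only its latency is visible to
-// the scheduler.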
-
-//---
-// Branch
-//---
-def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>;
-def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>;
-def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>;
-def : InstRW<[A64FXWrite_1Cyc_GI7],
- (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;
-
-//---
-// 3.2 Arithmetic and Logical Instructions
-// 3.3 Move and Shift Instructions
-//---
-
-// ALU, basic
-// Conditional compare
-// Conditional select
-// Address generation
-def : WriteRes<WriteI, [A64FXGI2456]> {
- let Latency = 1;
- let ResourceCycles = [1];
-}
-
-def : InstRW<[WriteI],
- (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
- "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
- "ADC(W|X)r",
- "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
- "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
- "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
- "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
- "SBCS(W|X)r", "CCMN(W|X)(i|r)",
- "CCMP(W|X)(i|r)", "CSEL(W|X)r",
- "CSINC(W|X)r", "CSINV(W|X)r",
- "CSNEG(W|X)r")>;
-
-def : InstRW<[WriteI], (instrs COPY)>;
-
-// ALU, extend and/or shift
-def : WriteRes<WriteISReg, [A64FXGI2456]> {
- let Latency = 2;
- let ResourceCycles = [1];
-}
-
-def : InstRW<[WriteISReg],
- (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
- "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
- "ADC(W|X)r",
- "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
- "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
- "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
- "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
- "SBCS(W|X)r", "CCMN(W|X)(i|r)",
- "CCMP(W|X)(i|r)", "CSEL(W|X)r",
- "CSINC(W|X)r", "CSINV(W|X)r",
- "CSNEG(W|X)r")>;
-
-def : WriteRes<WriteIEReg, [A64FXGI2456]> {
- let Latency = 1;
- let ResourceCycles = [1];
-}
-
-def : InstRW<[WriteIEReg],
- (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
- "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
- "ADC(W|X)r",
- "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
- "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
- "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
- "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
- "SBCS(W|X)r", "CCMN(W|X)(i|r)",
- "CCMP(W|X)(i|r)", "CSEL(W|X)r",
- "CSINC(W|X)r", "CSINV(W|X)r",
- "CSNEG(W|X)r")>;
-
-// Move immed
-def : WriteRes<WriteImm, [A64FXGI2456]> {
- let Latency = 1;
- let ResourceCycles = [1];
-}
-
-def : InstRW<[A64FXWrite_1Cyc_GI2456],
- (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
-
-def : InstRW<[A64FXWrite_2Cyc_GI24],
- (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;
-
-// Variable shift
-def : WriteRes<WriteIS, [A64FXGI2456]> {
- let Latency = 1;
- let ResourceCycles = [1];
-}
-
-//---
-// 3.4 Divide and Multiply Instructions
-//---
-
-// Divide, W-form
-def : WriteRes<WriteID32, [A64FXGI4]> {
- let Latency = 39;
- let ResourceCycles = [39];
-}
-
-// Divide, X-form
-def : WriteRes<WriteID64, [A64FXGI4]> {
- let Latency = 23;
- let ResourceCycles = [23];
-}
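-
-// ResourceCycles matching the latency marks the divider as unpipelined:
-// A64FXGI4 stays reserved for the whole operation, so back-to-back integer
-// divides serialize.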
-
-// Multiply accumulate, W-form
-def : WriteRes<WriteIM32, [A64FXGI2456]> {
- let Latency = 5;
- let ResourceCycles = [1];
-}
-
-// Multiply accumulate, X-form
-def : WriteRes<WriteIM64, [A64FXGI2456]> {
- let Latency = 5;
- let ResourceCycles = [1];
-}
-
-def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
-def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>;
-def : InstRW<[A64FXWrite_MADDL],
- (instregex "(S|U)(MADDL|MSUBL)rrr")>;
-
-def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
-def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
-
-// Bitfield extract, two reg
-def : WriteRes<WriteExtr, [A64FXGI2456]> {
- let Latency = 1;
- let ResourceCycles = [1];
-}
-
-// Multiply high
-def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>;
-
-// Miscellaneous Data-Processing Instructions
-// Bitfield extract
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>;
-
-// Bitfield move - basic
-def : InstRW<[A64FXWrite_1Cyc_GI24],
- (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;
-
-// Bitfield move, insert
-def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>;
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>;
-
-// Count leading
-def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$",
- "^CLZ(W|X)r$")>;
-
-// Reverse bits
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>;
-
-// Cryptography Extensions
-def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]")>;
-def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AESI?MC")>;
-def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^PMULL")>;
-def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>;
-def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>;
-def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]")>;
-def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU0")>;
-def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU1")>;
-def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA256(H|H2)")>;
-
-// CRC Instructions
-def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr, CRC32Hrr)>;
-def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr)>;
-def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr)>;
-
-def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32CBrr, CRC32CHrr)>;
-def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32CWrr)>;
-def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32CXrr)>;
-
-// Reverse bits/bytes
-// NOTE: Handled by WriteI.
-
-//---
-// 3.6 Load Instructions
-// 3.10 FP Load Instructions
-//---
-
-// Load register, literal
-// Load register, unscaled immed
-// Load register, immed unprivileged
-// Load register, unsigned immed
-def : WriteRes<WriteLD, [A64FXGI56]> {
- let Latency = 4;
- let ResourceCycles = [3];
-}
-
-// Load register, immed post-index
-// NOTE: Handled by WriteLD, WriteI.
-// Load register, immed pre-index
-// NOTE: Handled by WriteLD, WriteAdr.
-def : WriteRes<WriteAdr, [A64FXGI2456]> {
- let Latency = 1;
- let ResourceCycles = [1];
-}
-
-// Load pair, immed offset, normal
-// Load pair, immed offset, signed words, base != SP
-// Load pair, immed offset signed words, base = SP
-// LDP only breaks into *one* LS micro-op. Thus
-// the resources are handled by WriteLD.
-def : WriteRes<WriteLDHi, []> {
- let Latency = 5;
-}
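-
-// WriteLDHi models the second destination register of a load pair: it adds
-// the 5-cycle latency for that register but, having no resource list,
-// contributes no extra port pressure.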
-
-// Load register offset, basic
-// Load register, register offset, scale by 4/8
-// Load register, register offset, scale by 2
-// Load register offset, extend
-// Load register, register offset, extend, scale by 4/8
-// Load register, register offset, extend, scale by 2
-def A64FXWriteLDIdx : SchedWriteVariant<[
- SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
- SchedVar<NoSchedPred, [A64FXWrite_1Cyc_GI56]>]>;
-def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;
-
-def A64FXReadAdrBase : SchedReadVariant<[
- SchedVar<ScaledIdxPred, [ReadDefault]>,
- SchedVar<NoSchedPred, [ReadDefault]>]>;
-def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;
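-
-// Both variants above pick the same alternative for scaled and unscaled
-// offsets (A64FXWrite_1Cyc_GI56 and ReadDefault respectively), so indexed
-// addressing is costed uniformly. If the scaled form ever needed an extra
-// cycle, a sketch (with a hypothetical A64FXWrite_2Cyc_GI56) would be:
-//   def A64FXWriteLDIdx : SchedWriteVariant<[
-//     SchedVar<ScaledIdxPred, [A64FXWrite_2Cyc_GI56]>,
-//     SchedVar<NoSchedPred,   [A64FXWrite_1Cyc_GI56]>]>;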
-
-// Load pair, immed pre-index, normal
-// Load pair, immed pre-index, signed words
-// Load pair, immed post-index, normal
-// Load pair, immed post-index, signed words
-// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
-
-def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPDi)>;
-def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPQi)>;
-def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPSi)>;
-def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPWi)>;
-def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPXi)>;
-
-def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPDi)>;
-def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPQi)>;
-def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSi)>;
-def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSWi)>;
-def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPWi)>;
-def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPXi)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRBui)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRDui)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRHui)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRQui)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRSui)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRDl)>;
-def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRQl)>;
-def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRWl)>;
-def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRXl)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRBi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRHi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRWi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRXi)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBWi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBXi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHWi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHXi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSWi)>;
-
-def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
- (instrs LDPDpre)>;
-def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
- (instrs LDPQpre)>;
-def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
- (instrs LDPSpre)>;
-def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
- (instrs LDPWpre)>;
-def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
- (instrs LDPXpre)>;
-
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>;
-
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpre)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpre)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpost)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpost)>;
-
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpre)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpre)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpost)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpost)>;
-
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpre)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpost)>;
-
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpre)>;
-def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpost)>;
-
-def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
- (instrs LDPDpost)>;
-def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
- (instrs LDPQpost)>;
-def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
- (instrs LDPSpost)>;
-def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
- (instrs LDPWpost)>;
-def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
- (instrs LDPXpost)>;
-
-def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>;
-def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>;
-def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>;
-def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>;
-def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>;
-def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>;
-def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroW)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroW)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroW)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroW)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroW)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroW)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroW)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroW)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroW)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroW)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroX)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroX)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroX)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroX)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroX)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroX)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroX)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroX)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroX)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBBi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURDi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHHi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURQi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURXi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBWi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBXi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHWi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHXi)>;
-def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSWi)>;
-
-//---
-// Prefetch
-//---
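-// Prefetches produce no register result, so the PREF write types used here
-// carry latency 0 and only model GI56 port occupancy.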
-def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>;
-def : InstRW<[A64FXWrite_PREF1], (instrs PRFUMi)>;
-def : InstRW<[A64FXWrite_PREF1], (instrs PRFMui)>;
-def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroW)>;
-def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroX)>;
-
-//--
-// 3.7 Store Instructions
-// 3.11 FP Store Instructions
-//--
-
-// Store register, unscaled immed
-// Store register, immed unprivileged
-// Store register, unsigned immed
-def : WriteRes<WriteST, [A64FXGI56]> {
- let Latency = 1;
-}
-
-// Store register, immed post-index
-// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase
-
-// Store register, immed pre-index
-// NOTE: Handled by WriteAdr, WriteST
-
-// Store register, register offset, basic
-// Store register, register offset, scaled by 4/8
-// Store register, register offset, scaled by 2
-// Store register, register offset, extend
-// Store register, register offset, extend, scale by 4/8
-// Store register, register offset, extend, scale by 1
-def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]> {
- let Latency = 1;
-}
-
-// Store pair, immed offset, W-form
-// Store pair, immed offset, X-form
-def : WriteRes<WriteSTP, [A64FXGI56]> {
- let Latency = 1;
-}
-
-// Store pair, immed post-index, W-form
-// Store pair, immed post-index, X-form
-// Store pair, immed pre-index, W-form
-// Store pair, immed pre-index, X-form
-// NOTE: Handled by WriteAdr, WriteSTP.
-
-def : InstRW<[A64FXWrite_STUR], (instrs STURBi)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STURBBi)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STURDi)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STURHi)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STURHHi)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STURQi)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STURSi)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STURWi)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STURXi)>;
-
-def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRBi)>;
-def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRHi)>;
-def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRWi)>;
-def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRXi)>;
-
-def : InstRW<[A64FXWrite_STNP], (instrs STNPDi)>;
-def : InstRW<[A64FXWrite_STNP], (instrs STNPQi)>;
-def : InstRW<[A64FXWrite_STNP], (instrs STNPXi)>;
-def : InstRW<[A64FXWrite_STNP], (instrs STNPWi)>;
-
-def : InstRW<[A64FXWrite_STNP], (instrs STPDi)>;
-def : InstRW<[A64FXWrite_STNP], (instrs STPQi)>;
-def : InstRW<[A64FXWrite_STNP], (instrs STPXi)>;
-def : InstRW<[A64FXWrite_STNP], (instrs STPWi)>;
-
-def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>;
-def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>;
-
-def : InstRW<[A64FXWrite_STP01],
- (instrs STPDpre, STPDpost)>;
-def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
- (instrs STPDpre, STPDpost)>;
-def : InstRW<[A64FXWrite_STP01],
- (instrs STPQpre, STPQpost)>;
-def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
- (instrs STPQpre, STPQpost)>;
-def : InstRW<[A64FXWrite_STP01],
- (instrs STPSpre, STPSpost)>;
-def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
- (instrs STPSpre, STPSpost)>;
-def : InstRW<[A64FXWrite_STP01],
- (instrs STPWpre, STPWpost)>;
-def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
- (instrs STPWpre, STPWpost)>;
-def : InstRW<[A64FXWrite_STP01],
- (instrs STPXpre, STPXpost)>;
-def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
- (instrs STPXpre, STPXpost)>;
-
-def : InstRW<[WriteAdr, A64FXWrite_STP01],
- (instrs STRBpre, STRBpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
- (instrs STRBpre, STRBpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01],
- (instrs STRBBpre, STRBBpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
- (instrs STRBBpre, STRBBpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01],
- (instrs STRDpre, STRDpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
- (instrs STRDpre, STRDpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01],
- (instrs STRHpre, STRHpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
- (instrs STRHpre, STRHpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01],
- (instrs STRHHpre, STRHHpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
- (instrs STRHHpre, STRHHpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01],
- (instrs STRQpre, STRQpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
- (instrs STRQpre, STRQpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01],
- (instrs STRSpre, STRSpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
- (instrs STRSpre, STRSpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01],
- (instrs STRWpre, STRWpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
- (instrs STRWpre, STRWpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01],
- (instrs STRXpre, STRXpost)>;
-def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
- (instrs STRXpre, STRXpost)>;
-
-def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
- (instrs STRBroW, STRBroX)>;
-def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
- (instrs STRBBroW, STRBBroX)>;
-def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
- (instrs STRDroW, STRDroX)>;
-def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
- (instrs STRHroW, STRHroX)>;
-def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
- (instrs STRHHroW, STRHHroX)>;
-def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
- (instrs STRQroW, STRQroX)>;
-def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
- (instrs STRSroW, STRSroX)>;
-def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
- (instrs STRWroW, STRWroX)>;
-def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
- (instrs STRXroW, STRXroX)>;
-
-//---
-// 3.8 FP Data Processing Instructions
-//---
-
-// FP absolute value
-// FP min/max
-// FP negate
-def : WriteRes<WriteF, [A64FXGI03]> {
- let Latency = 4;
- let ResourceCycles = [2];
-}
-
-// FP arithmetic
-
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FADDDrr, FADDHrr)>;
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FSUBDrr, FSUBHrr)>;
-
-// FP compare
-def : WriteRes<WriteFCmp, [A64FXGI03]> {
- let Latency = 4;
- let ResourceCycles = [2];
-}
-
-// FP Div, Sqrt
-def : WriteRes<WriteFDiv, [A64FXGI0]> {
- let Latency = 43;
-}
-
-def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]> {
- let Latency = 38;
-}
-
-def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]> {
- let Latency = 29;
-}
-
-def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]> {
- let Latency = 43;
-}
-
-def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]> {
- let Latency = 29;
-}
-
-def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]> {
- let Latency = 43;
-}
-
-// FP divide, S-form
-// FP square root, S-form
-def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>;
-def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
-def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>;
-def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
-def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVSrr")>;
-def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^FSQRTSr")>;
-
-// FP divide, D-form
-// FP square root, D-form
-def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>;
-def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
-def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>;
-def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
-def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVDrr")>;
-def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^FSQRTDr")>;
-
-// FP multiply
-// FP multiply accumulate
-def : WriteRes<WriteFMul, [A64FXGI03]> {
- let Latency = 9;
- let ResourceCycles = [2];
-}
-
-def A64FXXWriteFMul : SchedWriteRes<[A64FXGI03]> {
- let Latency = 9;
- let ResourceCycles = [2];
-}
-
-def A64FXXWriteFMulAcc : SchedWriteRes<[A64FXGI03]> {
- let Latency = 9;
- let ResourceCycles = [2];
-}
-
-def : InstRW<[A64FXXWriteFMul], (instregex "^FMUL", "^FNMUL")>;
-def : InstRW<[A64FXXWriteFMulAcc],
- (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>;
-
-// FP round to integral
-def : InstRW<[A64FXWrite_9Cyc_GI03],
- (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
-
-// FP select
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>;
-
-//---
-// 3.9 FP Miscellaneous Instructions
-//---
-
-// FP convert, from vec to vec reg
-// FP convert, from gen to vec reg
-// FP convert, from vec to gen reg
-def : WriteRes<WriteFCvt, [A64FXGI03]> {
- let Latency = 9;
- let ResourceCycles = [2];
-}
-
-// FP move, immed
-// FP move, register
-def : WriteRes<WriteFImm, [A64FXGI0]> {
- let Latency = 4;
- let ResourceCycles = [2];
-}
-
-// FP transfer, from gen to vec reg
-// FP transfer, from vec to gen reg
-def : WriteRes<WriteFCopy, [A64FXGI0]> {
- let Latency = 4;
- let ResourceCycles = [2];
-}
-
-def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>;
-def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;
-
-//---
-// 3.12 ASIMD Integer Instructions
-//---
-
-// ASIMD absolute diff, D-form
-// ASIMD absolute diff, Q-form
-// ASIMD absolute diff accum, D-form
-// ASIMD absolute diff accum, Q-form
-// ASIMD absolute diff accum long
-// ASIMD absolute diff long
-// ASIMD arith, basic
-// ASIMD arith, complex
-// ASIMD compare
-// ASIMD logical (AND, BIC, EOR)
-// ASIMD max/min, basic
-// ASIMD max/min, reduce, 4H/4S
-// ASIMD max/min, reduce, 8B/8H
-// ASIMD max/min, reduce, 16B
-// ASIMD multiply, D-form
-// ASIMD multiply, Q-form
-// ASIMD multiply accumulate long
-// ASIMD multiply accumulate saturating long
-// ASIMD multiply long
-// ASIMD pairwise add and accumulate
-// ASIMD shift accumulate
-// ASIMD shift by immed, basic
-// ASIMD shift by immed and insert, basic, D-form
-// ASIMD shift by immed and insert, basic, Q-form
-// ASIMD shift by immed, complex
-// ASIMD shift by register, basic, D-form
-// ASIMD shift by register, basic, Q-form
-// ASIMD shift by register, complex, D-form
-// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [A64FXGI03]> {
- let Latency = 4;
- let ResourceCycles = [1];
-}
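-
-// WriteV is the fallback cost for ASIMD writes; the InstRW entries below
-// override it for instruction groups that need different latencies or
-// micro-op counts.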
-
-// ASIMD arith, reduce, 4H/4S
-// ASIMD arith, reduce, 8B/8H
-// ASIMD arith, reduce, 16B
-
-// ASIMD logical (MVN (alias for NOT), ORN, ORR)
-def : InstRW<[A64FXWrite_4Cyc_GI03],
- (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
-
-// ASIMD arith, reduce
-def : InstRW<[A64FXWrite_ADDLV],
- (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
-
-// ASIMD polynomial (8x8) multiply long
-def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>;
-def : InstRW<[A64FXWrite_MULLV],
- (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
-def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>;
-def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>;
-
-// ASIMD absolute diff accum, D-form
-def : InstRW<[A64FXWrite_ABA],
- (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
-// ASIMD absolute diff accum, Q-form
-def : InstRW<[A64FXWrite_ABA],
- (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
-// ASIMD absolute diff accum long
-def : InstRW<[A64FXWrite_ABAL],
- (instregex "^[SU]ABAL")>;
-// ASIMD arith, reduce, 4H/4S
-def : InstRW<[A64FXWrite_ADDLV1],
- (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
-// ASIMD arith, reduce, 8B
-def : InstRW<[A64FXWrite_ADDLV1],
- (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
-// ASIMD arith, reduce, 16B/16H
-def : InstRW<[A64FXWrite_ADDLV1],
- (instregex "^[SU]?ADDL?Vv16i8v$")>;
-// ASIMD max/min, reduce, 4H/4S
-def : InstRW<[A64FXWrite_MINMAXV],
- (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
-// ASIMD max/min, reduce, 8B/8H
-def : InstRW<[A64FXWrite_MINMAXV],
- (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
-// ASIMD max/min, reduce, 16B/16H
-def : InstRW<[A64FXWrite_MINMAXV],
- (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
-// ASIMD multiply, D-form
-def : InstRW<[A64FXWrite_PMUL],
- (instregex "^(P?MUL|SQR?DMUL)" #
- "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
- "(_indexed)?$")>;
-
-// ASIMD multiply, Q-form
-def : InstRW<[A64FXWrite_PMUL],
- (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;
-
-// ASIMD multiply, Q-form
-def : InstRW<[A64FXWrite_SQRDMULH],
- (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
-
-// ASIMD multiply accumulate, D-form
-def : InstRW<[A64FXWrite_9Cyc_GI03],
- (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
-// ASIMD multiply accumulate, Q-form
-def : InstRW<[A64FXWrite_9Cyc_GI03],
- (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
-// ASIMD shift accumulate
-def : InstRW<[A64FXWrite_SRSRAV],
- (instregex "SRSRAv", "URSRAv")>;
-def : InstRW<[A64FXWrite_SSRAV],
- (instregex "SSRAv", "USRAv")>;
-
-// ASIMD shift by immed, basic
-def : InstRW<[A64FXWrite_RSHRN],
- (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>;
-def : InstRW<[A64FXWrite_SHRN],
- (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>;
-
-def : InstRW<[A64FXWrite_6Cyc_GI3],
- (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>;
-
-// ASIMD shift by immed, complex
-def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>;
-def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>;
-// ASIMD shift by register, basic, Q-form
-def : InstRW<[A64FXWrite_6Cyc_GI3],
- (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
-// ASIMD shift by register, complex, D-form
-def : InstRW<[A64FXWrite_6Cyc_GI3],
- (instregex "^[SU][QR]{1,2}SHL" #
- "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
-// ASIMD shift by register, complex, Q-form
-def : InstRW<[A64FXWrite_6Cyc_GI3],
- (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
-
-// ASIMD Arithmetic
-def : InstRW<[A64FXWrite_4Cyc_GI03],
- (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
-def : InstRW<[A64FXWrite_4Cyc_GI03],
- (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
-def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>;
-def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>;
-def : InstRW<[A64FXWrite_4Cyc_GI03],
- (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
- "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
-def : InstRW<[A64FXWrite_ADDP],
- (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
-def : InstRW<[A64FXWrite_4Cyc_GI03],
- (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
- "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
-def : InstRW<[A64FXWrite_4Cyc_GI0],
- (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
-def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>;
-def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>;
-def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>;
-def : InstRW<[A64FXWrite_MINMAXV],
- (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>;
-def : InstRW<[A64FXWrite_ABA],
- (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
-def : InstRW<[A64FXWrite_4Cyc_GI03],
- (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv")>;
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SUQADDv", "^USQADDv")>;
-def : InstRW<[A64FXWrite_SHRN],
- (instregex "^ADDHNv", "^SUBHNv")>;
-def : InstRW<[A64FXWrite_RSHRN],
- (instregex "^RADDHNv", "^RSUBHNv")>;
-def : InstRW<[A64FXWrite_4Cyc_GI03],
- (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
- "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB",
- "^URHADD", "^USQADD")>;
-
-def : InstRW<[A64FXWrite_4Cyc_GI03],
- (instregex "^CMEQv", "^CMGEv", "^CMGTv",
- "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>;
-def : InstRW<[A64FXWrite_MINMAXV],
- (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
-def : InstRW<[A64FXWrite_ADDP],
- (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
-def : InstRW<[A64FXWrite_4Cyc_GI03],
- (instregex "^SABDv", "^UABDv")>;
-def : InstRW<[A64FXWrite_TBX1],
- (instregex "^SABDLv", "^UABDLv")>;
-
-//---
-// 3.13 ASIMD Floating-point Instructions
-//---
-
-// ASIMD FP absolute value
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>;
-
-// ASIMD FP arith, normal, D-form
-// ASIMD FP arith, normal, Q-form
-def : InstRW<[A64FXWrite_9Cyc_GI03],
- (instregex "^FABDv", "^FADDv", "^FSUBv")>;
-
-// ASIMD FP arith, pairwise, D-form
-// ASIMD FP arith, pairwise, Q-form
-def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>;
-
-// ASIMD FP compare, D-form
-// ASIMD FP compare, Q-form
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>;
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCMEQv", "^FCMGEv",
- "^FCMGTv", "^FCMLEv",
- "^FCMLTv")>;
-// ASIMD FP round, D-form
-def : InstRW<[A64FXWrite_9Cyc_GI03],
- (instregex "^FRINT[AIMNPXZ](v2f32)")>;
-// ASIMD FP round, Q-form
-def : InstRW<[A64FXWrite_9Cyc_GI03],
- (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
-
-// ASIMD FP convert, long
-// ASIMD FP convert, narrow
-// ASIMD FP convert, other, D-form
-// ASIMD FP convert, other, Q-form
-
-// ASIMD FP convert, long and narrow
-def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>;
-// ASIMD FP convert, other, D-form
-def : InstRW<[A64FXWrite_FCVTXNV],
- (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
-// ASIMD FP convert, other, Q-form
-def : InstRW<[A64FXWrite_FCVTXNV],
- (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
-
-// ASIMD FP divide, D-form, F32
-def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;
-def : InstRW<[A64FXXWriteFDivSP], (instregex "FDIVv2f32")>;
-
-// ASIMD FP divide, Q-form, F32
-def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;
-def : InstRW<[A64FXXWriteFDiv], (instregex "FDIVv4f32")>;
-
-// ASIMD FP divide, Q-form, F64
-def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;
-def : InstRW<[A64FXXWriteFDivDP], (instregex "FDIVv2f64")>;
-
-// ASIMD FP max/min, normal, D-form
-// ASIMD FP max/min, normal, Q-form
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMAXv", "^FMAXNMv",
- "^FMINv", "^FMINNMv")>;
-
-// ASIMD FP max/min, pairwise, D-form
-// ASIMD FP max/min, pairwise, Q-form
-def : InstRW<[A64FXWrite_ADDP], (instregex "^FMAXPv", "^FMAXNMPv",
- "^FMINPv", "^FMINNMPv")>;
-
-// ASIMD FP max/min, reduce
-def : InstRW<[A64FXWrite_FMAXVVH], (instregex "^FMAXVv", "^FMAXNMVv",
- "^FMINVv", "^FMINNMVv")>;
-
-// ASIMD FP multiply, D-form, FZ
-// ASIMD FP multiply, D-form, no FZ
-// ASIMD FP multiply, Q-form, FZ
-// ASIMD FP multiply, Q-form, no FZ
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
-def : InstRW<[A64FXWrite_FMULXE],
- (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
-def : InstRW<[A64FXWrite_FMULXE],
- (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
-
-// ASIMD FP multiply accumulate, D-form, FZ
-// ASIMD FP multiply accumulate, D-form, no FZ
-// ASIMD FP multiply accumulate, Q-form, FZ
-// ASIMD FP multiply accumulate, Q-form, no FZ
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
-def : InstRW<[A64FXWrite_FMULXE],
- (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
-def : InstRW<[A64FXWrite_FMULXE],
- (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
-
-// ASIMD FP negate
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>;
-
-//--
-// 3.14 ASIMD Miscellaneous Instructions
-//--
-
-// ASIMD bit reverse
-def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>;
-
-// ASIMD bitwise insert, D-form
-// ASIMD bitwise insert, Q-form
-def : InstRW<[A64FXWrite_BIF],
- (instregex "^BIFv", "^BITv", "^BSLv")>;
-
-// ASIMD count, D-form
-// ASIMD count, Q-form
-def : InstRW<[A64FXWrite_4Cyc_GI0],
- (instregex "^CLSv", "^CLZv", "^CNTv")>;
-
-// ASIMD duplicate, gen reg
-// ASIMD duplicate, element
-def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^CPY")>;
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>;
-
-// ASIMD extract
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>;
-
-// ASIMD extract narrow
-def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>;
-
-// ASIMD extract narrow, saturating
-def : InstRW<[A64FXWrite_6Cyc_GI3],
- (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;
-
-// ASIMD insert, element to element
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;
-
-// ASIMD transfer, element to gen reg
-def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;
-
-// ASIMD move, integer immed
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>;
-
-// ASIMD move, FP immed
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>;
-
-// ASIMD table lookup, D-form
-def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>;
-def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>;
-def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>;
-def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>;
-def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>;
-def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>;
-def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>;
-def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>;
-
-// ASIMD table lookup, Q-form
-def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>;
-def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>;
-def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>;
-def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>;
-def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>;
-def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>;
-def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>;
-def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>;
-
-// ASIMD transpose
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1", "^TRN2")>;
-
-// ASIMD unzip/zip
-def : InstRW<[A64FXWrite_6Cyc_GI0],
- (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
-
-// ASIMD reciprocal estimate, D-form
-// ASIMD reciprocal estimate, Q-form
-def : InstRW<[A64FXWrite_4Cyc_GI03],
- (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
- "^FRSQRTEv", "^URSQRTEv")>;
-
-// ASIMD reciprocal step, D-form, FZ
-// ASIMD reciprocal step, D-form, no FZ
-// ASIMD reciprocal step, Q-form, FZ
-// ASIMD reciprocal step, Q-form, no FZ
-def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>;
-
-// ASIMD reverse
-def : InstRW<[A64FXWrite_4Cyc_GI03],
- (instregex "^REV16v", "^REV32v", "^REV64v")>;
-
-// ASIMD table lookup, D-form
-// ASIMD table lookup, Q-form
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>;
-
-// ASIMD transfer, element to word or doubleword
-def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;
-
-// ASIMD transfer, element to gen reg
-def : InstRW<[A64FXWrite_SMOV], (instregex "(S|U)MOVv.*")>;
-
-// ASIMD transfer gen reg to element
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;
-
-// ASIMD transpose
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1v", "^TRN2v",
- "^UZP1v", "^UZP2v")>;
-
-// ASIMD unzip/zip
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>;
-
-//--
-// 3.15 ASIMD Load Instructions
-//--
-
-// ASIMD load, 1 element, multiple, 1 reg, D-form
-// ASIMD load, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[A64FXWrite_8Cyc_GI56],
- (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
-def : InstRW<[A64FXWrite_11Cyc_GI56],
- (instregex "^LD1Onev(16b|8h|4s)$")>;
-def : InstRW<[A64FXWrite_LD108, WriteAdr],
- (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
-def : InstRW<[A64FXWrite_LD109, WriteAdr],
- (instregex "^LD1Onev(16b|8h|4s)_POST$")>;
-
-// ASIMD load, 1 element, multiple, 2 reg, D-form
-// ASIMD load, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[A64FXWrite_LD102],
- (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
-def : InstRW<[A64FXWrite_LD103],
- (instregex "^LD1Twov(16b|8h|4s)$")>;
-def : InstRW<[A64FXWrite_LD110, WriteAdr],
- (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
-def : InstRW<[A64FXWrite_LD111, WriteAdr],
- (instregex "^LD1Twov(16b|8h|4s)_POST$")>;
-
-// ASIMD load, 1 element, multiple, 3 reg, D-form
-// ASIMD load, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[A64FXWrite_LD104],
- (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
-def : InstRW<[A64FXWrite_LD105],
- (instregex "^LD1Threev(16b|8h|4s)$")>;
-def : InstRW<[A64FXWrite_LD112, WriteAdr],
- (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
-def : InstRW<[A64FXWrite_LD113, WriteAdr],
- (instregex "^LD1Threev(16b|8h|4s)_POST$")>;
-
-// ASIMD load, 1 element, multiple, 4 reg, D-form
-// ASIMD load, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[A64FXWrite_LD106],
- (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
-def : InstRW<[A64FXWrite_LD107],
- (instregex "^LD1Fourv(16b|8h|4s)$")>;
-def : InstRW<[A64FXWrite_LD114, WriteAdr],
- (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
-def : InstRW<[A64FXWrite_LD115, WriteAdr],
- (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;
-
-// ASIMD load, 1 element, one lane, B/H/S
-// ASIMD load, 1 element, one lane, D
-def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>;
-def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
- (instregex "^LD1i(8|16|32|64)_POST$")>;
-
-// ASIMD load, 1 element, all lanes, D-form, B/H/S
-// ASIMD load, 1 element, all lanes, D-form, D
-// ASIMD load, 1 element, all lanes, Q-form
-def : InstRW<[A64FXWrite_8Cyc_GI03],
- (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_LD108, WriteAdr],
- (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD load, 2 element, multiple, D-form, B/H/S
-// ASIMD load, 2 element, multiple, Q-form, D
-def : InstRW<[A64FXWrite_LD103],
- (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_LD111, WriteAdr],
- (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD load, 2 element, one lane, B/H
-// ASIMD load, 2 element, one lane, S
-// ASIMD load, 2 element, one lane, D
-def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>;
-def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
- (instregex "^LD2i(8|16|32|64)_POST$")>;
-
-// ASIMD load, 2 element, all lanes, D-form, B/H/S
-// ASIMD load, 2 element, all lanes, D-form, D
-// ASIMD load, 2 element, all lanes, Q-form
-def : InstRW<[A64FXWrite_LD102],
- (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_LD110, WriteAdr],
- (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD load, 3 element, multiple, D-form, B/H/S
-// ASIMD load, 3 element, multiple, Q-form, B/H/S
-// ASIMD load, 3 element, multiple, Q-form, D
-def : InstRW<[A64FXWrite_LD105],
- (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_LD113, WriteAdr],
- (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD load, 3 element, one lane, B/H
-// ASIMD load, 3 element, one lane, S
-// ASIMD load, 3 element, one lane, D
-def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>;
-def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
- (instregex "^LD3i(8|16|32|64)_POST$")>;
-
-// ASIMD load, 3 element, all lanes, D-form, B/H/S
-// ASIMD load, 3 element, all lanes, D-form, D
-// ASIMD load, 3 element, all lanes, Q-form, B/H/S
-// ASIMD load, 3 element, all lanes, Q-form, D
-def : InstRW<[A64FXWrite_LD104],
- (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_LD112, WriteAdr],
- (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD load, 4 element, multiple, D-form, B/H/S
-// ASIMD load, 4 element, multiple, Q-form, B/H/S
-// ASIMD load, 4 element, multiple, Q-form, D
-def : InstRW<[A64FXWrite_LD107],
- (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_LD115, WriteAdr],
- (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD load, 4 element, one lane, B/H
-// ASIMD load, 4 element, one lane, S
-// ASIMD load, 4 element, one lane, D
-def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>;
-def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
- (instregex "^LD4i(8|16|32|64)_POST$")>;
-
-// ASIMD load, 4 element, all lanes, D-form, B/H/S
-// ASIMD load, 4 element, all lanes, D-form, D
-// ASIMD load, 4 element, all lanes, Q-form, B/H/S
-// ASIMD load, 4 element, all lanes, Q-form, D
-def : InstRW<[A64FXWrite_LD106],
- (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_LD114, WriteAdr],
- (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-//--
-// 3.16 ASIMD Store Instructions
-//--
-
-// ASIMD store, 1 element, multiple, 1 reg, D-form
-// ASIMD store, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[A64FXWrite_ST10],
- (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_ST14, WriteAdr],
- (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD store, 1 element, multiple, 2 reg, D-form
-// ASIMD store, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[A64FXWrite_ST11],
- (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_ST15, WriteAdr],
- (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD store, 1 element, multiple, 3 reg, D-form
-// ASIMD store, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[A64FXWrite_ST12],
- (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_ST16, WriteAdr],
- (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD store, 1 element, multiple, 4 reg, D-form
-// ASIMD store, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[A64FXWrite_ST13],
- (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_ST17, WriteAdr],
- (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD store, 1 element, one lane, B/H/S
-// ASIMD store, 1 element, one lane, D
-def : InstRW<[A64FXWrite_ST10],
- (instregex "^ST1i(8|16|32|64)$")>;
-def : InstRW<[A64FXWrite_ST14, WriteAdr],
- (instregex "^ST1i(8|16|32|64)_POST$")>;
-
-// ASIMD store, 2 element, multiple, D-form, B/H/S
-// ASIMD store, 2 element, multiple, Q-form, B/H/S
-// ASIMD store, 2 element, multiple, Q-form, D
-def : InstRW<[A64FXWrite_ST11],
- (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_ST15, WriteAdr],
- (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD store, 2 element, one lane, B/H/S
-// ASIMD store, 2 element, one lane, D
-def : InstRW<[A64FXWrite_ST11],
- (instregex "^ST2i(8|16|32|64)$")>;
-def : InstRW<[A64FXWrite_ST15, WriteAdr],
- (instregex "^ST2i(8|16|32|64)_POST$")>;
-
-// ASIMD store, 3 element, multiple, D-form, B/H/S
-// ASIMD store, 3 element, multiple, Q-form, B/H/S
-// ASIMD store, 3 element, multiple, Q-form, D
-def : InstRW<[A64FXWrite_ST12],
- (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_ST16, WriteAdr],
- (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD store, 3 element, one lane, B/H
-// ASIMD store, 3 element, one lane, S
-// ASIMD store, 3 element, one lane, D
-def : InstRW<[A64FXWrite_ST12], (instregex "^ST3i(8|16|32|64)$")>;
-def : InstRW<[A64FXWrite_ST16, WriteAdr],
- (instregex "^ST3i(8|16|32|64)_POST$")>;
-
-// ASIMD store, 4 element, multiple, D-form, B/H/S
-// ASIMD store, 4 element, multiple, Q-form, B/H/S
-// ASIMD store, 4 element, multiple, Q-form, D
-def : InstRW<[A64FXWrite_ST13],
- (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[A64FXWrite_ST17, WriteAdr],
- (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
-
-// ASIMD store, 4 element, one lane, B/H
-// ASIMD store, 4 element, one lane, S
-// ASIMD store, 4 element, one lane, D
-def : InstRW<[A64FXWrite_ST13], (instregex "^ST4i(8|16|32|64)$")>;
-def : InstRW<[A64FXWrite_ST17, WriteAdr],
- (instregex "^ST4i(8|16|32|64)_POST$")>;
-
-// V8.1a Atomics (LSE)
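-// Each entry pairs a load/store-pipe write with the unit-less WriteAtomic
-// (latency 4, defined above) so both port occupancy and atomic latency are
-// modeled.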
-def : InstRW<[A64FXWrite_CAS, WriteAtomic],
- (instrs CASB, CASH, CASW, CASX)>;
-
-def : InstRW<[A64FXWrite_CAS, WriteAtomic],
- (instrs CASAB, CASAH, CASAW, CASAX)>;
-
-def : InstRW<[A64FXWrite_CAS, WriteAtomic],
- (instrs CASLB, CASLH, CASLW, CASLX)>;
-
-def : InstRW<[A64FXWrite_CAS, WriteAtomic],
- (instrs CASALB, CASALH, CASALW, CASALX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDADDB, LDADDH, LDADDW, LDADDX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDEORB, LDEORH, LDEORW, LDEORX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDSETB, LDSETH, LDSETW, LDSETX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX,
- LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX,
- LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX,
- LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX,
- LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX,
- LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX,
- LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX,
- LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX,
- LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX,
- LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;
-
-def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
- (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX,
- LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX,
- LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX,
- LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;
-
-def : InstRW<[A64FXWrite_SWP, WriteAtomic],
- (instrs SWPB, SWPH, SWPW, SWPX)>;
-
-def : InstRW<[A64FXWrite_SWP, WriteAtomic],
- (instrs SWPAB, SWPAH, SWPAW, SWPAX)>;
-
-def : InstRW<[A64FXWrite_SWP, WriteAtomic],
- (instrs SWPLB, SWPLH, SWPLW, SWPLX)>;
-
-def : InstRW<[A64FXWrite_SWP, WriteAtomic],
- (instrs SWPALB, SWPALH, SWPALW, SWPALX)>;
-
-def : InstRW<[A64FXWrite_STUR, WriteAtomic],
- (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;
-
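-//---
-// SVE Instructions
-// Each entry below quotes the assembly pattern it covers.
-//---
-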
-// [ 1] "abs $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ABS_ZPmZ_B, ABS_ZPmZ_D, ABS_ZPmZ_H, ABS_ZPmZ_S)>;
-
-// [ 2] "add $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZZZ_B, ADD_ZZZ_D, ADD_ZZZ_H, ADD_ZZZ_S)>;
-
-// [ 3] "add $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZPmZ_B, ADD_ZPmZ_D, ADD_ZPmZ_H, ADD_ZPmZ_S)>;
-
-// [ 4] "add $Zdn, $_Zdn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZI_B, ADD_ZI_D, ADD_ZI_H, ADD_ZI_S)>;
-
-// [ 5] "addpl $Rd, $Rn, $imm6";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs ADDPL_XXI)>;
-
-// [ 6] "addvl $Rd, $Rn, $imm6";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs ADDVL_XXI)>;
-
-// [ 7] "adr $Zd, [$Zn, $Zm]";
-def : InstRW<[A64FXWrite_5Cyc_GI0], (instrs ADR_LSL_ZZZ_D_0, ADR_LSL_ZZZ_D_1, ADR_LSL_ZZZ_D_2, ADR_LSL_ZZZ_D_3, ADR_LSL_ZZZ_S_0, ADR_LSL_ZZZ_S_1, ADR_LSL_ZZZ_S_2, ADR_LSL_ZZZ_S_3, ADR_SXTW_ZZZ_D_0, ADR_SXTW_ZZZ_D_1, ADR_SXTW_ZZZ_D_2, ADR_SXTW_ZZZ_D_3, ADR_UXTW_ZZZ_D_0, ADR_UXTW_ZZZ_D_1, ADR_UXTW_ZZZ_D_2, ADR_UXTW_ZZZ_D_3)>;
-
-// [ 8] "and $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs AND_PPzPP)>;
-
-// [ 9] "and $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZZZ)>;
-
-// [10] "and $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZPmZ_B, AND_ZPmZ_D, AND_ZPmZ_H, AND_ZPmZ_S)>;
-
-// [11] "and $Zdn, $_Zdn, $imms13";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZI)>;
-
-// [12] "ands $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ANDS_PPzPP)>;
-
-// [13] "andv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs ANDV_VPZ_B, ANDV_VPZ_D, ANDV_VPZ_H, ANDV_VPZ_S)>;
-
-// [14] "asr $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_WIDE_ZZZ_B, ASR_WIDE_ZZZ_H, ASR_WIDE_ZZZ_S)>;
-
-// [15] "asr $Zd, $Zn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_ZZI_B, ASR_ZZI_D, ASR_ZZI_H, ASR_ZZI_S)>;
-
-// [16] "asr $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_WIDE_ZPmZ_B, ASR_WIDE_ZPmZ_H, ASR_WIDE_ZPmZ_S, ASR_ZPmZ_B, ASR_ZPmZ_D, ASR_ZPmZ_H, ASR_ZPmZ_S)>;
-
-// [17] "asr $Zdn, $Pg/m, $_Zdn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_ZPmI_B, ASR_ZPmI_D, ASR_ZPmI_H, ASR_ZPmI_S)>;
-
-// [18] "asrd $Zdn, $Pg/m, $_Zdn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASRD_ZPmI_B, ASRD_ZPmI_D, ASRD_ZPmI_H, ASRD_ZPmI_S)>;
-
-// [19] "asrr $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASRR_ZPmZ_B, ASRR_ZPmZ_D, ASRR_ZPmZ_H, ASRR_ZPmZ_S)>;
-
-// [20] "bic $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BIC_PPzPP)>;
-
-// [21] "bic $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs BIC_ZZZ)>;
-
-// [22] "bic $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs BIC_ZPmZ_B, BIC_ZPmZ_D, BIC_ZPmZ_H, BIC_ZPmZ_S)>;
-
-// [23] "bics $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BICS_PPzPP)>;
-
-// [24] "brka $Pd, $Pg/m, $Pn";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKA_PPmP)>;
-
-// [25] "brka $Pd, $Pg/z, $Pn";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKA_PPzP)>;
-
-// [26] "brkas $Pd, $Pg/z, $Pn";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKAS_PPzP)>;
-
-// [27] "brkb $Pd, $Pg/m, $Pn";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKB_PPmP)>;
-
-// [28] "brkb $Pd, $Pg/z, $Pn";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKB_PPzP)>;
-
-// [29] "brkbs $Pd, $Pg/z, $Pn";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKBS_PPzP)>;
-
-// [30] "brkn $Pdm, $Pg/z, $Pn, $_Pdm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKN_PPzP)>;
-
-// [31] "brkns $Pdm, $Pg/z, $Pn, $_Pdm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKNS_PPzP)>;
-
-// [32] "brkpa $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPA_PPzPP)>;
-
-// [33] "brkpas $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPAS_PPzPP)>;
-
-// [34] "brkpb $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPB_PPzPP)>;
-
-// [35] "brkpbs $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPBS_PPzPP)>;
-
-// [36] "clasta $Rdn, $Pg, $_Rdn, $Zm";
-def : InstRW<[A64FXWrite_29Cyc_GI0256], (instrs CLASTA_RPZ_B, CLASTA_RPZ_D, CLASTA_RPZ_H, CLASTA_RPZ_S)>;
-
-// [37] "clasta $Vdn, $Pg, $_Vdn, $Zm";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTA_VPZ_B, CLASTA_VPZ_D, CLASTA_VPZ_H, CLASTA_VPZ_S)>;
-
-// [38] "clasta $Zdn, $Pg, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTA_ZPZ_B, CLASTA_ZPZ_D, CLASTA_ZPZ_H, CLASTA_ZPZ_S)>;
-
-// [39] "clastb $Rdn, $Pg, $_Rdn, $Zm";
-def : InstRW<[A64FXWrite_29Cyc_GI0256], (instrs CLASTB_RPZ_B, CLASTB_RPZ_D, CLASTB_RPZ_H, CLASTB_RPZ_S)>;
-
-// [40] "clastb $Vdn, $Pg, $_Vdn, $Zm";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTB_VPZ_B, CLASTB_VPZ_D, CLASTB_VPZ_H, CLASTB_VPZ_S)>;
-
-// [41] "clastb $Zdn, $Pg, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTB_ZPZ_B, CLASTB_ZPZ_D, CLASTB_ZPZ_H, CLASTB_ZPZ_S)>;
-
-// [42] "cls $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs CLS_ZPmZ_B, CLS_ZPmZ_D, CLS_ZPmZ_H, CLS_ZPmZ_S)>;
-
-// [43] "clz $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs CLZ_ZPmZ_B, CLZ_ZPmZ_D, CLZ_ZPmZ_H, CLZ_ZPmZ_S)>;
-
-// [44] "cmpeq $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPEQ_PPzZZ_B, CMPEQ_PPzZZ_D, CMPEQ_PPzZZ_H, CMPEQ_PPzZZ_S, CMPEQ_WIDE_PPzZZ_B, CMPEQ_WIDE_PPzZZ_H, CMPEQ_WIDE_PPzZZ_S)>;
-
-// [45] "cmpeq $Pd, $Pg/z, $Zn, $imm5";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPEQ_PPzZI_B, CMPEQ_PPzZI_D, CMPEQ_PPzZI_H, CMPEQ_PPzZI_S)>;
-
-// [46] "cmpge $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGE_PPzZZ_B, CMPGE_PPzZZ_D, CMPGE_PPzZZ_H, CMPGE_PPzZZ_S, CMPGE_WIDE_PPzZZ_B, CMPGE_WIDE_PPzZZ_H, CMPGE_WIDE_PPzZZ_S)>;
-
-// [47] "cmpge $Pd, $Pg/z, $Zn, $imm5";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGE_PPzZI_B, CMPGE_PPzZI_D, CMPGE_PPzZI_H, CMPGE_PPzZI_S)>;
-
-// [48] "cmpgt $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGT_PPzZZ_B, CMPGT_PPzZZ_D, CMPGT_PPzZZ_H, CMPGT_PPzZZ_S, CMPGT_WIDE_PPzZZ_B, CMPGT_WIDE_PPzZZ_H, CMPGT_WIDE_PPzZZ_S)>;
-
-// [49] "cmpgt $Pd, $Pg/z, $Zn, $imm5";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGT_PPzZI_B, CMPGT_PPzZI_D, CMPGT_PPzZI_H, CMPGT_PPzZI_S)>;
-
-// [50] "cmphi $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHI_PPzZZ_B, CMPHI_PPzZZ_D, CMPHI_PPzZZ_H, CMPHI_PPzZZ_S, CMPHI_WIDE_PPzZZ_B, CMPHI_WIDE_PPzZZ_H, CMPHI_WIDE_PPzZZ_S)>;
-
-// [51] "cmphi $Pd, $Pg/z, $Zn, $imm7";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHI_PPzZI_B, CMPHI_PPzZI_D, CMPHI_PPzZI_H, CMPHI_PPzZI_S)>;
-
-// [52] "cmphs $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHS_PPzZZ_B, CMPHS_PPzZZ_D, CMPHS_PPzZZ_H, CMPHS_PPzZZ_S, CMPHS_WIDE_PPzZZ_B, CMPHS_WIDE_PPzZZ_H, CMPHS_WIDE_PPzZZ_S)>;
-
-// [53] "cmphs $Pd, $Pg/z, $Zn, $imm7";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHS_PPzZI_B, CMPHS_PPzZI_D, CMPHS_PPzZI_H, CMPHS_PPzZI_S)>;
-
-// [54] "cmple $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLE_WIDE_PPzZZ_B, CMPLE_WIDE_PPzZZ_H, CMPLE_WIDE_PPzZZ_S)>;
-
-// [55] "cmple $Pd, $Pg/z, $Zn, $imm5";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLE_PPzZI_B, CMPLE_PPzZI_D, CMPLE_PPzZI_H, CMPLE_PPzZI_S)>;
-
-// [56] "cmplo $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLO_WIDE_PPzZZ_B, CMPLO_WIDE_PPzZZ_H, CMPLO_WIDE_PPzZZ_S)>;
-
-// [57] "cmplo $Pd, $Pg/z, $Zn, $imm7";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLO_PPzZI_B, CMPLO_PPzZI_D, CMPLO_PPzZI_H, CMPLO_PPzZI_S)>;
-
-// [58] "cmpls $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLS_WIDE_PPzZZ_B, CMPLS_WIDE_PPzZZ_H, CMPLS_WIDE_PPzZZ_S)>;
-
-// [59] "cmpls $Pd, $Pg/z, $Zn, $imm7";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLS_PPzZI_B, CMPLS_PPzZI_D, CMPLS_PPzZI_H, CMPLS_PPzZI_S)>;
-
-// [60] "cmplt $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLT_WIDE_PPzZZ_B, CMPLT_WIDE_PPzZZ_H, CMPLT_WIDE_PPzZZ_S)>;
-
-// [61] "cmplt $Pd, $Pg/z, $Zn, $imm5";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLT_PPzZI_B, CMPLT_PPzZI_D, CMPLT_PPzZI_H, CMPLT_PPzZI_S)>;
-
-// [62] "cmpne $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPNE_PPzZZ_B, CMPNE_PPzZZ_D, CMPNE_PPzZZ_H, CMPNE_PPzZZ_S, CMPNE_WIDE_PPzZZ_B, CMPNE_WIDE_PPzZZ_H, CMPNE_WIDE_PPzZZ_S)>;
-
-// [63] "cmpne $Pd, $Pg/z, $Zn, $imm5";
-def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPNE_PPzZI_B, CMPNE_PPzZI_D, CMPNE_PPzZI_H, CMPNE_PPzZI_S)>;
-
-// [64] "cnot $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs CNOT_ZPmZ_B, CNOT_ZPmZ_D, CNOT_ZPmZ_H, CNOT_ZPmZ_S)>;
-
-// [65] "cnt $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI3], (instrs CNT_ZPmZ_B, CNT_ZPmZ_D, CNT_ZPmZ_H, CNT_ZPmZ_S)>;
-
-// [66] "cntb $Rd, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTB_XPiI)>;
-
-// [67] "cntd $Rd, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTD_XPiI)>;
-
-// [68] "cnth $Rd, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTH_XPiI)>;
-
-// [69] "cntp $Rd, $Pg, $Pn";
-def : InstRW<[A64FXWrite_6Cyc_GI01], (instrs CNTP_XPP_B, CNTP_XPP_D, CNTP_XPP_H, CNTP_XPP_S)>;
-
-// [70] "cntw $Rd, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTW_XPiI)>;
-
-// [71] "compact $Zd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs COMPACT_ZPZ_D, COMPACT_ZPZ_S)>;
-
-// [72] "cpy $Zd, $Pg/m, $Rn";
-//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmR_B, CPY_ZPmR_D, CPY_ZPmR_H, CPY_ZPmR_S)>;
-
-// [73] "cpy $Zd, $Pg/m, $Vn";
-//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmV_B, CPY_ZPmV_D, CPY_ZPmV_H, CPY_ZPmV_S)>;
-
-// [74] "cpy $Zd, $Pg/m, $imm";
-//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmI_B, CPY_ZPmI_D, CPY_ZPmI_H, CPY_ZPmI_S)>;
-
-// [75] "cpy $Zd, $Pg/z, $imm";
-//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPzI_B, CPY_ZPzI_D, CPY_ZPzI_H, CPY_ZPzI_S)>;
-
-// [76] "ctermeq $Rn, $Rm";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMEQ_WW, CTERMEQ_XX)>;
-
-// [77] "ctermne $Rn, $Rm";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMNE_WW, CTERMNE_XX)>;
-
-// [78] "decb $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECB_XPiI)>;
-
-// [79] "decd $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECD_XPiI)>;
-
-// [80] "decd $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECD_ZPiI)>;
-
-// [81] "dech $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECH_XPiI)>;
-
-// [82] "dech $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECH_ZPiI)>;
-
-// [83] "decp $Rdn, $Pg";
-def : InstRW<[A64FXWrite_6Cyc_GI124], (instrs DECP_XP_B, DECP_XP_D, DECP_XP_H, DECP_XP_S)>;
-
-// [84] "decp $Zdn, $Pg";
-def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs DECP_ZP_D, DECP_ZP_H, DECP_ZP_S)>;
-
-// [85] "decw $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECW_XPiI)>;
-
-// [86] "decw $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECW_ZPiI)>;
-
-// [87] "dup $Zd, $Rn";
-def : InstRW<[A64FXWrite_8Cyc_GI01], (instrs DUP_ZR_B, DUP_ZR_D, DUP_ZR_H, DUP_ZR_S)>;
-
-// [88] "dup $Zd, $Zn$idx";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs DUP_ZZI_B, DUP_ZZI_D, DUP_ZZI_H, DUP_ZZI_Q, DUP_ZZI_S)>;
-
-// [89] "dup $Zd, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs DUP_ZI_B, DUP_ZI_D, DUP_ZI_H, DUP_ZI_S)>;
-
-// [90] "dupm $Zd, $imms";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs DUPM_ZI)>;
-
-// [91] "eor $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs EOR_PPzPP)>;
-
-// [92] "eor $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs EOR_ZZZ)>;
-
-// [93] "eor $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs EOR_ZPmZ_B, EOR_ZPmZ_D, EOR_ZPmZ_H, EOR_ZPmZ_S)>;
-
-// [94] "eor $Zdn, $_Zdn, $imms13";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs EOR_ZI)>;
-
-// [95] "eors $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs EORS_PPzPP)>;
-
-// [96] "eorv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs EORV_VPZ_B, EORV_VPZ_D, EORV_VPZ_H, EORV_VPZ_S)>;
-
-// [97] "ext $Zdn, $_Zdn, $Zm, $imm8";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs EXT_ZZI)>;
-
-// [99] "fabd $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FABD_ZPmZ_D, FABD_ZPmZ_H, FABD_ZPmZ_S)>;
-
-// [100] "fabs $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FABS_ZPmZ_D, FABS_ZPmZ_H, FABS_ZPmZ_S)>;
-
-// [101] "facge $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FACGE_PPzZZ_D, FACGE_PPzZZ_H, FACGE_PPzZZ_S)>;
-
-// [102] "facgt $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FACGT_PPzZZ_D, FACGT_PPzZZ_H, FACGT_PPzZZ_S)>;
-
-// [103] "fadd $Zd, $Zn, $Zm"; def is line 1638
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZZZ_D, FADD_ZZZ_H, FADD_ZZZ_S)>;
-
-// [104] "fadd $Zdn, $Pg/m, $_Zdn, $Zm"; def is line 1638
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZPmZ_D, FADD_ZPmZ_H, FADD_ZPmZ_S)>;
-
-// [105] "fadd $Zdn, $Pg/m, $_Zdn, $i1"; def is line 1638
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZPmI_D, FADD_ZPmI_H, FADD_ZPmI_S)>;
-
-// [106] "fadda $Vdn, $Pg, $_Vdn, $Zm";
-def : InstRW<[A64FXWrite_18Cyc_GI03], (instrs FADDA_VPZ_D, FADDA_VPZ_H, FADDA_VPZ_S)>;
-
-// [107] "faddv $Vd, $Pg, $Zn";
-// H : 4 / 6 / ([1,2]9 / [1]6) x 4 / [1,2]9 = 75 cycle
-// S : 4 / 6 / ([1,2]9 / [1]6) x 3 / [1,2]9 = 60 cycle
-// D : 4 / 6 / ([1,2]9 / [1]6) x 2 / [1,2]9 = 45 cycle
-def : InstRW<[A64FXWrite_75Cyc_GI03], (instrs FADDV_VPZ_H)>;
-def : InstRW<[A64FXWrite_60Cyc_GI03], (instrs FADDV_VPZ_S)>;
-def : InstRW<[A64FXWrite_45Cyc_GI03], (instrs FADDV_VPZ_D)>;
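-// A minimal sketch of how the totals above can be reproduced, assuming the
-// leading "4" term is not part of the sum (an assumption, not stated in the
-// original comment): 6 + (9 + 6) * N + 9, with N = 4 (H), 3 (S), 2 (D),
-// yields 75 / 60 / 45 cycles, matching the write resources chosen above.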
-
-// [108] "fcadd $Zdn, $Pg/m, $_Zdn, $Zm, $imm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCADD_ZPmZ_D, FCADD_ZPmZ_H, FCADD_ZPmZ_S)>;
-
-// [109] "fcmeq $Pd, $Pg/z, $Zn, #0.0";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMEQ_PPzZ0_D, FCMEQ_PPzZ0_H, FCMEQ_PPzZ0_S)>;
-
-// [110] "fcmeq $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMEQ_PPzZZ_D, FCMEQ_PPzZZ_H, FCMEQ_PPzZZ_S)>;
-
-// [111] "fcmge $Pd, $Pg/z, $Zn, #0.0";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGE_PPzZ0_D, FCMGE_PPzZ0_H, FCMGE_PPzZ0_S)>;
-
-// [112] "fcmge $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGE_PPzZZ_D, FCMGE_PPzZZ_H, FCMGE_PPzZZ_S)>;
-
-// [113] "fcmgt $Pd, $Pg/z, $Zn, #0.0";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGT_PPzZ0_D, FCMGT_PPzZ0_H, FCMGT_PPzZ0_S)>;
-
-// [114] "fcmgt $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGT_PPzZZ_D, FCMGT_PPzZZ_H, FCMGT_PPzZZ_S)>;
-
-// [115] "fcmla $Zda, $Pg/m, $Zn, $Zm, $imm";
-def : InstRW<[A64FXWrite_15Cyc_GI03], (instrs FCMLA_ZPmZZ_D, FCMLA_ZPmZZ_H, FCMLA_ZPmZZ_S)>;
-
-// [116] "fcmla $Zda, $Zn, $Zm$iop, $imm";
-def : InstRW<[A64FXWrite_15Cyc_GI03], (instrs FCMLA_ZZZI_H, FCMLA_ZZZI_S)>;
-
-// [117] "fcmle $Pd, $Pg/z, $Zn, #0.0";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMLE_PPzZ0_D, FCMLE_PPzZ0_H, FCMLE_PPzZ0_S)>;
-
-// [118] "fcmlt $Pd, $Pg/z, $Zn, #0.0";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMLT_PPzZ0_D, FCMLT_PPzZ0_H, FCMLT_PPzZ0_S)>;
-
-// [119] "fcmne $Pd, $Pg/z, $Zn, #0.0";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMNE_PPzZ0_D, FCMNE_PPzZ0_H, FCMNE_PPzZ0_S)>;
-
-// [120] "fcmne $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMNE_PPzZZ_D, FCMNE_PPzZZ_H, FCMNE_PPzZZ_S)>;
-
-// [121] "fcmuo $Pd, $Pg/z, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMUO_PPzZZ_D, FCMUO_PPzZZ_H, FCMUO_PPzZZ_S)>;
-
-// [122] "fcpy $Zd, $Pg/m, $imm8";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCPY_ZPmI_D, FCPY_ZPmI_H, FCPY_ZPmI_S)>;
-
-// [123] "fcvt $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVT_ZPmZ_DtoH, FCVT_ZPmZ_DtoS, FCVT_ZPmZ_HtoD, FCVT_ZPmZ_HtoS, FCVT_ZPmZ_StoD, FCVT_ZPmZ_StoH)>;
-
-// [124] "fcvtzs $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVTZS_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoS, FCVTZS_ZPmZ_HtoD, FCVTZS_ZPmZ_HtoH, FCVTZS_ZPmZ_HtoS, FCVTZS_ZPmZ_StoD, FCVTZS_ZPmZ_StoS)>;
-
-// [125] "fcvtzu $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVTZU_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoS, FCVTZU_ZPmZ_HtoD, FCVTZU_ZPmZ_HtoH, FCVTZU_ZPmZ_HtoS, FCVTZU_ZPmZ_StoD, FCVTZU_ZPmZ_StoS)>;
-
-// [126] "fdiv $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FDIV_ZPmZ_D)>;
-def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FDIV_ZPmZ_H)>;
-def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FDIV_ZPmZ_S)>;
-
-// [127] "fdivr $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FDIVR_ZPmZ_D)>;
-def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FDIVR_ZPmZ_H)>;
-def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FDIVR_ZPmZ_S)>;
-
-// [128] "fdup $Zd, $imm8";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FDUP_ZI_D, FDUP_ZI_H, FDUP_ZI_S)>;
-
-// [129] "fexpa $Zd, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FEXPA_ZZ_D, FEXPA_ZZ_H, FEXPA_ZZ_S)>;
-
-// [130] "fmad $Zdn, $Pg/m, $Zm, $Za";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMAD_ZPmZZ_D, FMAD_ZPmZZ_H, FMAD_ZPmZZ_S)>;
-
-// [131] "fmax $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMAX_ZPmZ_D, FMAX_ZPmZ_H, FMAX_ZPmZ_S)>;
-
-// [132] "fmax $Zdn, $Pg/m, $_Zdn, $i1";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMAX_ZPmI_D, FMAX_ZPmI_H, FMAX_ZPmI_S)>;
-
-// [133] "fmaxnm $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMAXNM_ZPmZ_D, FMAXNM_ZPmZ_H, FMAXNM_ZPmZ_S)>;
-
-// [134] "fmaxnm $Zdn, $Pg/m, $_Zdn, $i1";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMAXNM_ZPmI_D, FMAXNM_ZPmI_H, FMAXNM_ZPmI_S)>;
-
-// [135] "fmaxnmv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMAXNMV_VPZ_D, FMAXNMV_VPZ_H, FMAXNMV_VPZ_S)>;
-
-// [136] "fmaxv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMAXV_VPZ_D, FMAXV_VPZ_H, FMAXV_VPZ_S)>;
-
-// [137] "fmin $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMIN_ZPmZ_D, FMIN_ZPmZ_H, FMIN_ZPmZ_S)>;
-
-// [138] "fmin $Zdn, $Pg/m, $_Zdn, $i1";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMIN_ZPmI_D, FMIN_ZPmI_H, FMIN_ZPmI_S)>;
-
-// [139] "fminnm $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMINNM_ZPmZ_D, FMINNM_ZPmZ_H, FMINNM_ZPmZ_S)>;
-
-// [140] "fminnm $Zdn, $Pg/m, $_Zdn, $i1";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMINNM_ZPmI_D, FMINNM_ZPmI_H, FMINNM_ZPmI_S)>;
-
-// [141] "fminnmv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMINNMV_VPZ_D, FMINNMV_VPZ_H, FMINNMV_VPZ_S)>;
-
-// [142] "fminv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMINV_VPZ_D, FMINV_VPZ_H, FMINV_VPZ_S)>;
-
-// [143] "fmla $Zda, $Pg/m, $Zn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLA_ZPmZZ_D, FMLA_ZPmZZ_H, FMLA_ZPmZZ_S)>;
-
-// [144] "fmla $Zda, $Zn, $Zm$iop";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLA_ZZZI_D, FMLA_ZZZI_H, FMLA_ZZZI_S)>;
-
-// [145] "fmls $Zda, $Pg/m, $Zn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLS_ZPmZZ_D, FMLS_ZPmZZ_H, FMLS_ZPmZZ_S)>;
-
-// [146] "fmls $Zda, $Zn, $Zm$iop";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLS_ZZZI_D, FMLS_ZZZI_H, FMLS_ZZZI_S)>;
-
-// [147] "fmsb $Zdn, $Pg/m, $Zm, $Za";
-
-// [148] "fmul $Zd, $Zn, $Zm";
-
-// [149] "fmul $Zd, $Zn, $Zm$iop";
-
-// [150] "fmul $Zdn, $Pg/m, $_Zdn, $Zm";
-
-// [151] "fmul $Zdn, $Pg/m, $_Zdn, $i1";
-
-// [152] "fmulx $Zdn, $Pg/m, $_Zdn, $Zm";
-
-// [153] "fneg $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FNEG_ZPmZ_D, FNEG_ZPmZ_H, FNEG_ZPmZ_S)>;
-
-// [154] "fnmad $Zdn, $Pg/m, $Zm, $Za";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMAD_ZPmZZ_D, FNMAD_ZPmZZ_H, FNMAD_ZPmZZ_S)>;
-
-// [155] "fnmla $Zda, $Pg/m, $Zn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMLA_ZPmZZ_D, FNMLA_ZPmZZ_H, FNMLA_ZPmZZ_S)>;
-
-// [156] "fnmls $Zda, $Pg/m, $Zn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMLS_ZPmZZ_D, FNMLS_ZPmZZ_H, FNMLS_ZPmZZ_S)>;
-
-// [157] "fnmsb $Zdn, $Pg/m, $Zm, $Za";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMSB_ZPmZZ_D, FNMSB_ZPmZZ_H, FNMSB_ZPmZZ_S)>;
-
-// [158] "frecpe $Zd, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRECPE_ZZ_D, FRECPE_ZZ_H, FRECPE_ZZ_S)>;
-
-// [159] "frecps $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRECPS_ZZZ_D, FRECPS_ZZZ_H, FRECPS_ZZZ_S)>;
-
-// [160] "frecpx $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRECPX_ZPmZ_D, FRECPX_ZPmZ_H, FRECPX_ZPmZ_S)>;
-
-// [161] "frinta $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTA_ZPmZ_D, FRINTA_ZPmZ_H, FRINTA_ZPmZ_S)>;
-
-// [162] "frinti $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTI_ZPmZ_D, FRINTI_ZPmZ_H, FRINTI_ZPmZ_S)>;
-
-// [163] "frintm $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTM_ZPmZ_D, FRINTM_ZPmZ_H, FRINTM_ZPmZ_S)>;
-
-// [164] "frintn $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTN_ZPmZ_D, FRINTN_ZPmZ_H, FRINTN_ZPmZ_S)>;
-
-// [165] "frintp $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTP_ZPmZ_D, FRINTP_ZPmZ_H, FRINTP_ZPmZ_S)>;
-
-// [166] "frintx $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTX_ZPmZ_D, FRINTX_ZPmZ_H, FRINTX_ZPmZ_S)>;
-
-// [167] "frintz $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTZ_ZPmZ_D, FRINTZ_ZPmZ_H, FRINTZ_ZPmZ_S)>;
-
-// [168] "frsqrte $Zd, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRSQRTE_ZZ_D, FRSQRTE_ZZ_H, FRSQRTE_ZZ_S)>;
-
-// [169] "frsqrts $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRSQRTS_ZZZ_D, FRSQRTS_ZZZ_H, FRSQRTS_ZZZ_S)>;
-
-// [170] "fscale $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSCALE_ZPmZ_D, FSCALE_ZPmZ_H, FSCALE_ZPmZ_S)>;
-
-// [171] "fsqrt $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FSQRT_ZPmZ_D)>;
-def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FSQRT_ZPmZ_H)>;
-def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FSQRT_ZPmZ_S)>;
-
-// [172] "fsub $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUB_ZZZ_D, FSUB_ZZZ_H, FSUB_ZZZ_S)>;
-
-// [173] "fsub $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUB_ZPmZ_D, FSUB_ZPmZ_H, FSUB_ZPmZ_S)>;
-
-// [174] "fsub $Zdn, $Pg/m, $_Zdn, $i1";
-def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs FSUB_ZPmI_D, FSUB_ZPmI_H, FSUB_ZPmI_S)>;
-
-// [175] "fsubr $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUBR_ZPmZ_D, FSUBR_ZPmZ_H, FSUBR_ZPmZ_S)>;
-
-// [176] "fsubr $Zdn, $Pg/m, $_Zdn, $i1";
-def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs FSUBR_ZPmI_D, FSUBR_ZPmI_H, FSUBR_ZPmI_S)>;
-
-// [177] "ftmad $Zdn, $_Zdn, $Zm, $imm3";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FTMAD_ZZI_D, FTMAD_ZZI_H, FTMAD_ZZI_S)>;
-
-// [178] "ftsmul $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FTSMUL_ZZZ_D, FTSMUL_ZZZ_H, FTSMUL_ZZZ_S)>;
-
-// [180] "incb $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCB_XPiI)>;
-
-// [181] "incd $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCD_XPiI)>;
-
-// [182] "incd $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCD_ZPiI)>;
-
-// [183] "inch $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCH_XPiI)>;
-
-// [184] "inch $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCH_ZPiI)>;
-
-// [185] "incp $Rdn, $Pg";
-def : InstRW<[A64FXWrite_6Cyc_GI124], (instrs INCP_XP_B, INCP_XP_D, INCP_XP_H, INCP_XP_S)>;
-
-// [186] "incp $Zdn, $Pg";
-def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs INCP_ZP_D, INCP_ZP_H, INCP_ZP_S)>;
-
-// [187] "incw $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCW_XPiI)>;
-
-// [188] "incw $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCW_ZPiI)>;
-
-// [189] "index $Zd, $Rn, $Rm";
-def : InstRW<[A64FXWrite_17Cyc_GI02], (instrs INDEX_RR_B, INDEX_RR_D, INDEX_RR_H, INDEX_RR_S)>;
-
-// [190] "index $Zd, $Rn, $imm5";
-def : InstRW<[A64FXWrite_21Cyc_GI02], (instrs INDEX_RI_B, INDEX_RI_D, INDEX_RI_H, INDEX_RI_S)>;
-
-// [191] "index $Zd, $imm5, $Rm";
-def : InstRW<[A64FXWrite_21Cyc_GI02], (instrs INDEX_IR_B, INDEX_IR_D, INDEX_IR_H, INDEX_IR_S)>;
-
-// [192] "index $Zd, $imm5, $imm5b";
-def : InstRW<[A64FXWrite_13Cyc_GI0], (instrs INDEX_II_B, INDEX_II_D, INDEX_II_H, INDEX_II_S)>;
-
-// [193] "insr $Zdn, $Rm";
-def : InstRW<[A64FXWrite_10Cyc_GI02], (instrs INSR_ZR_B, INSR_ZR_D, INSR_ZR_H, INSR_ZR_S)>;
-
-// [194] "insr $Zdn, $Vm";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs INSR_ZV_B, INSR_ZV_D, INSR_ZV_H, INSR_ZV_S)>;
-
-// [195] "lasta $Rd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_25Cyc_GI056], (instrs LASTA_RPZ_B, LASTA_RPZ_D, LASTA_RPZ_H, LASTA_RPZ_S)>;
-
-// [196] "lasta $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs LASTA_VPZ_B, LASTA_VPZ_D, LASTA_VPZ_H, LASTA_VPZ_S)>;
-
-// [197] "lastb $Rd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_25Cyc_GI056], (instrs LASTB_RPZ_B, LASTB_RPZ_D, LASTB_RPZ_H, LASTB_RPZ_S)>;
-
-// [198] "lastb $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs LASTB_VPZ_B, LASTB_VPZ_D, LASTB_VPZ_H, LASTB_VPZ_S)>;
-
-// [199] "ld1b $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1B, LD1B_D, LD1B_H, LD1B_S)>;
-
-// [200] "ld1b $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1B_D_REAL, GLD1B_D_SXTW_REAL, GLD1B_D_UXTW_REAL, GLD1B_S_SXTW_REAL, GLD1B_S_UXTW_REAL)>;
-
-// [201] "ld1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1B_D_IMM_REAL, LD1B_H_IMM_REAL, LD1B_IMM_REAL, LD1B_S_IMM_REAL)>;
-
-// [202] "ld1b $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1B_D_IMM_REAL, GLD1B_S_IMM_REAL)>;
-
-// [203] "ld1d $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1D)>;
-
-// [204] "ld1d $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1D_REAL, GLD1D_SCALED_REAL, GLD1D_SXTW_REAL, GLD1D_SXTW_SCALED_REAL, GLD1D_UXTW_REAL, GLD1D_UXTW_SCALED_REAL)>;
-
-// [205] "ld1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1D_IMM_REAL)>;
-
-// [206] "ld1d $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1D_IMM_REAL)>;
-
-// [207] "ld1h $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1H, LD1H_D, LD1H_S)>;
-
-// [208] "ld1h $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1H_D_REAL, GLD1H_D_SCALED_REAL, GLD1H_D_SXTW_REAL, GLD1H_D_SXTW_SCALED_REAL, GLD1H_D_UXTW_REAL, GLD1H_D_UXTW_SCALED_REAL, GLD1H_S_SXTW_REAL, GLD1H_S_SXTW_SCALED_REAL, GLD1H_S_UXTW_REAL, GLD1H_S_UXTW_SCALED_REAL)>;
-
-// [209] "ld1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1H_D_IMM_REAL, LD1H_IMM_REAL, LD1H_S_IMM_REAL)>;
-
-// [210] "ld1h $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1H_D_IMM_REAL, GLD1H_S_IMM_REAL)>;
-
-// [211] "ld1rb $Zt, $Pg/z, [$Rn, $imm6]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RB_D_IMM, LD1RB_H_IMM, LD1RB_IMM, LD1RB_S_IMM)>;
-
-// [212] "ld1rd $Zt, $Pg/z, [$Rn, $imm6]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RD_IMM)>;
-
-// [213] "ld1rh $Zt, $Pg/z, [$Rn, $imm6]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RH_D_IMM, LD1RH_IMM, LD1RH_S_IMM)>;
-
-// [214] "ld1rqb $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_B)>;
-
-// [215] "ld1rqb $Zt, $Pg/z, [$Rn, $imm4]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_B_IMM)>;
-
-// [216] "ld1rqd $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_D)>;
-
-// [217] "ld1rqd $Zt, $Pg/z, [$Rn, $imm4]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_D_IMM)>;
-
-// [218] "ld1rqh $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_H)>;
-
-// [219] "ld1rqh $Zt, $Pg/z, [$Rn, $imm4]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_H_IMM)>;
-
-// [220] "ld1rqw $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_W)>;
-
-// [221] "ld1rqw $Zt, $Pg/z, [$Rn, $imm4]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_W_IMM)>;
-
-// [222] "ld1rsb $Zt, $Pg/z, [$Rn, $imm6]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSB_D_IMM, LD1RSB_H_IMM, LD1RSB_S_IMM)>;
-
-// [223] "ld1rsh $Zt, $Pg/z, [$Rn, $imm6]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSH_D_IMM, LD1RSH_S_IMM)>;
-
-// [224] "ld1rsw $Zt, $Pg/z, [$Rn, $imm6]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSW_IMM)>;
-
-// [225] "ld1rw $Zt, $Pg/z, [$Rn, $imm6]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RW_D_IMM, LD1RW_IMM)>;
-
-// [226] "ld1sb $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SB_D, LD1SB_H, LD1SB_S)>;
-
-// [227] "ld1sb $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SB_D_REAL, GLD1SB_D_SXTW_REAL, GLD1SB_D_UXTW_REAL, GLD1SB_S_SXTW_REAL, GLD1SB_S_UXTW_REAL)>;
-
-// [228] "ld1sb $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SB_D_IMM_REAL, LD1SB_H_IMM_REAL, LD1SB_S_IMM_REAL)>;
-
-// [229] "ld1sb $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SB_D_IMM_REAL, GLD1SB_S_IMM_REAL)>;
-
-// [230] "ld1sh $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SH_D, LD1SH_S)>;
-
-// [231] "ld1sh $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SH_D_REAL, GLD1SH_D_SCALED_REAL, GLD1SH_D_SXTW_REAL, GLD1SH_D_SXTW_SCALED_REAL, GLD1SH_D_UXTW_REAL, GLD1SH_D_UXTW_SCALED_REAL, GLD1SH_S_SXTW_REAL, GLD1SH_S_SXTW_SCALED_REAL, GLD1SH_S_UXTW_REAL, GLD1SH_S_UXTW_SCALED_REAL)>;
-
-// [232] "ld1sh $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SH_D_IMM_REAL, LD1SH_S_IMM_REAL)>;
-
-// [233] "ld1sh $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SH_D_IMM_REAL, GLD1SH_S_IMM_REAL)>;
-
-// [234] "ld1sw $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SW_D)>;
-
-// [235] "ld1sw $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SW_D_REAL, GLD1SW_D_SCALED_REAL, GLD1SW_D_SXTW_REAL, GLD1SW_D_SXTW_SCALED_REAL, GLD1SW_D_UXTW_REAL, GLD1SW_D_UXTW_SCALED_REAL)>;
-
-// [236] "ld1sw $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SW_D_IMM_REAL)>;
-
-// [237] "ld1sw $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SW_D_IMM_REAL)>;
-
-// [238] "ld1w $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1W, LD1W_D)>;
-
-// [239] "ld1w $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1W_D_REAL, GLD1W_D_SCALED_REAL, GLD1W_D_SXTW_REAL, GLD1W_D_SXTW_SCALED_REAL, GLD1W_D_UXTW_REAL, GLD1W_D_UXTW_SCALED_REAL, GLD1W_SXTW_REAL, GLD1W_SXTW_SCALED_REAL, GLD1W_UXTW_REAL, GLD1W_UXTW_SCALED_REAL)>;
-
-// [240] "ld1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1W_D_IMM_REAL, LD1W_IMM_REAL)>;
-
-// [241] "ld1w $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1W_D_IMM_REAL, GLD1W_IMM_REAL)>;
-
-// [242] "ld2b $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2B)>;
-
-// [243] "ld2b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2B_IMM)>;
-
-// [244] "ld2d $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2D)>;
-
-// [245] "ld2d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2D_IMM)>;
-
-// [246] "ld2h $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2H)>;
-
-// [247] "ld2h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2H_IMM)>;
-
-// [248] "ld2w $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2W)>;
-
-// [249] "ld2w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2W_IMM)>;
-
-// [250] "ld3b $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3B)>;
-
-// [251] "ld3b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3B_IMM)>;
-
-// [252] "ld3d $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3D)>;
-
-// [253] "ld3d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3D_IMM)>;
-
-// [254] "ld3h $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3H)>;
-
-// [255] "ld3h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3H_IMM)>;
-
-// [256] "ld3w $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3W)>;
-
-// [257] "ld3w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3W_IMM)>;
-
-// [258] "ld4b $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD4B)>;
-
-// [259] "ld4b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD4B_IMM)>;
-
-// [260] "ld4d $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4D)>;
-
-// [261] "ld4d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4D_IMM)>;
-
-// [262] "ld4h $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4H)>;
-
-// [263] "ld4h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4H_IMM)>;
-
-// [264] "ld4w $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4W)>;
-
-// [265] "ld4w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4W_IMM)>;
-
-// [266] "ldff1b $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1B_D_REAL, LDFF1B_H_REAL, LDFF1B_REAL, LDFF1B_S_REAL)>;
-
-// [267] "ldff1b $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1B_D_REAL, GLDFF1B_D_SXTW_REAL, GLDFF1B_D_UXTW_REAL, GLDFF1B_S_SXTW_REAL, GLDFF1B_S_UXTW_REAL)>;
-
-// [268] "ldff1b $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1B_D_IMM_REAL, GLDFF1B_S_IMM_REAL)>;
-
-// [269] "ldff1d $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1D_REAL)>;
-
-// [270] "ldff1d $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1D_REAL, GLDFF1D_SCALED_REAL, GLDFF1D_SXTW_REAL, GLDFF1D_SXTW_SCALED_REAL, GLDFF1D_UXTW_REAL, GLDFF1D_UXTW_SCALED_REAL)>;
-
-// [271] "ldff1d $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1D_IMM_REAL)>;
-
-// [272] "ldff1h $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1H_D_REAL, LDFF1H_REAL, LDFF1H_S_REAL)>;
-
-// [273] "ldff1h $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1H_D_REAL, GLDFF1H_D_SCALED_REAL, GLDFF1H_D_SXTW_REAL, GLDFF1H_D_SXTW_SCALED_REAL, GLDFF1H_D_UXTW_REAL, GLDFF1H_D_UXTW_SCALED_REAL, GLDFF1H_S_SXTW_REAL, GLDFF1H_S_SXTW_SCALED_REAL, GLDFF1H_S_UXTW_REAL, GLDFF1H_S_UXTW_SCALED_REAL)>;
-
-// [274] "ldff1h $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1H_D_IMM_REAL, GLDFF1H_S_IMM_REAL)>;
-
-// [275] "ldff1sb $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SB_D_REAL, LDFF1SB_H_REAL, LDFF1SB_S_REAL)>;
-
-// [276] "ldff1sb $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SB_D_REAL, GLDFF1SB_D_SXTW_REAL, GLDFF1SB_D_UXTW_REAL, GLDFF1SB_S_SXTW_REAL, GLDFF1SB_S_UXTW_REAL)>;
-
-// [277] "ldff1sb $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SB_D_IMM_REAL, GLDFF1SB_S_IMM_REAL)>;
-
-// [278] "ldff1sh $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SH_D_REAL, LDFF1SH_S_REAL)>;
-
-// [279] "ldff1sh $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SH_D_REAL, GLDFF1SH_D_SCALED_REAL, GLDFF1SH_D_SXTW_REAL, GLDFF1SH_D_SXTW_SCALED_REAL, GLDFF1SH_D_UXTW_REAL, GLDFF1SH_D_UXTW_SCALED_REAL, GLDFF1SH_S_SXTW_REAL, GLDFF1SH_S_SXTW_SCALED_REAL, GLDFF1SH_S_UXTW_REAL, GLDFF1SH_S_UXTW_SCALED_REAL)>;
-
-// [280] "ldff1sh $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SH_D_IMM_REAL, GLDFF1SH_S_IMM_REAL)>;
-
-// [281] "ldff1sw $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SW_D_REAL)>;
-
-// [282] "ldff1sw $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SW_D_REAL, GLDFF1SW_D_SCALED_REAL, GLDFF1SW_D_SXTW_REAL, GLDFF1SW_D_SXTW_SCALED_REAL, GLDFF1SW_D_UXTW_REAL, GLDFF1SW_D_UXTW_SCALED_REAL)>;
-
-// [283] "ldff1sw $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SW_D_IMM_REAL)>;
-
-// [284] "ldff1w $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1W_D_REAL, LDFF1W_REAL)>;
-
-// [285] "ldff1w $Zt, $Pg/z, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1W_D_REAL, GLDFF1W_D_SCALED_REAL, GLDFF1W_D_SXTW_REAL, GLDFF1W_D_SXTW_SCALED_REAL, GLDFF1W_D_UXTW_REAL, GLDFF1W_D_UXTW_SCALED_REAL, GLDFF1W_SXTW_REAL, GLDFF1W_SXTW_SCALED_REAL, GLDFF1W_UXTW_REAL, GLDFF1W_UXTW_SCALED_REAL)>;
-
-// [286] "ldff1w $Zt, $Pg/z, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1W_D_IMM_REAL, GLDFF1W_IMM_REAL)>;
-
-// [287] "ldnf1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1B_D_IMM_REAL, LDNF1B_H_IMM_REAL, LDNF1B_IMM_REAL, LDNF1B_S_IMM_REAL)>;
-
-// [288] "ldnf1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1D_IMM_REAL)>;
-
-// [289] "ldnf1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1H_D_IMM_REAL, LDNF1H_IMM_REAL, LDNF1H_S_IMM_REAL)>;
-
-// [290] "ldnf1sb $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SB_D_IMM_REAL, LDNF1SB_H_IMM_REAL, LDNF1SB_S_IMM_REAL)>;
-
-// [291] "ldnf1sh $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SH_D_IMM_REAL, LDNF1SH_S_IMM_REAL)>;
-
-// [292] "ldnf1sw $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SW_D_IMM_REAL)>;
-
-// [293] "ldnf1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1W_D_IMM_REAL, LDNF1W_IMM_REAL)>;
-
-// [294] "ldnt1b $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1B_ZRR)>;
-
-// [295] "ldnt1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1B_ZRI)>;
-
-// [296] "ldnt1d $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1D_ZRR)>;
-
-// [297] "ldnt1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1D_ZRI)>;
-
-// [298] "ldnt1h $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1H_ZRR)>;
-
-// [299] "ldnt1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1H_ZRI)>;
-
-// [300] "ldnt1w $Zt, $Pg/z, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1W_ZRR)>;
-
-// [301] "ldnt1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1W_ZRI)>;
-
-// [302] "ldr $Pt, [$Rn, $imm9, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI5], (instrs LDR_PXI)>;
-
-// [303] "ldr $Zt, [$Rn, $imm9, mul vl]";
-def : InstRW<[A64FXWrite_11Cyc_GI5], (instrs LDR_ZXI)>;
-
-// [304] "lsl $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_WIDE_ZZZ_B, LSL_WIDE_ZZZ_H, LSL_WIDE_ZZZ_S)>;
-
-// [305] "lsl $Zd, $Zn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_ZZI_B, LSL_ZZI_D, LSL_ZZI_H, LSL_ZZI_S)>;
-
-// [306] "lsl $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_WIDE_ZPmZ_B, LSL_WIDE_ZPmZ_H, LSL_WIDE_ZPmZ_S, LSL_ZPmZ_B, LSL_ZPmZ_D, LSL_ZPmZ_H, LSL_ZPmZ_S)>;
-
-// [307] "lsl $Zdn, $Pg/m, $_Zdn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_ZPmI_B, LSL_ZPmI_D, LSL_ZPmI_H, LSL_ZPmI_S)>;
-
-// [308] "lslr $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSLR_ZPmZ_B, LSLR_ZPmZ_D, LSLR_ZPmZ_H, LSLR_ZPmZ_S)>;
-
-// [309] "lsr $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_WIDE_ZZZ_B, LSR_WIDE_ZZZ_H, LSR_WIDE_ZZZ_S)>;
-
-// [310] "lsr $Zd, $Zn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_ZZI_B, LSR_ZZI_D, LSR_ZZI_H, LSR_ZZI_S)>;
-
-// [311] "lsr $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_WIDE_ZPmZ_B, LSR_WIDE_ZPmZ_H, LSR_WIDE_ZPmZ_S, LSR_ZPmZ_B, LSR_ZPmZ_D, LSR_ZPmZ_H, LSR_ZPmZ_S)>;
-
-// [312] "lsr $Zdn, $Pg/m, $_Zdn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_ZPmI_B, LSR_ZPmI_D, LSR_ZPmI_H, LSR_ZPmI_S)>;
-
-// [313] "lsrr $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSRR_ZPmZ_B, LSRR_ZPmZ_D, LSRR_ZPmZ_H, LSRR_ZPmZ_S)>;
-
-// [314] "mad $Zdn, $Pg/m, $Zm, $Za";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MAD_ZPmZZ_B, MAD_ZPmZZ_D, MAD_ZPmZZ_H, MAD_ZPmZZ_S)>;
-
-// [315] "mla $Zda, $Pg/m, $Zn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MLA_ZPmZZ_B, MLA_ZPmZZ_D, MLA_ZPmZZ_H, MLA_ZPmZZ_S)>;
-
-// [316] "mls $Zda, $Pg/m, $Zn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MLS_ZPmZZ_B, MLS_ZPmZZ_D, MLS_ZPmZZ_H, MLS_ZPmZZ_S)>;
-
-// [317] "movprfx $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZPmZ_B, MOVPRFX_ZPmZ_D, MOVPRFX_ZPmZ_H, MOVPRFX_ZPmZ_S)>;
-
-// [318] "movprfx $Zd, $Pg/z, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZPzZ_B, MOVPRFX_ZPzZ_D, MOVPRFX_ZPzZ_H, MOVPRFX_ZPzZ_S)>;
-
-// [319] "movprfx $Zd, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZZ)>;
-
-// [320] "msb $Zdn, $Pg/m, $Zm, $Za";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MSB_ZPmZZ_B, MSB_ZPmZZ_D, MSB_ZPmZZ_H, MSB_ZPmZZ_S)>;
-
-// [321] "mul $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MUL_ZPmZ_B, MUL_ZPmZ_D, MUL_ZPmZ_H, MUL_ZPmZ_S)>;
-
-// [322] "mul $Zdn, $_Zdn, $imm";
-def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs MUL_ZI_B, MUL_ZI_D, MUL_ZI_H, MUL_ZI_S)>;
-
-// [323] "nand $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NAND_PPzPP)>;
-
-// [324] "nands $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NANDS_PPzPP)>;
-
-// [325] "neg $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs NEG_ZPmZ_B, NEG_ZPmZ_D, NEG_ZPmZ_H, NEG_ZPmZ_S)>;
-
-// [326] "nor $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NOR_PPzPP)>;
-
-// [327] "nors $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NORS_PPzPP)>;
-
-// [328] "not $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs NOT_ZPmZ_B, NOT_ZPmZ_D, NOT_ZPmZ_H, NOT_ZPmZ_S)>;
-
-// [329] "orn $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORN_PPzPP)>;
-
-// [330] "orns $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORNS_PPzPP)>;
-
-// [331] "orr $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORR_PPzPP)>;
-
-// [332] "orr $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ORR_ZZZ)>;
-
-// [333] "orr $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ORR_ZPmZ_B, ORR_ZPmZ_D, ORR_ZPmZ_H, ORR_ZPmZ_S)>;
-
-// [334] "orr $Zdn, $_Zdn, $imms13";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs ORR_ZI)>;
-
-// [335] "orrs $Pd, $Pg/z, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORRS_PPzPP)>;
-
-// [336] "orv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs ORV_VPZ_B, ORV_VPZ_D, ORV_VPZ_H, ORV_VPZ_S)>;
-
-// [337] "pfalse $Pd";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PFALSE)>;
-
-// [338] "pnext $Pdn, $Pg, $_Pdn";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PNEXT_B, PNEXT_D, PNEXT_H, PNEXT_S)>;
-
-// [339] "prfb $prfop, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFB_PRR)>;
-
-// [340] "prfb $prfop, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFB_D_SCALED, PRFB_D_SXTW_SCALED, PRFB_D_UXTW_SCALED, PRFB_S_SXTW_SCALED, PRFB_S_UXTW_SCALED)>;
-
-// [341] "prfb $prfop, $Pg, [$Rn, $imm6, mul vl]";
-def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFB_PRI)>;
-
-// [342] "prfb $prfop, $Pg, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFB_D_PZI, PRFB_S_PZI)>;
-
-// [343] "prfd $prfop, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFD_PRR)>;
-
-// [344] "prfd $prfop, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFD_D_SCALED, PRFD_D_SXTW_SCALED, PRFD_D_UXTW_SCALED, PRFD_S_SXTW_SCALED, PRFD_S_UXTW_SCALED)>;
-
-// [345] "prfd $prfop, $Pg, [$Rn, $imm6, mul vl]";
-def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFD_PRI)>;
-
-// [346] "prfd $prfop, $Pg, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFD_D_PZI, PRFD_S_PZI)>;
-
-// [347] "prfh $prfop, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRR)>;
-
-// [348] "prfh $prfop, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFH_D_SCALED, PRFH_D_SXTW_SCALED, PRFH_D_UXTW_SCALED, PRFH_S_SXTW_SCALED, PRFH_S_UXTW_SCALED)>;
-
-// [349] "prfh $prfop, $Pg, [$Rn, $imm6, mul vl]";
-def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRI)>;
-
-// [350] "prfh $prfop, $Pg, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFH_D_PZI, PRFH_S_PZI)>;
-
-// [351] "prfw $prfop, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFS_PRR)>;
-
-// [352] "prfw $prfop, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFW_D_SCALED, PRFW_D_SXTW_SCALED, PRFW_D_UXTW_SCALED, PRFW_S_SXTW_SCALED, PRFW_S_UXTW_SCALED)>;
-
-// [353] "prfw $prfop, $Pg, [$Rn, $imm6, mul vl]";
-def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFW_PRI)>;
-
-// [354] "prfw $prfop, $Pg, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFW_D_PZI, PRFW_S_PZI)>;
-
-// [355] "ptest $Pg, $Pn";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTEST_PP)>;
-
-// [356] "ptrue $Pd, $pattern";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTRUE_B, PTRUE_D, PTRUE_H, PTRUE_S)>;
-
-// [357] "ptrues $Pd, $pattern";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTRUES_B, PTRUES_D, PTRUES_H, PTRUES_S)>;
-
-// [358] "punpkhi $Pd, $Pn";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PUNPKHI_PP)>;
-
-// [359] "punpklo $Pd, $Pn";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PUNPKLO_PP)>;
-
-// [360] "rbit $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBIT_ZPmZ_B, RBIT_ZPmZ_D, RBIT_ZPmZ_H, RBIT_ZPmZ_S)>;
-
-// [361] "rdffr $Pd";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFR_P)>;
-
-// [362] "rdffr $Pd, $Pg/z";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFR_PPz)>;
-
-// [363] "rdffrs $Pd, $Pg/z";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFRS_PPz)>;
-
-// [364] "rdvl $Rd, $imm6";
-def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs RDVLI_XI)>;
-
-// [365] "rev $Pd, $Pn";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs REV_PP_B, REV_PP_D, REV_PP_H, REV_PP_S)>;
-
-// [366] "rev $Zd, $Zn";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs REV_ZZ_B, REV_ZZ_D, REV_ZZ_H, REV_ZZ_S)>;
-
-// [367] "revb $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVB_ZPmZ_D, REVB_ZPmZ_H, REVB_ZPmZ_S)>;
-
-// [368] "revh $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVH_ZPmZ_D, REVH_ZPmZ_S)>;
-
-// [369] "revw $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVW_ZPmZ_D)>;
-
-// [370] "sabd $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SABD_ZPmZ_B, SABD_ZPmZ_D, SABD_ZPmZ_H, SABD_ZPmZ_S)>;
-
-// [371] "saddv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_12Cyc_GI03], (instrs SADDV_VPZ_B, SADDV_VPZ_H, SADDV_VPZ_S)>;
-
-// [372] "scvtf $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SCVTF_ZPmZ_DtoD, SCVTF_ZPmZ_DtoH, SCVTF_ZPmZ_DtoS, SCVTF_ZPmZ_HtoH, SCVTF_ZPmZ_StoD, SCVTF_ZPmZ_StoH, SCVTF_ZPmZ_StoS)>;
-
-// [373] "sdiv $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs SDIV_ZPmZ_D, SDIV_ZPmZ_S)>;
-
-// [374] "sdivr $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs SDIVR_ZPmZ_D, SDIVR_ZPmZ_S)>;
-
-// [375] "sdot $Zda, $Zn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SDOT_ZZZ_D, SDOT_ZZZ_S)>;
-
-// [376] "sdot $Zda, $Zn, $Zm$iop";
-def : InstRW<[A64FXWrite_15Cyc_NGI03], (instrs SDOT_ZZZI_D, SDOT_ZZZI_S)>;
-
-// [377] "sel $Pd, $Pg, $Pn, $Pm";
-def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs SEL_PPPP)>;
-
-// [378] "sel $Zd, $Pg, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SEL_ZPZZ_B, SEL_ZPZZ_D, SEL_ZPZZ_H, SEL_ZPZZ_S)>;
-
-// [379] "setffr";
-def : InstRW<[A64FXWrite_6Cyc], (instrs SETFFR)>;
-
-// [380] "smax $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SMAX_ZPmZ_B, SMAX_ZPmZ_D, SMAX_ZPmZ_H, SMAX_ZPmZ_S)>;
-
-// [381] "smax $Zdn, $_Zdn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SMAX_ZI_B, SMAX_ZI_D, SMAX_ZI_H, SMAX_ZI_S)>;
-
-// [382] "smaxv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs SMAXV_VPZ_B, SMAXV_VPZ_D, SMAXV_VPZ_H, SMAXV_VPZ_S)>;
-
-// [383] "smin $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SMIN_ZPmZ_B, SMIN_ZPmZ_D, SMIN_ZPmZ_H, SMIN_ZPmZ_S)>;
-
-// [384] "smin $Zdn, $_Zdn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SMIN_ZI_B, SMIN_ZI_D, SMIN_ZI_H, SMIN_ZI_S)>;
-
-// [385] "sminv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs SMINV_VPZ_B, SMINV_VPZ_D, SMINV_VPZ_H, SMINV_VPZ_S)>;
-
-// [386] "smulh $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SMULH_ZPmZ_B, SMULH_ZPmZ_D, SMULH_ZPmZ_H, SMULH_ZPmZ_S)>;
-
-// [387] "splice $Zdn, $Pg, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SPLICE_ZPZ_B, SPLICE_ZPZ_D, SPLICE_ZPZ_H, SPLICE_ZPZ_S)>;
-
-// [388] "sqadd $Zd, $Zn, $Zm";
-
-// [389] "sqadd $Zdn, $_Zdn, $imm";
-
-// [390] "sqdecb $Rdn, $_Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECB_XPiWdI)>;
-
-// [391] "sqdecb $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECB_XPiI)>;
-
-// [392] "sqdecd $Rdn, $_Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECD_XPiWdI)>;
-
-// [393] "sqdecd $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECD_XPiI)>;
-
-// [394] "sqdecd $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECD_ZPiI)>;
-
-// [395] "sqdech $Rdn, $_Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECH_XPiWdI)>;
-
-// [396] "sqdech $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECH_XPiI)>;
-
-// [397] "sqdech $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECH_ZPiI)>;
-
-// [398] "sqdecp $Rdn, $Pg";
-def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQDECP_XP_B, SQDECP_XP_D, SQDECP_XP_H, SQDECP_XP_S)>;
-
-// [399] "sqdecp $Rdn, $Pg, $_Rdn";
-def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQDECP_XPWd_B, SQDECP_XPWd_D, SQDECP_XPWd_H, SQDECP_XPWd_S)>;
-
-// [400] "sqdecp $Zdn, $Pg";
-def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs SQDECP_ZP_D, SQDECP_ZP_H, SQDECP_ZP_S)>;
-
-// [401] "sqdecw $Rdn, $_Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECW_XPiWdI)>;
-
-// [402] "sqdecw $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECW_XPiI)>;
-
-// [403] "sqdecw $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECW_ZPiI)>;
-
-// [404] "sqincb $Rdn, $_Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCB_XPiWdI)>;
-
-// [405] "sqincb $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCB_XPiI)>;
-
-// [406] "sqincd $Rdn, $_Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCD_XPiWdI)>;
-
-// [407] "sqincd $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCD_XPiI)>;
-
-// [408] "sqincd $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCD_ZPiI)>;
-
-// [409] "sqinch $Rdn, $_Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCH_XPiWdI)>;
-
-// [410] "sqinch $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCH_XPiI)>;
-
-// [411] "sqinch $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCH_ZPiI)>;
-
-// [412] "sqincp $Rdn, $Pg";
-def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQINCP_XP_B, SQINCP_XP_D, SQINCP_XP_H, SQINCP_XP_S)>;
-
-// [413] "sqincp $Rdn, $Pg, $_Rdn";
-def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQINCP_XPWd_B, SQINCP_XPWd_D, SQINCP_XPWd_H, SQINCP_XPWd_S)>;
-
-// [414] "sqincp $Zdn, $Pg";
-def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs SQINCP_ZP_D, SQINCP_ZP_H, SQINCP_ZP_S)>;
-
-// [415] "sqincw $Rdn, $_Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCW_XPiWdI)>;
-
-// [416] "sqincw $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCW_XPiI)>;
-
-// [417] "sqincw $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCW_ZPiI)>;
-
-// [418] "sqsub $Zd, $Zn, $Zm";
-
-// [419] "sqsub $Zdn, $_Zdn, $imm";
-
-// [420] "st1b $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B, ST1B_D, ST1B_H, ST1B_S)>;
-
-// [421] "st1b $Zt, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1B_D_REAL, SST1B_D_SXTW, SST1B_D_UXTW, SST1B_S_SXTW, SST1B_S_UXTW)>;
-
-// [422] "st1b $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B_D_IMM, ST1B_H_IMM, ST1B_IMM, ST1B_S_IMM)>;
-
-// [423] "st1b $Zt, $Pg, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1B_D_IMM, SST1B_S_IMM)>;
-
-// [424] "st1d $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D)>;
-
-// [425] "st1d $Zt, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1D_REAL, SST1D_SCALED_SCALED_REAL, SST1D_SXTW, SST1D_SXTW_SCALED, SST1D_UXTW, SST1D_UXTW_SCALED)>;
-
-// [426] "st1d $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D_IMM)>;
-
-// [427] "st1d $Zt, $Pg, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1D_IMM)>;
-
-// [428] "st1h $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H, ST1H_D, ST1H_S)>;
-
-// [429] "st1h $Zt, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1H_D_REAL, SST1H_D_SCALED_SCALED_REAL, SST1H_D_SXTW, SST1H_D_SXTW_SCALED, SST1H_D_UXTW, SST1H_D_UXTW_SCALED, SST1H_S_SXTW, SST1H_S_SXTW_SCALED, SST1H_S_UXTW, SST1H_S_UXTW_SCALED)>;
-
-// [430] "st1h $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H_D_IMM, ST1H_IMM, ST1H_S_IMM)>;
-
-// [431] "st1h $Zt, $Pg, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1H_D_IMM, SST1H_S_IMM)>;
-
-// [432] "st1w $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W, ST1W_D)>;
-
-// [433] "st1w $Zt, $Pg, [$Rn, $Zm]";
-def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1W_D_REAL, SST1W_D_SCALED_SCALED_REAL, SST1W_D_SXTW, SST1W_D_SXTW_SCALED, SST1W_D_UXTW, SST1W_D_UXTW_SCALED, SST1W_SXTW, SST1W_SXTW_SCALED, SST1W_UXTW, SST1W_UXTW_SCALED)>;
-
-// [434] "st1w $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W_D_IMM, ST1W_IMM)>;
-
-// [435] "st1w $Zt, $Pg, [$Zn, $imm5]";
-def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1W_D_IMM, SST1W_IMM)>;
-
-// [436] "st2b $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2B)>;
-
-// [437] "st2b $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2B_IMM)>;
-
-// [438] "st2d $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2D)>;
-
-// [439] "st2d $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2D_IMM)>;
-
-// [440] "st2h $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2H)>;
-
-// [441] "st2h $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2H_IMM)>;
-
-// [442] "st2w $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2W)>;
-
-// [443] "st2w $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2W_IMM)>;
-
-// [444] "st3b $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3B)>;
-
-// [445] "st3b $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3B_IMM)>;
-
-// [446] "st3d $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3D)>;
-
-// [447] "st3d $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3D_IMM)>;
-
-// [448] "st3h $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3H)>;
-
-// [449] "st3h $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3H_IMM)>;
-
-// [450] "st3w $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3W)>;
-
-// [451] "st3w $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3W_IMM)>;
-
-// [452] "st4b $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4B)>;
-
-// [453] "st4b $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4B_IMM)>;
-
-// [454] "st4d $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4D)>;
-
-// [455] "st4d $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4D_IMM)>;
-
-// [456] "st4h $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4H)>;
-
-// [457] "st4h $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4H_IMM)>;
-
-// [458] "st4w $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4W)>;
-
-// [459] "st4w $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4W_IMM)>;
-
-// [460] "stnt1b $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1B_ZRR)>;
-
-// [461] "stnt1b $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1B_ZRI)>;
-
-// [462] "stnt1d $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1D_ZRR)>;
-
-// [463] "stnt1d $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1D_ZRI)>;
-
-// [464] "stnt1h $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1H_ZRR)>;
-
-// [465] "stnt1h $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1H_ZRI)>;
-
-// [466] "stnt1w $Zt, $Pg, [$Rn, $Rm]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1W_ZRR)>;
-
-// [467] "stnt1w $Zt, $Pg, [$Rn, $imm4, mul vl]";
-def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1W_ZRI)>;
-
-// [468] "str $Pt, [$Rn, $imm9, mul vl]";
-def : InstRW<[A64FXWrite_6Cyc_GI15], (instrs STR_PXI)>;
-
-// [469] "str $Zt, [$Rn, $imm9, mul vl]";
-def : InstRW<[A64FXWrite_6Cyc_GI05], (instrs STR_ZXI)>;
-
-// [470] "sub $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZZZ_B, SUB_ZZZ_D, SUB_ZZZ_H, SUB_ZZZ_S)>;
-
-// [471] "sub $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZPmZ_B, SUB_ZPmZ_D, SUB_ZPmZ_H, SUB_ZPmZ_S)>;
-
-// [472] "sub $Zdn, $_Zdn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZI_B, SUB_ZI_D, SUB_ZI_H, SUB_ZI_S)>;
-
-// [473] "subr $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUBR_ZPmZ_B, SUBR_ZPmZ_D, SUBR_ZPmZ_H, SUBR_ZPmZ_S)>;
-
-// [474] "subr $Zdn, $_Zdn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SUBR_ZI_B, SUBR_ZI_D, SUBR_ZI_H, SUBR_ZI_S)>;
-
-// [475] "sunpkhi $Zd, $Zn";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SUNPKHI_ZZ_D, SUNPKHI_ZZ_H, SUNPKHI_ZZ_S)>;
-
-// [476] "sunpklo $Zd, $Zn";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SUNPKLO_ZZ_D, SUNPKLO_ZZ_H, SUNPKLO_ZZ_S)>;
-
-// [477] "sxtb $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTB_ZPmZ_D, SXTB_ZPmZ_H, SXTB_ZPmZ_S)>;
-
-// [478] "sxth $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTH_ZPmZ_D, SXTH_ZPmZ_S)>;
-
-// [479] "sxtw $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTW_ZPmZ_D)>;
-
-// [480] "tbl $Zd, $Zn, $Zm";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs TBL_ZZZ_B, TBL_ZZZ_D, TBL_ZZZ_H, TBL_ZZZ_S)>;
-
-// [481] "trn1 $Pd, $Pn, $Pm";
-
-// [482] "trn1 $Zd, $Zn, $Zm";
-
-// [483] "trn2 $Pd, $Pn, $Pm";
-
-// [484] "trn2 $Zd, $Zn, $Zm";
-
-// [486] "uabd $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UABD_ZPmZ_B, UABD_ZPmZ_D, UABD_ZPmZ_H, UABD_ZPmZ_S)>;
-
-// [487] "uaddv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_12Cyc_GI03], (instrs UADDV_VPZ_B, UADDV_VPZ_D, UADDV_VPZ_H, UADDV_VPZ_S)>;
-
-// [488] "ucvtf $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UCVTF_ZPmZ_DtoD, UCVTF_ZPmZ_DtoH, UCVTF_ZPmZ_DtoS, UCVTF_ZPmZ_HtoH, UCVTF_ZPmZ_StoD, UCVTF_ZPmZ_StoH, UCVTF_ZPmZ_StoS)>;
-
-// [489] "udiv $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs UDIV_ZPmZ_D, UDIV_ZPmZ_S)>;
-
-// [490] "udivr $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs UDIVR_ZPmZ_D, UDIVR_ZPmZ_S)>;
-
-// [491] "udot $Zda, $Zn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UDOT_ZZZ_D, UDOT_ZZZ_S)>;
-
-// [492] "udot $Zda, $Zn, $Zm$iop";
-def : InstRW<[A64FXWrite_15Cyc_NGI03], (instrs UDOT_ZZZI_D, UDOT_ZZZI_S)>;
-
-// [493] "umax $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UMAX_ZPmZ_B, UMAX_ZPmZ_D, UMAX_ZPmZ_H, UMAX_ZPmZ_S)>;
-
-// [494] "umax $Zdn, $_Zdn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs UMAX_ZI_B, UMAX_ZI_D, UMAX_ZI_H, UMAX_ZI_S)>;
-
-// [495] "umaxv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs UMAXV_VPZ_B, UMAXV_VPZ_D, UMAXV_VPZ_H, UMAXV_VPZ_S)>;
-
-// [496] "umin $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UMIN_ZPmZ_B, UMIN_ZPmZ_D, UMIN_ZPmZ_H, UMIN_ZPmZ_S)>;
-
-// [497] "umin $Zdn, $_Zdn, $imm";
-def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs UMIN_ZI_B, UMIN_ZI_D, UMIN_ZI_H, UMIN_ZI_S)>;
-
-// [498] "uminv $Vd, $Pg, $Zn";
-def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs UMINV_VPZ_B, UMINV_VPZ_D, UMINV_VPZ_H, UMINV_VPZ_S)>;
-
-// [499] "umulh $Zdn, $Pg/m, $_Zdn, $Zm";
-def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UMULH_ZPmZ_B, UMULH_ZPmZ_D, UMULH_ZPmZ_H, UMULH_ZPmZ_S)>;
-
-// [500] "uqadd $Zd, $Zn, $Zm";
-
-// [501] "uqadd $Zdn, $_Zdn, $imm";
-
-// [502] "uqdecb $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECB_WPiI, UQDECB_XPiI)>;
-
-// [503] "uqdecd $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECD_WPiI, UQDECD_XPiI)>;
-
-// [504] "uqdecd $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECD_ZPiI)>;
-
-// [505] "uqdech $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECH_WPiI, UQDECH_XPiI)>;
-
-// [506] "uqdech $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECH_ZPiI)>;
-
-// [507] "uqdecp $Rdn, $Pg";
-def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs UQDECP_WP_B, UQDECP_WP_D, UQDECP_WP_H, UQDECP_WP_S, UQDECP_XP_B, UQDECP_XP_D, UQDECP_XP_H, UQDECP_XP_S)>;
-
-// [508] "uqdecp $Zdn, $Pg";
-def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs UQDECP_ZP_D, UQDECP_ZP_H, UQDECP_ZP_S)>;
-
-// [509] "uqdecw $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECW_WPiI, UQDECW_XPiI)>;
-
-// [510] "uqdecw $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECW_ZPiI)>;
-
-// [511] "uqincb $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCB_WPiI, UQINCB_XPiI)>;
-
-// [512] "uqincd $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCD_WPiI, UQINCD_XPiI)>;
-
-// [513] "uqincd $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCD_ZPiI)>;
-
-// [514] "uqinch $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCH_WPiI, UQINCH_XPiI)>;
-
-// [515] "uqinch $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCH_ZPiI)>;
-
-// [516] "uqincp $Rdn, $Pg";
-def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs UQINCP_WP_B, UQINCP_WP_D, UQINCP_WP_H, UQINCP_WP_S, UQINCP_XP_B, UQINCP_XP_D, UQINCP_XP_H, UQINCP_XP_S)>;
-
-// [517] "uqincp $Zdn, $Pg";
-def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs UQINCP_ZP_D, UQINCP_ZP_H, UQINCP_ZP_S)>;
-
-// [518] "uqincw $Rdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCW_WPiI, UQINCW_XPiI)>;
-
-// [519] "uqincw $Zdn, $pattern, mul $imm4";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCW_ZPiI)>;
-
-// [520] "uqsub $Zd, $Zn, $Zm";
-//@@@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQSUB_ZZZ_B, UQSUB_ZZZ_D, UQSUB_ZZZ_H, UQSUB_ZZZ_S)>;
-
-// [521] "uqsub $Zdn, $_Zdn, $imm";
-//@@@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQSUB_ZI_B, UQSUB_ZI_D, UQSUB_ZI_H, UQSUB_ZI_S)>;
-
-// [522] "uunpkhi $Zd, $Zn";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs UUNPKHI_ZZ_D, UUNPKHI_ZZ_H, UUNPKHI_ZZ_S)>;
-
-// [523] "uunpklo $Zd, $Zn";
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs UUNPKLO_ZZ_D, UUNPKLO_ZZ_H, UUNPKLO_ZZ_S)>;
-
-// [524] "uxtb $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTB_ZPmZ_D, UXTB_ZPmZ_H, UXTB_ZPmZ_S)>;
-
-// [525] "uxth $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTH_ZPmZ_D, UXTH_ZPmZ_S)>;
-
-// [526] "uxtw $Zd, $Pg/m, $Zn";
-def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTW_ZPmZ_D)>;
-
-// [527] "uzp1 $Pd, $Pn, $Pm";
-
-// [528] "uzp1 $Zd, $Zn, $Zm";
-
-// [529] "uzp2 $Pd, $Pn, $Pm";
-
-// [530] "uzp2 $Zd, $Zn, $Zm";
-
-// [531] "whilele $Pd, $Rn, $Rm";
-def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELE_PWW_B, WHILELE_PWW_D, WHILELE_PWW_H, WHILELE_PWW_S, WHILELE_PXX_B, WHILELE_PXX_D, WHILELE_PXX_H, WHILELE_PXX_S)>;
-
-// [532] "whilelo $Pd, $Rn, $Rm";
-def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELO_PWW_B, WHILELO_PWW_D, WHILELO_PWW_H, WHILELO_PWW_S, WHILELO_PXX_B, WHILELO_PXX_D, WHILELO_PXX_H, WHILELO_PXX_S)>;
-
-// [533] "whilels $Pd, $Rn, $Rm";
-def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELS_PWW_B, WHILELS_PWW_D, WHILELS_PWW_H, WHILELS_PWW_S, WHILELS_PXX_B, WHILELS_PXX_D, WHILELS_PXX_H, WHILELS_PXX_S)>;
-
-// [534] "whilelt $Pd, $Rn, $Rm";
-def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELT_PWW_B, WHILELT_PWW_D, WHILELT_PWW_H, WHILELT_PWW_S, WHILELT_PXX_B, WHILELT_PXX_D, WHILELT_PXX_H, WHILELT_PXX_S)>;
-
-// [535] "wrffr $Pn";
-def : InstRW<[A64FXWrite_6Cyc_NGI1], (instrs WRFFR)>;
-
-// [536] "zip1 $Pd, $Pn, $Pm";
-
-// [537] "zip1 $Zd, $Zn, $Zm";
-
-// [538] "zip2 $Pd, $Pn, $Pm";
-
-// [539] "zip2 $Zd, $Zn, $Zm";
-
-} // SchedModel = A64FXModel
+//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the Fujitsu A64FX processors.
+//
+//===----------------------------------------------------------------------===//
+
+def A64FXModel : SchedMachineModel {
+ let IssueWidth = 6; // 6 micro-ops dispatched at a time.
+ let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer.
+ let LoadLatency = 5; // Optimistic load latency.
+ let MispredictPenalty = 12; // Extra cycles for mispredicted branch.
+ // Determined via a mix of micro-arch details and experimentation.
+ let LoopMicroOpBufferSize = 128;
+ let PostRAScheduler = 1; // Using PostRA sched.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures =
+ [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth];
+
+ let FullInstRWOverlapCheck = 0;
+}
+
+let SchedModel = A64FXModel in {
+
+// Define the issue ports.
+
+// A64FXIP*
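+// Port naming follows the A64FX pipeline names: FLA and FLB are the
+// FP/SIMD pipelines, EXA and EXB the integer execution pipelines, EAGA and
+// EAGB the address-generation (load/store) pipelines, PR the predicate
+// pipeline, and BR the branch pipeline.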
+
+// Port 0
+def A64FXIPFLA : ProcResource<1>;
+
+// Port 1
+def A64FXIPPR : ProcResource<1>;
+
+// Port 2
+def A64FXIPEXA : ProcResource<1>;
+
+// Port 3
+def A64FXIPFLB : ProcResource<1>;
+
+// Port 4
+def A64FXIPEXB : ProcResource<1>;
+
+// Port 5
+def A64FXIPEAGA : ProcResource<1>;
+
+// Port 6
+def A64FXIPEAGB : ProcResource<1>;
+
+// Port 7
+def A64FXIPBR : ProcResource<1>;
+
+// Define groups for the functional units on each issue port. Each group
+// created will be used by a WriteRes later on.
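+// Group names encode the member ports: for example, A64FXGI03 is the pair
+// of FP/SIMD pipelines (ports 0 and 3) and A64FXGI56 the pair of
+// load/store ports (5 and 6).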
+
+def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;
+
+def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;
+
+def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;
+
+def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;
+
+def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;
+
+def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;
+
+def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;
+
+def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;
+
+def A64FXGI03 : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;
+
+def A64FXGI01 : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;
+
+def A64FXGI02 : ProcResGroup<[A64FXIPFLA, A64FXIPEXA]>;
+
+def A64FXGI12 : ProcResGroup<[A64FXIPEXA, A64FXIPPR]>;
+
+def A64FXGI15 : ProcResGroup<[A64FXIPEAGA, A64FXIPPR]>;
+
+def A64FXGI05 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA]>;
+
+def A64FXGI24 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;
+
+def A64FXGI124 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPPR]>;
+
+def A64FXGI056 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;
+
+def A64FXGI0256 : ProcResGroup<[A64FXIPFLA, A64FXIPEXA, A64FXIPEAGA, A64FXIPEAGB]>;
+
+def A64FXGI56 : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;
+
+def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB]>;
+
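+// A64FXAny spans all eight issue ports; BufferSize bounds how many
+// micro-ops can be queued against the group at once.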
+def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
+ A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB, A64FXIPBR]> {
+ let BufferSize = 60;
+}
+
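+// Write resources are named A64FXWrite_<latency>Cyc_<group>; for example,
+// A64FXWrite_4Cyc_GI03 is a 4-cycle result that may issue on either FP/SIMD
+// pipeline. The "N" variants also set NumMicroOps.
+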
+def A64FXWrite_6Cyc : SchedWriteRes<[]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 2;
+}
+
+def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 4;
+}
+
+def A64FXWrite_5Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 5;
+}
+
+def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 9;
+}
+
+def A64FXWrite_13Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 13;
+}
+
+def A64FXWrite_37Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 37;
+}
+
+def A64FXWrite_98Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 98;
+}
+
+def A64FXWrite_134Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 134;
+}
+
+def A64FXWrite_154Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 154;
+}
+
+def A64FXWrite_4Cyc_GI01 : SchedWriteRes<[A64FXGI01]> {
+ let Latency = 4;
+}
+
+def A64FXWrite_6Cyc_GI01 : SchedWriteRes<[A64FXGI01]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_8Cyc_GI01 : SchedWriteRes<[A64FXGI01]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_12Cyc_GI01 : SchedWriteRes<[A64FXGI01]> {
+ let Latency = 12;
+}
+
+def A64FXWrite_10Cyc_GI02 : SchedWriteRes<[A64FXGI02]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_17Cyc_GI02 : SchedWriteRes<[A64FXGI02]> {
+ let Latency = 17;
+}
+
+def A64FXWrite_21Cyc_GI02 : SchedWriteRes<[A64FXGI02]> {
+ let Latency = 21;
+}
+
+def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> {
+ let Latency = 3;
+}
+
+def A64FXWrite_6Cyc_NGI1 : SchedWriteRes<[A64FXGI1]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_4Cyc_GI12 : SchedWriteRes<[A64FXGI12]> {
+ let Latency = 4;
+}
+
+def A64FXWrite_3Cyc_GI2 : SchedWriteRes<[A64FXGI2]> {
+ let Latency = 3;
+}
+
+def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> {
+ let Latency = 5;
+}
+
+def A64FXWrite_6Cyc_GI2 : SchedWriteRes<[A64FXGI2]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> {
+ let Latency = 4;
+}
+
+def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_6Cyc_GI15 : SchedWriteRes<[A64FXGI15]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_3Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 3;
+}
+
+def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 4;
+}
+
+def A64FXWrite_6Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 9;
+}
+
+def A64FXWrite_10Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_12Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 12;
+}
+
+def A64FXWrite_14Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+}
+
+def A64FXWrite_15Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 15;
+}
+
+def A64FXWrite_15Cyc_NGI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_18Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 18;
+}
+
+def A64FXWrite_45Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 45;
+}
+
+def A64FXWrite_60Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 60;
+}
+
+def A64FXWrite_75Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 75;
+}
+
+def A64FXWrite_6Cyc_GI05 : SchedWriteRes<[A64FXGI05]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
+ let Latency = 12;
+}
+
+def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
+ let Latency = 20;
+}
+
+def A64FXWrite_5Cyc_GI5 : SchedWriteRes<[A64FXGI5]> {
+ let Latency = 5;
+}
+
+def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> {
+ let Latency = 11;
+}
+
+def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> {
+ let Latency = 5;
+}
+
+def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> {
+ let Latency = 2;
+}
+
+def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def A64FXWrite_6Cyc_GI124: SchedWriteRes<[A64FXGI124]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_8Cyc_GI124 : SchedWriteRes<[A64FXGI124]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_6Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_1Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_5Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
+ let Latency = 5;
+}
+
+def A64FXWrite_8Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+}
+
+def A64FXWrite_44Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
+ let Latency = 44;
+}
+
+def A64FXWrite_10Cyc_GI056 : SchedWriteRes<[A64FXGI056]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_15Cyc_GI056 : SchedWriteRes<[A64FXGI056]> {
+ let Latency = 15;
+}
+
+def A64FXWrite_19Cyc_GI056 : SchedWriteRes<[A64FXGI056]> {
+ let Latency = 19;
+}
+
+def A64FXWrite_25Cyc_GI056 : SchedWriteRes<[A64FXGI056]> {
+ let Latency = 25;
+}
+
+def A64FXWrite_14Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> {
+ let Latency = 14;
+}
+
+def A64FXWrite_19Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> {
+ let Latency = 19;
+}
+
+def A64FXWrite_29Cyc_GI0256 : SchedWriteRes<[A64FXGI0256]> {
+ let Latency = 29;
+}
+
+def A64FXWrite_LDNP: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LDP01: SchedWriteRes<[A64FXGI2456]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_LDR01: SchedWriteRes<[A64FXGI2456]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LD102: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LD103: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LD104: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_LD105: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_LD106: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def A64FXWrite_LD107: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+}
+
+def A64FXWrite_LD108: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LD109: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LD110: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_LD111: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_LD112: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def A64FXWrite_LD113: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+}
+
+def A64FXWrite_LD114: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+
+def A64FXWrite_LD115: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 11;
+ let NumMicroOps = 5;
+}
+
+def A64FXWrite_LD1I0: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_LD1I1: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_LD2I0: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def A64FXWrite_LD2I1: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+
+def A64FXWrite_LD3I0: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+}
+
+def A64FXWrite_LD3I1: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 7;
+}
+
+def A64FXWrite_LD4I0: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+}
+
+def A64FXWrite_LD4I1: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+ let NumMicroOps = 9;
+}
+
+def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_FMOV_GV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+}
+
+def A64FXWrite_FMOV_VG : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 25;
+}
+
+def A64FXWrite_ADDLV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 12;
+}
+
+def A64FXWrite_MULLE : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+}
+
+def A64FXWrite_MULLV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+}
+
+def A64FXWrite_MADDL : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_ABA : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_ABAL : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 12;
+ let NumMicroOps = 6;
+}
+
+def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+ let NumMicroOps = 6;
+}
+
+def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 9;
+}
+
+def A64FXWrite_PMUL : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 15;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+
+def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+ let NumMicroOps = 7;
+}
+
+def A64FXWrite_FMAXVVS : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 14;
+}
+
+def A64FXWrite_BIF : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 5;
+}
+
+def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+}
+
+def A64FXWrite_SHA00 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 9;
+}
+
+def A64FXWrite_SHA01 : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 12;
+}
+
+def A64FXWrite_SMOV : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 25;
+}
+
+def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+
+def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 5;
+}
+
+def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 7;
+}
+
+def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 10;
+ let NumMicroOps = 9;
+}
+
+def A64FXWrite_PREF0: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_PREF1: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_SWP: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_STUR: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_STNP: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_STP01: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_ST10: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_ST11: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_ST12: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_ST13: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 0;
+}
+
+def A64FXWrite_ST14: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_ST15: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_ST16: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_ST17: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 1;
+}
+
+def A64FXWrite_ST1W_6: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 6;
+}
+
+def A64FXWrite_ST2W_7: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 7;
+}
+
+def A64FXWrite_ST3W_8: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 8;
+}
+
+def A64FXWrite_ST4W_9: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 9;
+}
+
+def A64FXWrite_ST1W_15: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 15;
+}
+
+def A64FXWrite_ST1W_19: SchedWriteRes<[A64FXGI056]> {
+ let Latency = 19;
+}
+
+def A64FXWrite_CAS: SchedWriteRes<[A64FXGI56]> {
+ let Latency = 7;
+}
+
+// Define commonly used read types.
+
+// No forwarding is provided for these types.
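+// A ReadAdvance of 0 means the full producer latency is observed; no
+// bypass cycles are subtracted for these operand kinds.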
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+//===----------------------------------------------------------------------===//
+// 3. Instruction Tables.
+
+//---
+// 3.1 Branch Instructions
+//---
+
+// Branch, immed
+// Branch and link, immed
+// Compare and branch
+def : WriteRes<WriteBr, [A64FXGI7]> {
+ let Latency = 1;
+}
+
+// Branch, register
+// Branch and link, register != LR
+// Branch and link, register = LR
+def : WriteRes<WriteBrReg, [A64FXGI7]> {
+ let Latency = 1;
+}
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteAtomic, []> {
+ let Latency = 4;
+}
+
+//---
+// Branch
+//---
+def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>;
+def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>;
+def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>;
+def : InstRW<[A64FXWrite_1Cyc_GI7],
+ (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;
+
+//---
+// 3.2 Arithmetic and Logical Instructions
+// 3.3 Move and Shift Instructions
+//---
+
+// ALU, basic
+// Conditional compare
+// Conditional select
+// Address generation
+def : WriteRes<WriteI, [A64FXGI2456]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+def : InstRW<[WriteI],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC(W|X)r",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
+ "SBCS(W|X)r", "CCMN(W|X)(i|r)",
+ "CCMP(W|X)(i|r)", "CSEL(W|X)r",
+ "CSINC(W|X)r", "CSINV(W|X)r",
+ "CSNEG(W|X)r")>;
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// ALU, extend and/or shift
+def : WriteRes<WriteISReg, [A64FXGI2456]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+}
+
+def : InstRW<[WriteISReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC(W|X)r",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
+ "SBCS(W|X)r", "CCMN(W|X)(i|r)",
+ "CCMP(W|X)(i|r)", "CSEL(W|X)r",
+ "CSINC(W|X)r", "CSINV(W|X)r",
+ "CSNEG(W|X)r")>;
+
+def : WriteRes<WriteIEReg, [A64FXGI2456]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+def : InstRW<[WriteIEReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC(W|X)r",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
+ "SBCS(W|X)r", "CCMN(W|X)(i|r)",
+ "CCMP(W|X)(i|r)", "CSEL(W|X)r",
+ "CSINC(W|X)r", "CSINV(W|X)r",
+ "CSNEG(W|X)r")>;
+
+// Move immed
+def : WriteRes<WriteImm, [A64FXGI2456]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+def : InstRW<[A64FXWrite_1Cyc_GI2456],
+ (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
+
+def : InstRW<[A64FXWrite_2Cyc_GI24],
+ (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;
+
+// Variable shift
+def : WriteRes<WriteIS, [A64FXGI2456]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+//---
+// 3.4 Divide and Multiply Instructions
+//---
+
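+// ResourceCycles equal to the latency means the divide unit is modeled as
+// unpipelined: it occupies its port for the entire operation.
+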
+// Divide, W-form
+def : WriteRes<WriteID32, [A64FXGI4]> {
+ let Latency = 39;
+ let ResourceCycles = [39];
+}
+
+// Divide, X-form
+def : WriteRes<WriteID64, [A64FXGI4]> {
+ let Latency = 23;
+ let ResourceCycles = [23];
+}
+
+// Multiply accumulate, W-form
+def : WriteRes<WriteIM32, [A64FXGI2456]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+}
+
+// Multiply accumulate, X-form
+def : WriteRes<WriteIM64, [A64FXGI2456]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+}
+
+def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
+def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>;
+def : InstRW<[A64FXWrite_MADDL],
+ (instregex "(S|U)(MADDL|MSUBL)rrr")>;
+
+def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
+def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
+
+// Bitfield extract, two reg
+def : WriteRes<WriteExtr, [A64FXGI2456]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+// Multiply high
+def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>;
+
+// Miscellaneous Data-Processing Instructions
+// Bitfield extract
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>;
+
+// Bitfield move, basic
+def : InstRW<[A64FXWrite_1Cyc_GI24],
+ (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;
+
+// Bitfield move, insert
+def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>;
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>;
+
+// Count leading
+def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$",
+ "^CLZ(W|X)r$")>;
+
+// Reverse bits
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>;
+
+// Cryptography Extensions
+def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]")>;
+def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AESI?MC")>;
+def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^PMULL")>;
+def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>;
+def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>;
+def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]")>;
+def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU0")>;
+def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU1")>;
+def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA256(H|H2)")>;
+
+// CRC Instructions
+def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr, CRC32Hrr)>;
+def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr)>;
+def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr)>;
+
+def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32CBrr, CRC32CHrr)>;
+def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32CWrr)>;
+def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32CXrr)>;
+
+// Reverse bits/bytes
+// NOTE: Handled by WriteI.
+
+//---
+// 3.6 Load Instructions
+// 3.10 FP Load Instructions
+//---
+
+// Load register, literal
+// Load register, unscaled immed
+// Load register, immed unprivileged
+// Load register, unsigned immed
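+// A load occupies one of the load/store ports for three cycles
+// (ResourceCycles = 3).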
+def : WriteRes<WriteLD, [A64FXGI56]> {
+ let Latency = 4;
+ let ResourceCycles = [3];
+}
+
+// Load register, immed post-index
+// NOTE: Handled by WriteLD, WriteI.
+// Load register, immed pre-index
+// NOTE: Handled by WriteLD, WriteAdr.
+def : WriteRes<WriteAdr, [A64FXGI2456]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+}
+
+// Load pair, immed offset, normal
+// Load pair, immed offset, signed words, base != SP
+// Load pair, immed offset, signed words, base = SP
+// LDP only breaks into *one* LS micro-op. Thus
+// the resources are handled by WriteLD.
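+// WriteLDHi therefore lists no resources and only carries the latency of
+// the second destination register.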
+def : WriteRes<WriteLDHi, []> {
+ let Latency = 5;
+}
+
+// Load register offset, basic
+// Load register, register offset, scale by 4/8
+// Load register, register offset, scale by 2
+// Load register offset, extend
+// Load register, register offset, extend, scale by 4/8
+// Load register, register offset, extend, scale by 2
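+// Scaled and unscaled register-offset forms are given the same cost: both
+// variants resolve to the same 1-cycle write, and the base-address operand
+// uses ReadDefault in either case.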
+def A64FXWriteLDIdx : SchedWriteVariant<[
+ SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
+ SchedVar<NoSchedPred, [A64FXWrite_1Cyc_GI56]>]>;
+def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;
+
+def A64FXReadAdrBase : SchedReadVariant<[
+ SchedVar<ScaledIdxPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
+def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;
+
+// Load pair, immed pre-index, normal
+// Load pair, immed pre-index, signed words
+// Load pair, immed post-index, normal
+// Load pair, immed post-index, signed words
+// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
+
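+// Non-temporal and normal load pairs share the same 5-cycle, two micro-op
+// write, with WriteLDHi covering the second destination register.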
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPDi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPQi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPSi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPWi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPXi)>;
+
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPDi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPQi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPWi)>;
+def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPXi)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRBui)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRDui)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRHui)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRQui)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRSui)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRDl)>;
+def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRQl)>;
+def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRWl)>;
+def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRXl)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRBi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRHi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRWi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRXi)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBWi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBXi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHWi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHXi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSWi)>;
+
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+      (instrs LDPXpre)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpost)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpost)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpost)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpost)>;
+
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPDpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPQpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPSpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPWpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPXpost)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>;
+
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPXpre)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>;
+def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>;
+
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPDpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPQpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPSpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPWpost)>;
+def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
+ (instrs LDPXpost)>;
+
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>;
+def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroW)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRBroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRBroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRDroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRHroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRHHroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRQroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRSroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRSHWroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRSHXroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRWroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRXroW)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRBroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRDroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRHroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRHHroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRQroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRSroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRSHWroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRSHXroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRWroX)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
+ (instrs LDRXroX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBBi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURDi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHHi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURQi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURXi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBWi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBXi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHWi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHXi)>;
+def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSWi)>;
+
+//---
+// Prefetch
+//---
+def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>;
+def : InstRW<[A64FXWrite_PREF1], (instrs PRFUMi)>;
+def : InstRW<[A64FXWrite_PREF1], (instrs PRFMui)>;
+def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroW)>;
+def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroX)>;
+
+//--
+// 3.7 Store Instructions
+// 3.11 FP Store Instructions
+//--
+
+// Store register, unscaled immed
+// Store register, immed unprivileged
+// Store register, unsigned immed
+def : WriteRes<WriteST, [A64FXGI56]> {
+ let Latency = 1;
+}
+
+// Store register, immed post-index
+// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase
+
+// Store register, immed pre-index
+// NOTE: Handled by WriteAdr, WriteST
+
+// Store register, register offset, basic
+// Store register, register offset, scaled by 4/8
+// Store register, register offset, scaled by 2
+// Store register, register offset, extend
+// Store register, register offset, extend, scale by 4/8
+// Store register, register offset, extend, scale by 1
+def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]> {
+ let Latency = 1;
+}
+
+// Store pair, immed offset, W-form
+// Store pair, immed offset, X-form
+def : WriteRes<WriteSTP, [A64FXGI56]> {
+ let Latency = 1;
+}
+
+// Store pair, immed post-index, W-form
+// Store pair, immed post-index, X-form
+// Store pair, immed pre-index, W-form
+// Store pair, immed pre-index, X-form
+// NOTE: Handled by WriteAdr, WriteSTP.
+
+def : InstRW<[A64FXWrite_STUR], (instrs STURBi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURBBi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURDi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURHi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURHHi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURQi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURSi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURWi)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STURXi)>;
+
+def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRBi)>;
+def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRHi)>;
+def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRWi)>;
+def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRXi)>;
+
+def : InstRW<[A64FXWrite_STNP], (instrs STNPDi)>;
+def : InstRW<[A64FXWrite_STNP], (instrs STNPQi)>;
+def : InstRW<[A64FXWrite_STNP], (instrs STNPXi)>;
+def : InstRW<[A64FXWrite_STNP], (instrs STNPWi)>;
+
+def : InstRW<[A64FXWrite_STNP], (instrs STPDi)>;
+def : InstRW<[A64FXWrite_STNP], (instrs STPQi)>;
+def : InstRW<[A64FXWrite_STNP], (instrs STPXi)>;
+def : InstRW<[A64FXWrite_STNP], (instrs STPWi)>;
+
+def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>;
+def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>;
+
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[A64FXWrite_STP01],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
+ (instrs STPXpre, STPXpost)>;
+
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
+ (instrs STRXpre, STRXpost)>;
+
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRBBroW, STRBBroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRBBroW, STRBBroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRXroW, STRXroX)>;
+def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
+ (instrs STRXroW, STRXroX)>;
+
+//---
+// 3.8 FP Data Processing Instructions
+//---
+
+// FP absolute value
+// FP min/max
+// FP negate
+def : WriteRes<WriteF, [A64FXGI03]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+}
+
+// FP arithmetic
+
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FADDDrr, FADDHrr)>;
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FSUBDrr, FSUBHrr)>;
+
+// FP compare
+def : WriteRes<WriteFCmp, [A64FXGI03]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+}
+
+// FP Div, Sqrt
+def : WriteRes<WriteFDiv, [A64FXGI0]> {
+ let Latency = 43;
+}
+
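+// Precision-specific divide/sqrt writes: the single-precision forms below
+// take 29 cycles, the double-precision forms 43.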
+def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 38;
+}
+
+def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 29;
+}
+
+def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 43;
+}
+
+def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 29;
+}
+
+def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]> {
+ let Latency = 43;
+}
+
+// FP divide, S-form
+// FP square root, S-form
+def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
+def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVSrr")>;
+def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^FSQRTSr")>;
+
+// FP divide, D-form
+// FP square root, D-form
+def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
+def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVDrr")>;
+def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^FSQRTDr")>;
+
+// FP multiply
+// FP multiply accumulate
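+// FMUL and the fused multiply-add forms share a 9-cycle latency and hold
+// an FP/SIMD pipeline for two cycles.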
+def : WriteRes<WriteFMul, [A64FXGI03]> {
+ let Latency = 9;
+ let ResourceCycles = [2];
+}
+
+def A64FXXWriteFMul : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 9;
+ let ResourceCycles = [2];
+}
+
+def A64FXXWriteFMulAcc : SchedWriteRes<[A64FXGI03]> {
+ let Latency = 9;
+ let ResourceCycles = [2];
+}
+
+def : InstRW<[A64FXXWriteFMul], (instregex "^FMUL", "^FNMUL")>;
+def : InstRW<[A64FXXWriteFMulAcc],
+ (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>;
+
+// FP round to integral
+def : InstRW<[A64FXWrite_9Cyc_GI03],
+ (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
+
+// FP select
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>;
+
+//---
+// 3.9 FP Miscellaneous Instructions
+//---
+
+// FP convert, from vec to vec reg
+// FP convert, from gen to vec reg
+// FP convert, from vec to gen reg
+def : WriteRes<WriteFCvt, [A64FXGI03]> {
+ let Latency = 9;
+ let ResourceCycles = [2];
+}
+
+// FP move, immed
+// FP move, register
+def : WriteRes<WriteFImm, [A64FXGI0]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+}
+
+// FP transfer, from gen to vec reg
+// FP transfer, from vec to gen reg
+def : WriteRes<WriteFCopy, [A64FXGI0]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+}
+
+def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>;
+def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;
+
+//---
+// 3.12 ASIMD Integer Instructions
+//---
+
+// ASIMD absolute diff, D-form
+// ASIMD absolute diff, Q-form
+// ASIMD absolute diff accum, D-form
+// ASIMD absolute diff accum, Q-form
+// ASIMD absolute diff accum long
+// ASIMD absolute diff long
+// ASIMD arith, basic
+// ASIMD arith, complex
+// ASIMD compare
+// ASIMD logical (AND, BIC, EOR)
+// ASIMD max/min, basic
+// ASIMD max/min, reduce, 4H/4S
+// ASIMD max/min, reduce, 8B/8H
+// ASIMD max/min, reduce, 16B
+// ASIMD multiply, D-form
+// ASIMD multiply, Q-form
+// ASIMD multiply accumulate long
+// ASIMD multiply accumulate saturating long
+// ASIMD multiply long
+// ASIMD pairwise add and accumulate
+// ASIMD shift accumulate
+// ASIMD shift by immed, basic
+// ASIMD shift by immed and insert, basic, D-form
+// ASIMD shift by immed and insert, basic, Q-form
+// ASIMD shift by immed, complex
+// ASIMD shift by register, basic, D-form
+// ASIMD shift by register, basic, Q-form
+// ASIMD shift by register, complex, D-form
+// ASIMD shift by register, complex, Q-form
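+// Everything above defaults to a 4-cycle write on the FP/SIMD pipelines;
+// more specific InstRW entries below override individual cases.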
+def : WriteRes<WriteV, [A64FXGI03]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+}
+
+// ASIMD arith, reduce, 4H/4S
+// ASIMD arith, reduce, 8B/8H
+// ASIMD arith, reduce, 16B
+
+// ASIMD logical (MVN (alias for NOT), ORN, ORR)
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
+
+// ASIMD arith, reduce
+def : InstRW<[A64FXWrite_ADDLV],
+ (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
+
+// ASIMD polynomial (8x8) multiply long
+def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>;
+def : InstRW<[A64FXWrite_MULLV],
+ (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
+def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>;
+def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD absolute diff accum, D-form
+def : InstRW<[A64FXWrite_ABA],
+ (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+// ASIMD absolute diff accum, Q-form
+def : InstRW<[A64FXWrite_ABA],
+ (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+// ASIMD absolute diff accum long
+def : InstRW<[A64FXWrite_ABAL],
+ (instregex "^[SU]ABAL")>;
+// ASIMD arith, reduce, 8B/4H/2S
+def : InstRW<[A64FXWrite_ADDLV1],
+ (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8H/4S
+def : InstRW<[A64FXWrite_ADDLV1],
+ (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B
+def : InstRW<[A64FXWrite_ADDLV1],
+ (instregex "^[SU]?ADDL?Vv16i8v$")>;
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[A64FXWrite_MINMAXV],
+ (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[A64FXWrite_MINMAXV],
+ (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B
+def : InstRW<[A64FXWrite_MINMAXV],
+ (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+// ASIMD multiply, D-form
+def : InstRW<[A64FXWrite_PMUL],
+ (instregex "^(P?MUL|SQR?DMUL)" #
+ "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
+ "(_indexed)?$")>;
+
+// ASIMD multiply, Q-form
+def : InstRW<[A64FXWrite_PMUL],
+ (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// ASIMD multiply, Q-form
+def : InstRW<[A64FXWrite_SQRDMULH],
+ (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+// ASIMD multiply accumulate, D-form
+def : InstRW<[A64FXWrite_9Cyc_GI03],
+ (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[A64FXWrite_9Cyc_GI03],
+ (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+// ASIMD shift accumulate
+def : InstRW<[A64FXWrite_SRSRAV],
+ (instregex "SRSRAv", "URSRAv")>;
+def : InstRW<[A64FXWrite_SSRAV],
+ (instregex "SSRAv", "USRAv")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[A64FXWrite_RSHRN],
+ (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>;
+def : InstRW<[A64FXWrite_SHRN],
+ (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>;
+
+def : InstRW<[A64FXWrite_6Cyc_GI3],
+ (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>;
+def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>;
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[A64FXWrite_6Cyc_GI3],
+ (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+// ASIMD shift by register, complex, D-form
+def : InstRW<[A64FXWrite_6Cyc_GI3],
+ (instregex "^[SU][QR]{1,2}SHL" #
+ "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[A64FXWrite_6Cyc_GI3],
+ (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD Arithmetic
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>;
+def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
+ "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
+def : InstRW<[A64FXWrite_ADDP],
+ (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
+ "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
+def : InstRW<[A64FXWrite_4Cyc_GI0],
+ (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
+def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>;
+def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>;
+def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>;
+def : InstRW<[A64FXWrite_MINMAXV],
+ (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>;
+def : InstRW<[A64FXWrite_ABA],
+ (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SUQADDv", "^USQADDv")>;
+def : InstRW<[A64FXWrite_SHRN],
+ (instregex "^ADDHNv", "^SUBHNv")>;
+def : InstRW<[A64FXWrite_RSHRN],
+ (instregex "^RADDHNv", "^RSUBHNv")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
+ "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB",
+ "^URHADD", "^USQADD")>;
+
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^CMEQv", "^CMGEv", "^CMGTv",
+ "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>;
+def : InstRW<[A64FXWrite_MINMAXV],
+ (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
+def : InstRW<[A64FXWrite_ADDP],
+ (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^SABDv", "^UABDv")>;
+def : InstRW<[A64FXWrite_TBX1],
+ (instregex "^SABDLv", "^UABDLv")>;
+
+//---
+// 3.13 ASIMD Floating-point Instructions
+//---
+
+// ASIMD FP absolute value
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>;
+
+// ASIMD FP arith, normal, D-form
+// ASIMD FP arith, normal, Q-form
+def : InstRW<[A64FXWrite_9Cyc_GI03],
+ (instregex "^FABDv", "^FADDv", "^FSUBv")>;
+
+// ASIMD FP arith, pairwise, D-form
+// ASIMD FP arith, pairwise, Q-form
+def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>;
+
+// ASIMD FP compare, D-form
+// ASIMD FP compare, Q-form
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>;
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCMEQv", "^FCMGEv",
+ "^FCMGTv", "^FCMLEv",
+ "^FCMLTv")>;
+// ASIMD FP round, D-form
+def : InstRW<[A64FXWrite_9Cyc_GI03],
+ (instregex "^FRINT[AIMNPXZ](v2f32)")>;
+// ASIMD FP round, Q-form
+def : InstRW<[A64FXWrite_9Cyc_GI03],
+ (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
+
+// ASIMD FP convert, long
+// ASIMD FP convert, narrow
+// ASIMD FP convert, other, D-form
+// ASIMD FP convert, other, Q-form
+
+// ASIMD FP convert, long and narrow
+def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>;
+// ASIMD FP convert, other, D-form
+def : InstRW<[A64FXWrite_FCVTXNV],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD FP convert, other, Q-form
+def : InstRW<[A64FXWrite_FCVTXNV],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;
+def : InstRW<[A64FXXWriteFDivSP], (instregex "FDIVv2f32")>;
+
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;
+def : InstRW<[A64FXXWriteFDiv], (instregex "FDIVv4f32")>;
+
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;
+def : InstRW<[A64FXXWriteFDivDP], (instregex "FDIVv2f64")>;
+
+// ASIMD FP max/min, normal, D-form
+// ASIMD FP max/min, normal, Q-form
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMAXv", "^FMAXNMv",
+ "^FMINv", "^FMINNMv")>;
+
+// ASIMD FP max/min, pairwise, D-form
+// ASIMD FP max/min, pairwise, Q-form
+def : InstRW<[A64FXWrite_ADDP], (instregex "^FMAXPv", "^FMAXNMPv",
+ "^FMINPv", "^FMINNMPv")>;
+
+// ASIMD FP max/min, reduce
+def : InstRW<[A64FXWrite_FMAXVVH], (instregex "^FMAXVv", "^FMAXNMVv",
+ "^FMINVv", "^FMINNMVv")>;
+
+// ASIMD FP multiply, D-form, FZ
+// ASIMD FP multiply, D-form, no FZ
+// ASIMD FP multiply, Q-form, FZ
+// ASIMD FP multiply, Q-form, no FZ
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
+def : InstRW<[A64FXWrite_FMULXE],
+ (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+def : InstRW<[A64FXWrite_FMULXE],
+ (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP multiply accumulate, D-form, FZ
+// ASIMD FP multiply accumulate, D-form, no FZ
+// ASIMD FP multiply accumulate, Q-form, FZ
+// ASIMD FP multiply accumulate, Q-form, no FZ
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
+def : InstRW<[A64FXWrite_FMULXE],
+ (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
+def : InstRW<[A64FXWrite_FMULXE],
+ (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
+
+// ASIMD FP negate
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>;
+
+//--
+// 3.14 ASIMD Miscellaneous Instructions
+//--
+
+// ASIMD bit reverse
+def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>;
+
+// ASIMD bitwise insert, D-form
+// ASIMD bitwise insert, Q-form
+def : InstRW<[A64FXWrite_BIF],
+ (instregex "^BIFv", "^BITv", "^BSLv")>;
+
+// ASIMD count, D-form
+// ASIMD count, Q-form
+def : InstRW<[A64FXWrite_4Cyc_GI0],
+ (instregex "^CLSv", "^CLZv", "^CNTv")>;
+
+// ASIMD duplicate, gen reg
+// ASIMD duplicate, element
+def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^CPY")>;
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>;
+
+// ASIMD extract
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>;
+
+// ASIMD extract narrow
+def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>;
+
+// ASIMD extract narrow, saturating
+def : InstRW<[A64FXWrite_6Cyc_GI3],
+ (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;
+
+// ASIMD insert, element to element
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;
+
+// ASIMD move, integer immed
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>;
+
+// ASIMD move, FP immed
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>;
+
+// ASIMD table lookup, D-form
+def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>;
+def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>;
+def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>;
+def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>;
+def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>;
+def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>;
+def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>;
+def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>;
+
+// ASIMD table lookup, Q-form
+def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>;
+def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>;
+def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>;
+def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>;
+def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>;
+def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>;
+def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>;
+def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>;
+
+// ASIMD transpose
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1", "^TRN2")>;
+
+// ASIMD unzip/zip
+def : InstRW<[A64FXWrite_6Cyc_GI0],
+ (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
+
+// ASIMD reciprocal estimate, D-form
+// ASIMD reciprocal estimate, Q-form
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
+ "^FRSQRTEv", "^URSQRTEv")>;
+
+// ASIMD reciprocal step, D-form, FZ
+// ASIMD reciprocal step, D-form, no FZ
+// ASIMD reciprocal step, Q-form, FZ
+// ASIMD reciprocal step, Q-form, no FZ
+def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>;
+
+// ASIMD reverse
+def : InstRW<[A64FXWrite_4Cyc_GI03],
+ (instregex "^REV16v", "^REV32v", "^REV64v")>;
+
+// ASIMD table lookup, D-form
+// ASIMD table lookup, Q-form
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>;
+
+// ASIMD transfer, element to word or dword
+def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[A64FXWrite_SMOV], (instregex "(S|U)MOVv.*")>;
+
+// ASIMD transfer gen reg to element
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;
+
+// ASIMD transpose
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1v", "^TRN2v",
+ "^UZP1v", "^UZP2v")>;
+
+// ASIMD unzip/zip
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>;
+
+//--
+// 3.15 ASIMD Load Instructions
+//--
+
+// ASIMD load, 1 element, multiple, 1 reg, D-form
+// ASIMD load, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[A64FXWrite_8Cyc_GI56],
+ (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
+def : InstRW<[A64FXWrite_11Cyc_GI56],
+ (instregex "^LD1Onev(16b|8h|4s)$")>;
+def : InstRW<[A64FXWrite_LD108, WriteAdr],
+ (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
+def : InstRW<[A64FXWrite_LD109, WriteAdr],
+ (instregex "^LD1Onev(16b|8h|4s)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, D-form
+// ASIMD load, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[A64FXWrite_LD102],
+ (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
+def : InstRW<[A64FXWrite_LD103],
+ (instregex "^LD1Twov(16b|8h|4s)$")>;
+def : InstRW<[A64FXWrite_LD110, WriteAdr],
+ (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
+def : InstRW<[A64FXWrite_LD111, WriteAdr],
+ (instregex "^LD1Twov(16b|8h|4s)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, D-form
+// ASIMD load, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[A64FXWrite_LD104],
+ (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
+def : InstRW<[A64FXWrite_LD105],
+ (instregex "^LD1Threev(16b|8h|4s)$")>;
+def : InstRW<[A64FXWrite_LD112, WriteAdr],
+ (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
+def : InstRW<[A64FXWrite_LD113, WriteAdr],
+ (instregex "^LD1Threev(16b|8h|4s)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, D-form
+// ASIMD load, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[A64FXWrite_LD106],
+ (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
+def : InstRW<[A64FXWrite_LD107],
+ (instregex "^LD1Fourv(16b|8h|4s)$")>;
+def : InstRW<[A64FXWrite_LD114, WriteAdr],
+ (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
+def : InstRW<[A64FXWrite_LD115, WriteAdr],
+ (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;
+
+// ASIMD load, 1 element, one lane, B/H/S
+// ASIMD load, 1 element, one lane, D
+def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
+ (instregex "^LD1i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, D-form, B/H/S
+// ASIMD load, 1 element, all lanes, D-form, D
+// ASIMD load, 1 element, all lanes, Q-form
+def : InstRW<[A64FXWrite_8Cyc_GI03],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD108, WriteAdr],
+ (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple, D-form, B/H/S
+// ASIMD load, 2 element, multiple, Q-form, D
+def : InstRW<[A64FXWrite_LD103],
+ (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD111, WriteAdr],
+ (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, one lane, B/H
+// ASIMD load, 2 element, one lane, S
+// ASIMD load, 2 element, one lane, D
+def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
+ (instregex "^LD2i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, D-form, B/H/S
+// ASIMD load, 2 element, all lanes, D-form, D
+// ASIMD load, 2 element, all lanes, Q-form
+def : InstRW<[A64FXWrite_LD102],
+ (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD110, WriteAdr],
+ (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, D-form, B/H/S
+// ASIMD load, 3 element, multiple, Q-form, B/H/S
+// ASIMD load, 3 element, multiple, Q-form, D
+def : InstRW<[A64FXWrite_LD105],
+ (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD113, WriteAdr],
+ (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, one lane, B/H
+// ASIMD load, 3 element, one lane, S
+// ASIMD load, 3 element, one lane, D
+def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
+ (instregex "^LD3i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, D-form, B/H/S
+// ASIMD load, 3 element, all lanes, D-form, D
+// ASIMD load, 3 element, all lanes, Q-form, B/H/S
+// ASIMD load, 3 element, all lanes, Q-form, D
+def : InstRW<[A64FXWrite_LD104],
+ (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD112, WriteAdr],
+ (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, D
+def : InstRW<[A64FXWrite_LD107],
+ (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD115, WriteAdr],
+ (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, one lane, B/H
+// ASIMD load, 4 element, one lane, S
+// ASIMD load, 4 element, one lane, D
+def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
+ (instregex "^LD4i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, D-form, B/H/S
+// ASIMD load, 4 element, all lanes, D-form, D
+// ASIMD load, 4 element, all lanes, Q-form, B/H/S
+// ASIMD load, 4 element, all lanes, Q-form, D
+def : InstRW<[A64FXWrite_LD106],
+ (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_LD114, WriteAdr],
+ (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+//--
+// 3.16 ASIMD Store Instructions
+//--
+
+// ASIMD store, 1 element, multiple, 1 reg, D-form
+// ASIMD store, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[A64FXWrite_ST10],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST14, WriteAdr],
+ (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, D-form
+// ASIMD store, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[A64FXWrite_ST11],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST15, WriteAdr],
+ (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, D-form
+// ASIMD store, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[A64FXWrite_ST12],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST16, WriteAdr],
+ (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, D-form
+// ASIMD store, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[A64FXWrite_ST13],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST17, WriteAdr],
+ (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane, B/H/S
+// ASIMD store, 1 element, one lane, D
+def : InstRW<[A64FXWrite_ST10],
+ (instregex "^ST1i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_ST14, WriteAdr],
+ (instregex "^ST1i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, D-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, D
+def : InstRW<[A64FXWrite_ST11],
+ (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST15, WriteAdr],
+ (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 2 element, one lane, B/H/S
+// ASIMD store, 2 element, one lane, D
+def : InstRW<[A64FXWrite_ST11],
+ (instregex "^ST2i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_ST15, WriteAdr],
+ (instregex "^ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 3 element, multiple, D-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, D
+def : InstRW<[A64FXWrite_ST12],
+ (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST16, WriteAdr],
+ (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 3 element, one lane, B/H
+// ASIMD store, 3 element, one lane, S
+// ASIMD store, 3 element, one lane, D
+def : InstRW<[A64FXWrite_ST12], (instregex "^ST3i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_ST16, WriteAdr],
+ (instregex "^ST3i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 4 element, multiple, D-form, B/H/S
+// ASIMD store, 4 element, multiple, Q-form, B/H/S
+// ASIMD store, 4 element, multiple, Q-form, D
+def : InstRW<[A64FXWrite_ST13],
+ (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
+def : InstRW<[A64FXWrite_ST17, WriteAdr],
+ (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 4 element, one lane, B/H
+// ASIMD store, 4 element, one lane, S
+// ASIMD store, 4 element, one lane, D
+def : InstRW<[A64FXWrite_ST13], (instregex "^ST4i(8|16|32|64)$")>;
+def : InstRW<[A64FXWrite_ST17, WriteAdr],
+ (instregex "^ST4i(8|16|32|64)_POST$")>;
+
+// V8.1a Atomics (LSE)
+def : InstRW<[A64FXWrite_CAS, WriteAtomic],
+ (instrs CASB, CASH, CASW, CASX)>;
+
+def : InstRW<[A64FXWrite_CAS, WriteAtomic],
+ (instrs CASAB, CASAH, CASAW, CASAX)>;
+
+def : InstRW<[A64FXWrite_CAS, WriteAtomic],
+ (instrs CASLB, CASLH, CASLW, CASLX)>;
+
+def : InstRW<[A64FXWrite_CAS, WriteAtomic],
+ (instrs CASALB, CASALH, CASALW, CASALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDADDB, LDADDH, LDADDW, LDADDX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDEORB, LDEORH, LDEORW, LDEORX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDSETB, LDSETH, LDSETW, LDSETX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX,
+ LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX,
+ LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX,
+ LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX,
+ LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX,
+ LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX,
+ LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX,
+ LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX,
+ LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX,
+ LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;
+
+def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
+ (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX,
+ LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX,
+ LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX,
+ LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;
+
+def : InstRW<[A64FXWrite_SWP, WriteAtomic],
+ (instrs SWPB, SWPH, SWPW, SWPX)>;
+
+def : InstRW<[A64FXWrite_SWP, WriteAtomic],
+ (instrs SWPAB, SWPAH, SWPAW, SWPAX)>;
+
+def : InstRW<[A64FXWrite_SWP, WriteAtomic],
+ (instrs SWPLB, SWPLH, SWPLW, SWPLX)>;
+
+def : InstRW<[A64FXWrite_SWP, WriteAtomic],
+ (instrs SWPALB, SWPALH, SWPALW, SWPALX)>;
+
+def : InstRW<[A64FXWrite_STUR, WriteAtomic],
+ (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;
+
+// [ 1] "abs $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ABS_ZPmZ_B, ABS_ZPmZ_D, ABS_ZPmZ_H, ABS_ZPmZ_S)>;
+
+// [ 2] "add $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZZZ_B, ADD_ZZZ_D, ADD_ZZZ_H, ADD_ZZZ_S)>;
+
+// [ 3] "add $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZPmZ_B, ADD_ZPmZ_D, ADD_ZPmZ_H, ADD_ZPmZ_S)>;
+
+// [ 4] "add $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ADD_ZI_B, ADD_ZI_D, ADD_ZI_H, ADD_ZI_S)>;
+
+// [ 5] "addpl $Rd, $Rn, $imm6";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs ADDPL_XXI)>;
+
+// [ 6] "addvl $Rd, $Rn, $imm6";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs ADDVL_XXI)>;
+
+// [ 7] "adr $Zd, [$Zn, $Zm]";
+def : InstRW<[A64FXWrite_5Cyc_GI0], (instrs ADR_LSL_ZZZ_D_0, ADR_LSL_ZZZ_D_1, ADR_LSL_ZZZ_D_2, ADR_LSL_ZZZ_D_3, ADR_LSL_ZZZ_S_0, ADR_LSL_ZZZ_S_1, ADR_LSL_ZZZ_S_2, ADR_LSL_ZZZ_S_3, ADR_SXTW_ZZZ_D_0, ADR_SXTW_ZZZ_D_1, ADR_SXTW_ZZZ_D_2, ADR_SXTW_ZZZ_D_3, ADR_UXTW_ZZZ_D_0, ADR_UXTW_ZZZ_D_1, ADR_UXTW_ZZZ_D_2, ADR_UXTW_ZZZ_D_3)>;
+
+// [ 8] "and $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs AND_PPzPP)>;
+
+// [ 9] "and $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZZZ)>;
+
+// [10] "and $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZPmZ_B, AND_ZPmZ_D, AND_ZPmZ_H, AND_ZPmZ_S)>;
+
+// [11] "and $Zdn, $_Zdn, $imms13";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs AND_ZI)>;
+
+// [12] "ands $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ANDS_PPzPP)>;
+
+// [13] "andv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs ANDV_VPZ_B, ANDV_VPZ_D, ANDV_VPZ_H, ANDV_VPZ_S)>;
+
+// [14] "asr $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_WIDE_ZZZ_B, ASR_WIDE_ZZZ_H, ASR_WIDE_ZZZ_S)>;
+
+// [15] "asr $Zd, $Zn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_ZZI_B, ASR_ZZI_D, ASR_ZZI_H, ASR_ZZI_S)>;
+
+// [16] "asr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_WIDE_ZPmZ_B, ASR_WIDE_ZPmZ_H, ASR_WIDE_ZPmZ_S, ASR_ZPmZ_B, ASR_ZPmZ_D, ASR_ZPmZ_H, ASR_ZPmZ_S)>;
+
+// [17] "asr $Zdn, $Pg/m, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASR_ZPmI_B, ASR_ZPmI_D, ASR_ZPmI_H, ASR_ZPmI_S)>;
+
+// [18] "asrd $Zdn, $Pg/m, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASRD_ZPmI_B, ASRD_ZPmI_D, ASRD_ZPmI_H, ASRD_ZPmI_S)>;
+
+// [19] "asrr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ASRR_ZPmZ_B, ASRR_ZPmZ_D, ASRR_ZPmZ_H, ASRR_ZPmZ_S)>;
+
+// [20] "bic $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BIC_PPzPP)>;
+
+// [21] "bic $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs BIC_ZZZ)>;
+
+// [22] "bic $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs BIC_ZPmZ_B, BIC_ZPmZ_D, BIC_ZPmZ_H, BIC_ZPmZ_S)>;
+
+// [23] "bics $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BICS_PPzPP)>;
+
+// [24] "brka $Pd, $Pg/m, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKA_PPmP)>;
+
+// [25] "brka $Pd, $Pg/z, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKA_PPzP)>;
+
+// [26] "brkas $Pd, $Pg/z, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKAS_PPzP)>;
+
+// [27] "brkb $Pd, $Pg/m, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKB_PPmP)>;
+
+// [28] "brkb $Pd, $Pg/z, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKB_PPzP)>;
+
+// [29] "brkbs $Pd, $Pg/z, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKBS_PPzP)>;
+
+// [30] "brkn $Pdm, $Pg/z, $Pn, $_Pdm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKN_PPzP)>;
+
+// [31] "brkns $Pdm, $Pg/z, $Pn, $_Pdm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKNS_PPzP)>;
+
+// [32] "brkpa $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPA_PPzPP)>;
+
+// [33] "brkpas $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPAS_PPzPP)>;
+
+// [34] "brkpb $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPB_PPzPP)>;
+
+// [35] "brkpbs $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs BRKPBS_PPzPP)>;
+
+// [36] "clasta $Rdn, $Pg, $_Rdn, $Zm";
+def : InstRW<[A64FXWrite_29Cyc_GI0256], (instrs CLASTA_RPZ_B, CLASTA_RPZ_D, CLASTA_RPZ_H, CLASTA_RPZ_S)>;
+
+// [37] "clasta $Vdn, $Pg, $_Vdn, $Zm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTA_VPZ_B, CLASTA_VPZ_D, CLASTA_VPZ_H, CLASTA_VPZ_S)>;
+
+// [38] "clasta $Zdn, $Pg, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTA_ZPZ_B, CLASTA_ZPZ_D, CLASTA_ZPZ_H, CLASTA_ZPZ_S)>;
+
+// [39] "clastb $Rdn, $Pg, $_Rdn, $Zm";
+def : InstRW<[A64FXWrite_29Cyc_GI0256], (instrs CLASTB_RPZ_B, CLASTB_RPZ_D, CLASTB_RPZ_H, CLASTB_RPZ_S)>;
+
+// [40] "clastb $Vdn, $Pg, $_Vdn, $Zm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTB_VPZ_B, CLASTB_VPZ_D, CLASTB_VPZ_H, CLASTB_VPZ_S)>;
+
+// [41] "clastb $Zdn, $Pg, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CLASTB_ZPZ_B, CLASTB_ZPZ_D, CLASTB_ZPZ_H, CLASTB_ZPZ_S)>;
+
+// [42] "cls $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs CLS_ZPmZ_B, CLS_ZPmZ_D, CLS_ZPmZ_H, CLS_ZPmZ_S)>;
+
+// [43] "clz $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs CLZ_ZPmZ_B, CLZ_ZPmZ_D, CLZ_ZPmZ_H, CLZ_ZPmZ_S)>;
+
+// [44] "cmpeq $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPEQ_PPzZZ_B, CMPEQ_PPzZZ_D, CMPEQ_PPzZZ_H, CMPEQ_PPzZZ_S, CMPEQ_WIDE_PPzZZ_B, CMPEQ_WIDE_PPzZZ_H, CMPEQ_WIDE_PPzZZ_S)>;
+
+// [45] "cmpeq $Pd, $Pg/z, $Zn, $imm5";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPEQ_PPzZI_B, CMPEQ_PPzZI_D, CMPEQ_PPzZI_H, CMPEQ_PPzZI_S)>;
+
+// [46] "cmpge $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGE_PPzZZ_B, CMPGE_PPzZZ_D, CMPGE_PPzZZ_H, CMPGE_PPzZZ_S, CMPGE_WIDE_PPzZZ_B, CMPGE_WIDE_PPzZZ_H, CMPGE_WIDE_PPzZZ_S)>;
+
+// [47] "cmpge $Pd, $Pg/z, $Zn, $imm5";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGE_PPzZI_B, CMPGE_PPzZI_D, CMPGE_PPzZI_H, CMPGE_PPzZI_S)>;
+
+// [48] "cmpgt $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGT_PPzZZ_B, CMPGT_PPzZZ_D, CMPGT_PPzZZ_H, CMPGT_PPzZZ_S, CMPGT_WIDE_PPzZZ_B, CMPGT_WIDE_PPzZZ_H, CMPGT_WIDE_PPzZZ_S)>;
+
+// [49] "cmpgt $Pd, $Pg/z, $Zn, $imm5";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPGT_PPzZI_B, CMPGT_PPzZI_D, CMPGT_PPzZI_H, CMPGT_PPzZI_S)>;
+
+// [50] "cmphi $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHI_PPzZZ_B, CMPHI_PPzZZ_D, CMPHI_PPzZZ_H, CMPHI_PPzZZ_S, CMPHI_WIDE_PPzZZ_B, CMPHI_WIDE_PPzZZ_H, CMPHI_WIDE_PPzZZ_S)>;
+
+// [51] "cmphi $Pd, $Pg/z, $Zn, $imm7";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHI_PPzZI_B, CMPHI_PPzZI_D, CMPHI_PPzZI_H, CMPHI_PPzZI_S)>;
+
+// [52] "cmphs $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHS_PPzZZ_B, CMPHS_PPzZZ_D, CMPHS_PPzZZ_H, CMPHS_PPzZZ_S, CMPHS_WIDE_PPzZZ_B, CMPHS_WIDE_PPzZZ_H, CMPHS_WIDE_PPzZZ_S)>;
+
+// [53] "cmphs $Pd, $Pg/z, $Zn, $imm7";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPHS_PPzZI_B, CMPHS_PPzZI_D, CMPHS_PPzZI_H, CMPHS_PPzZI_S)>;
+
+// [54] "cmple $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLE_WIDE_PPzZZ_B, CMPLE_WIDE_PPzZZ_H, CMPLE_WIDE_PPzZZ_S)>;
+
+// [55] "cmple $Pd, $Pg/z, $Zn, $imm5";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLE_PPzZI_B, CMPLE_PPzZI_D, CMPLE_PPzZI_H, CMPLE_PPzZI_S)>;
+
+// [56] "cmplo $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLO_WIDE_PPzZZ_B, CMPLO_WIDE_PPzZZ_H, CMPLO_WIDE_PPzZZ_S)>;
+
+// [57] "cmplo $Pd, $Pg/z, $Zn, $imm7";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLO_PPzZI_B, CMPLO_PPzZI_D, CMPLO_PPzZI_H, CMPLO_PPzZI_S)>;
+
+// [58] "cmpls $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLS_WIDE_PPzZZ_B, CMPLS_WIDE_PPzZZ_H, CMPLS_WIDE_PPzZZ_S)>;
+
+// [59] "cmpls $Pd, $Pg/z, $Zn, $imm7";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLS_PPzZI_B, CMPLS_PPzZI_D, CMPLS_PPzZI_H, CMPLS_PPzZI_S)>;
+
+// [60] "cmplt $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLT_WIDE_PPzZZ_B, CMPLT_WIDE_PPzZZ_H, CMPLT_WIDE_PPzZZ_S)>;
+
+// [61] "cmplt $Pd, $Pg/z, $Zn, $imm5";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPLT_PPzZI_B, CMPLT_PPzZI_D, CMPLT_PPzZI_H, CMPLT_PPzZI_S)>;
+
+// [62] "cmpne $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPNE_PPzZZ_B, CMPNE_PPzZZ_D, CMPNE_PPzZZ_H, CMPNE_PPzZZ_S, CMPNE_WIDE_PPzZZ_B, CMPNE_WIDE_PPzZZ_H, CMPNE_WIDE_PPzZZ_S)>;
+
+// [63] "cmpne $Pd, $Pg/z, $Zn, $imm5";
+def : InstRW<[A64FXWrite_4Cyc_GI01], (instrs CMPNE_PPzZI_B, CMPNE_PPzZI_D, CMPNE_PPzZI_H, CMPNE_PPzZI_S)>;
+
+// [64] "cnot $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs CNOT_ZPmZ_B, CNOT_ZPmZ_D, CNOT_ZPmZ_H, CNOT_ZPmZ_S)>;
+
+// [65] "cnt $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI3], (instrs CNT_ZPmZ_B, CNT_ZPmZ_D, CNT_ZPmZ_H, CNT_ZPmZ_S)>;
+
+// [66] "cntb $Rd, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTB_XPiI)>;
+
+// [67] "cntd $Rd, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTD_XPiI)>;
+
+// [68] "cnth $Rd, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTH_XPiI)>;
+
+// [69] "cntp $Rd, $Pg, $Pn";
+def : InstRW<[A64FXWrite_6Cyc_GI01], (instrs CNTP_XPP_B, CNTP_XPP_D, CNTP_XPP_H, CNTP_XPP_S)>;
+
+// [70] "cntw $Rd, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTW_XPiI)>;
+
+// [71] "compact $Zd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs COMPACT_ZPZ_D, COMPACT_ZPZ_S)>;
+
+// [72] "cpy $Zd, $Pg/m, $Rn";
+//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmR_B, CPY_ZPmR_D, CPY_ZPmR_H, CPY_ZPmR_S)>;
+
+// [73] "cpy $Zd, $Pg/m, $Vn";
+//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmV_B, CPY_ZPmV_D, CPY_ZPmV_H, CPY_ZPmV_S)>;
+
+// [74] "cpy $Zd, $Pg/m, $imm";
+//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmI_B, CPY_ZPmI_D, CPY_ZPmI_H, CPY_ZPmI_S)>;
+
+// [75] "cpy $Zd, $Pg/z, $imm";
+//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPzI_B, CPY_ZPzI_D, CPY_ZPzI_H, CPY_ZPzI_S)>;
+
+// [76] "ctermeq $Rn, $Rm";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMEQ_WW, CTERMEQ_XX)>;
+
+// [77] "ctermne $Rn, $Rm";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMNE_WW, CTERMNE_XX)>;
+
+// [78] "decb $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECB_XPiI)>;
+
+// [79] "decd $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECD_XPiI)>;
+
+// [80] "decd $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECD_ZPiI)>;
+
+// [81] "dech $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECH_XPiI)>;
+
+// [82] "dech $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECH_ZPiI)>;
+
+// [83] "decp $Rdn, $Pg";
+def : InstRW<[A64FXWrite_6Cyc_GI124], (instrs DECP_XP_B, DECP_XP_D, DECP_XP_H, DECP_XP_S)>;
+
+// [84] "decp $Zdn, $Pg";
+def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs DECP_ZP_D, DECP_ZP_H, DECP_ZP_S)>;
+
+// [85] "decw $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs DECW_XPiI)>;
+
+// [86] "decw $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs DECW_ZPiI)>;
+
+// [87] "dup $Zd, $Rn";
+def : InstRW<[A64FXWrite_8Cyc_GI01], (instrs DUP_ZR_B, DUP_ZR_D, DUP_ZR_H, DUP_ZR_S)>;
+
+// [88] "dup $Zd, $Zn$idx";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs DUP_ZZI_B, DUP_ZZI_D, DUP_ZZI_H, DUP_ZZI_Q, DUP_ZZI_S)>;
+
+// [89] "dup $Zd, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs DUP_ZI_B, DUP_ZI_D, DUP_ZI_H, DUP_ZI_S)>;
+
+// [90] "dupm $Zd, $imms";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs DUPM_ZI)>;
+
+// [91] "eor $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs EOR_PPzPP)>;
+
+// [92] "eor $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs EOR_ZZZ)>;
+
+// [93] "eor $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs EOR_ZPmZ_B, EOR_ZPmZ_D, EOR_ZPmZ_H, EOR_ZPmZ_S)>;
+
+// [94] "eor $Zdn, $_Zdn, $imms13";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs EOR_ZI)>;
+
+// [95] "eors $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs EORS_PPzPP)>;
+
+// [96] "eorv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs EORV_VPZ_B, EORV_VPZ_D, EORV_VPZ_H, EORV_VPZ_S)>;
+
+// [97] "ext $Zdn, $_Zdn, $Zm, $imm8";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs EXT_ZZI)>;
+
+// [99] "fabd $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FABD_ZPmZ_D, FABD_ZPmZ_H, FABD_ZPmZ_S)>;
+
+// [100] "fabs $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FABS_ZPmZ_D, FABS_ZPmZ_H, FABS_ZPmZ_S)>;
+
+// [101] "facge $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FACGE_PPzZZ_D, FACGE_PPzZZ_H, FACGE_PPzZZ_S)>;
+
+// [102] "facgt $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FACGT_PPzZZ_D, FACGT_PPzZZ_H, FACGT_PPzZZ_S)>;
+
+// [103] "fadd $Zd, $Zn, $Zm"; def is line 1638
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZZZ_D, FADD_ZZZ_H, FADD_ZZZ_S)>;
+
+// [104] "fadd $Zdn, $Pg/m, $_Zdn, $Zm"; def is line 1638
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZPmZ_D, FADD_ZPmZ_H, FADD_ZPmZ_S)>;
+
+// [105] "fadd $Zdn, $Pg/m, $_Zdn, $i1"; def is line 1638
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FADD_ZPmI_D, FADD_ZPmI_H, FADD_ZPmI_S)>;
+
+// [106] "fadda $Vdn, $Pg, $_Vdn, $Zm";
+def : InstRW<[A64FXWrite_18Cyc_GI03], (instrs FADDA_VPZ_D, FADDA_VPZ_H, FADDA_VPZ_S)>;
+
+// [107] "faddv $Vd, $Pg, $Zn";
+// H : 4 / 6 / ([1,2]9 / [1]6) x 4 / [1,2]9 = 75 cycles
+// S : 4 / 6 / ([1,2]9 / [1]6) x 3 / [1,2]9 = 60 cycles
+// D : 4 / 6 / ([1,2]9 / [1]6) x 2 / [1,2]9 = 45 cycles
+def : InstRW<[A64FXWrite_75Cyc_GI03], (instrs FADDV_VPZ_H)>;
+def : InstRW<[A64FXWrite_60Cyc_GI03], (instrs FADDV_VPZ_S)>;
+def : InstRW<[A64FXWrite_45Cyc_GI03], (instrs FADDV_VPZ_D)>;
+
+// [108] "fcadd $Zdn, $Pg/m, $_Zdn, $Zm, $imm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCADD_ZPmZ_D, FCADD_ZPmZ_H, FCADD_ZPmZ_S)>;
+
+// [109] "fcmeq $Pd, $Pg/z, $Zn, #0.0";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMEQ_PPzZ0_D, FCMEQ_PPzZ0_H, FCMEQ_PPzZ0_S)>;
+
+// [110] "fcmeq $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMEQ_PPzZZ_D, FCMEQ_PPzZZ_H, FCMEQ_PPzZZ_S)>;
+
+// [111] "fcmge $Pd, $Pg/z, $Zn, #0.0";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGE_PPzZ0_D, FCMGE_PPzZ0_H, FCMGE_PPzZ0_S)>;
+
+// [112] "fcmge $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGE_PPzZZ_D, FCMGE_PPzZZ_H, FCMGE_PPzZZ_S)>;
+
+// [113] "fcmgt $Pd, $Pg/z, $Zn, #0.0";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGT_PPzZ0_D, FCMGT_PPzZ0_H, FCMGT_PPzZ0_S)>;
+
+// [114] "fcmgt $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMGT_PPzZZ_D, FCMGT_PPzZZ_H, FCMGT_PPzZZ_S)>;
+
+// [115] "fcmla $Zda, $Pg/m, $Zn, $Zm, $imm";
+def : InstRW<[A64FXWrite_15Cyc_GI03], (instrs FCMLA_ZPmZZ_D, FCMLA_ZPmZZ_H, FCMLA_ZPmZZ_S)>;
+
+// [116] "fcmla $Zda, $Zn, $Zm$iop, $imm";
+def : InstRW<[A64FXWrite_15Cyc_GI03], (instrs FCMLA_ZZZI_H, FCMLA_ZZZI_S)>;
+
+// [117] "fcmle $Pd, $Pg/z, $Zn, #0.0";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMLE_PPzZ0_D, FCMLE_PPzZ0_H, FCMLE_PPzZ0_S)>;
+
+// [118] "fcmlt $Pd, $Pg/z, $Zn, #0.0";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMLT_PPzZ0_D, FCMLT_PPzZ0_H, FCMLT_PPzZ0_S)>;
+
+// [119] "fcmne $Pd, $Pg/z, $Zn, #0.0";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMNE_PPzZ0_D, FCMNE_PPzZ0_H, FCMNE_PPzZ0_S)>;
+
+// [120] "fcmne $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMNE_PPzZZ_D, FCMNE_PPzZZ_H, FCMNE_PPzZZ_S)>;
+
+// [121] "fcmuo $Pd, $Pg/z, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCMUO_PPzZZ_D, FCMUO_PPzZZ_H, FCMUO_PPzZZ_S)>;
+
+// [122] "fcpy $Zd, $Pg/m, $imm8";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FCPY_ZPmI_D, FCPY_ZPmI_H, FCPY_ZPmI_S)>;
+
+// [123] "fcvt $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVT_ZPmZ_DtoH, FCVT_ZPmZ_DtoS, FCVT_ZPmZ_HtoD, FCVT_ZPmZ_HtoS, FCVT_ZPmZ_StoD, FCVT_ZPmZ_StoH)>;
+
+// [124] "fcvtzs $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVTZS_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoS, FCVTZS_ZPmZ_HtoD, FCVTZS_ZPmZ_HtoH, FCVTZS_ZPmZ_HtoS, FCVTZS_ZPmZ_StoD, FCVTZS_ZPmZ_StoS)>;
+
+// [125] "fcvtzu $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FCVTZU_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoS, FCVTZU_ZPmZ_HtoD, FCVTZU_ZPmZ_HtoH, FCVTZU_ZPmZ_HtoS, FCVTZU_ZPmZ_StoD, FCVTZU_ZPmZ_StoS)>;
+
+// [126] "fdiv $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FDIV_ZPmZ_D)>;
+def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FDIV_ZPmZ_H)>;
+def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FDIV_ZPmZ_S)>;
+
+// [127] "fdivr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FDIVR_ZPmZ_D)>;
+def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FDIVR_ZPmZ_H)>;
+def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FDIVR_ZPmZ_S)>;
+
+// [128] "fdup $Zd, $imm8";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FDUP_ZI_D, FDUP_ZI_H, FDUP_ZI_S)>;
+
+// [129] "fexpa $Zd, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FEXPA_ZZ_D, FEXPA_ZZ_H, FEXPA_ZZ_S)>;
+
+// [130] "fmad $Zdn, $Pg/m, $Zm, $Za";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMAD_ZPmZZ_D, FMAD_ZPmZZ_H, FMAD_ZPmZZ_S)>;
+
+// [131] "fmax $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMAX_ZPmZ_D, FMAX_ZPmZ_H, FMAX_ZPmZ_S)>;
+
+// [132] "fmax $Zdn, $Pg/m, $_Zdn, $i1";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMAX_ZPmI_D, FMAX_ZPmI_H, FMAX_ZPmI_S)>;
+
+// [133] "fmaxnm $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMAXNM_ZPmZ_D, FMAXNM_ZPmZ_H, FMAXNM_ZPmZ_S)>;
+
+// [134] "fmaxnm $Zdn, $Pg/m, $_Zdn, $i1";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMAXNM_ZPmI_D, FMAXNM_ZPmI_H, FMAXNM_ZPmI_S)>;
+
+// [135] "fmaxnmv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMAXNMV_VPZ_D, FMAXNMV_VPZ_H, FMAXNMV_VPZ_S)>;
+
+// [136] "fmaxv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMAXV_VPZ_D, FMAXV_VPZ_H, FMAXV_VPZ_S)>;
+
+// [137] "fmin $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMIN_ZPmZ_D, FMIN_ZPmZ_H, FMIN_ZPmZ_S)>;
+
+// [138] "fmin $Zdn, $Pg/m, $_Zdn, $i1";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMIN_ZPmI_D, FMIN_ZPmI_H, FMIN_ZPmI_S)>;
+
+// [139] "fminnm $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FMINNM_ZPmZ_D, FMINNM_ZPmZ_H, FMINNM_ZPmZ_S)>;
+
+// [140] "fminnm $Zdn, $Pg/m, $_Zdn, $i1";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs FMINNM_ZPmI_D, FMINNM_ZPmI_H, FMINNM_ZPmI_S)>;
+
+// [141] "fminnmv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMINNMV_VPZ_D, FMINNMV_VPZ_H, FMINNMV_VPZ_S)>;
+
+// [142] "fminv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_10Cyc_GI03], (instrs FMINV_VPZ_D, FMINV_VPZ_H, FMINV_VPZ_S)>;
+
+// [143] "fmla $Zda, $Pg/m, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLA_ZPmZZ_D, FMLA_ZPmZZ_H, FMLA_ZPmZZ_S)>;
+
+// [144] "fmla $Zda, $Zn, $Zm$iop";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLA_ZZZI_D, FMLA_ZZZI_H, FMLA_ZZZI_S)>;
+
+// [145] "fmls $Zda, $Pg/m, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLS_ZPmZZ_D, FMLS_ZPmZZ_H, FMLS_ZPmZZ_S)>;
+
+// [146] "fmls $Zda, $Zn, $Zm$iop";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FMLS_ZZZI_D, FMLS_ZZZI_H, FMLS_ZZZI_S)>;
+
+// [147] "fmsb $Zdn, $Pg/m, $Zm, $Za";
+
+// [148] "fmul $Zd, $Zn, $Zm";
+
+// [149] "fmul $Zd, $Zn, $Zm$iop";
+
+// [150] "fmul $Zdn, $Pg/m, $_Zdn, $Zm";
+
+// [151] "fmul $Zdn, $Pg/m, $_Zdn, $i1";
+
+// [152] "fmulx $Zdn, $Pg/m, $_Zdn, $Zm";
+
+// [153] "fneg $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FNEG_ZPmZ_D, FNEG_ZPmZ_H, FNEG_ZPmZ_S)>;
+
+// [154] "fnmad $Zdn, $Pg/m, $Zm, $Za";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMAD_ZPmZZ_D, FNMAD_ZPmZZ_H, FNMAD_ZPmZZ_S)>;
+
+// [155] "fnmla $Zda, $Pg/m, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMLA_ZPmZZ_D, FNMLA_ZPmZZ_H, FNMLA_ZPmZZ_S)>;
+
+// [156] "fnmls $Zda, $Pg/m, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMLS_ZPmZZ_D, FNMLS_ZPmZZ_H, FNMLS_ZPmZZ_S)>;
+
+// [157] "fnmsb $Zdn, $Pg/m, $Zm, $Za";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FNMSB_ZPmZZ_D, FNMSB_ZPmZZ_H, FNMSB_ZPmZZ_S)>;
+
+// [158] "frecpe $Zd, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRECPE_ZZ_D, FRECPE_ZZ_H, FRECPE_ZZ_S)>;
+
+// [159] "frecps $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRECPS_ZZZ_D, FRECPS_ZZZ_H, FRECPS_ZZZ_S)>;
+
+// [160] "frecpx $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRECPX_ZPmZ_D, FRECPX_ZPmZ_H, FRECPX_ZPmZ_S)>;
+
+// [161] "frinta $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTA_ZPmZ_D, FRINTA_ZPmZ_H, FRINTA_ZPmZ_S)>;
+
+// [162] "frinti $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTI_ZPmZ_D, FRINTI_ZPmZ_H, FRINTI_ZPmZ_S)>;
+
+// [163] "frintm $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTM_ZPmZ_D, FRINTM_ZPmZ_H, FRINTM_ZPmZ_S)>;
+
+// [164] "frintn $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTN_ZPmZ_D, FRINTN_ZPmZ_H, FRINTN_ZPmZ_S)>;
+
+// [165] "frintp $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTP_ZPmZ_D, FRINTP_ZPmZ_H, FRINTP_ZPmZ_S)>;
+
+// [166] "frintx $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTX_ZPmZ_D, FRINTX_ZPmZ_H, FRINTX_ZPmZ_S)>;
+
+// [167] "frintz $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRINTZ_ZPmZ_D, FRINTZ_ZPmZ_H, FRINTZ_ZPmZ_S)>;
+
+// [168] "frsqrte $Zd, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FRSQRTE_ZZ_D, FRSQRTE_ZZ_H, FRSQRTE_ZZ_S)>;
+
+// [169] "frsqrts $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FRSQRTS_ZZZ_D, FRSQRTS_ZZZ_H, FRSQRTS_ZZZ_S)>;
+
+// [170] "fscale $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSCALE_ZPmZ_D, FSCALE_ZPmZ_H, FSCALE_ZPmZ_S)>;
+
+// [171] "fsqrt $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_154Cyc_GI0], (instrs FSQRT_ZPmZ_D)>;
+def : InstRW<[A64FXWrite_134Cyc_GI0], (instrs FSQRT_ZPmZ_H)>;
+def : InstRW<[A64FXWrite_98Cyc_GI0], (instrs FSQRT_ZPmZ_S)>;
+
+// [172] "fsub $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUB_ZZZ_D, FSUB_ZZZ_H, FSUB_ZZZ_S)>;
+
+// [173] "fsub $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUB_ZPmZ_D, FSUB_ZPmZ_H, FSUB_ZPmZ_S)>;
+
+// [174] "fsub $Zdn, $Pg/m, $_Zdn, $i1";
+def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs FSUB_ZPmI_D, FSUB_ZPmI_H, FSUB_ZPmI_S)>;
+
+// [175] "fsubr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FSUBR_ZPmZ_D, FSUBR_ZPmZ_H, FSUBR_ZPmZ_S)>;
+
+// [176] "fsubr $Zdn, $Pg/m, $_Zdn, $i1";
+def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs FSUBR_ZPmI_D, FSUBR_ZPmI_H, FSUBR_ZPmI_S)>;
+
+// [177] "ftmad $Zdn, $_Zdn, $Zm, $imm3";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FTMAD_ZZI_D, FTMAD_ZZI_H, FTMAD_ZZI_S)>;
+
+// [178] "ftsmul $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs FTSMUL_ZZZ_D, FTSMUL_ZZZ_H, FTSMUL_ZZZ_S)>;
+
+// [180] "incb $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCB_XPiI)>;
+
+// [181] "incd $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCD_XPiI)>;
+
+// [182] "incd $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCD_ZPiI)>;
+
+// [183] "inch $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCH_XPiI)>;
+
+// [184] "inch $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCH_ZPiI)>;
+
+// [185] "incp $Rdn, $Pg";
+def : InstRW<[A64FXWrite_6Cyc_GI124], (instrs INCP_XP_B, INCP_XP_D, INCP_XP_H, INCP_XP_S)>;
+
+// [186] "incp $Zdn, $Pg";
+def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs INCP_ZP_D, INCP_ZP_H, INCP_ZP_S)>;
+
+// [187] "incw $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs INCW_XPiI)>;
+
+// [188] "incw $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs INCW_ZPiI)>;
+
+// [189] "index $Zd, $Rn, $Rm";
+def : InstRW<[A64FXWrite_17Cyc_GI02], (instrs INDEX_RR_B, INDEX_RR_D, INDEX_RR_H, INDEX_RR_S)>;
+
+// [190] "index $Zd, $Rn, $imm5";
+def : InstRW<[A64FXWrite_21Cyc_GI02], (instrs INDEX_RI_B, INDEX_RI_D, INDEX_RI_H, INDEX_RI_S)>;
+
+// [191] "index $Zd, $imm5, $Rm";
+def : InstRW<[A64FXWrite_21Cyc_GI02], (instrs INDEX_IR_B, INDEX_IR_D, INDEX_IR_H, INDEX_IR_S)>;
+
+// [192] "index $Zd, $imm5, $imm5b";
+def : InstRW<[A64FXWrite_13Cyc_GI0], (instrs INDEX_II_B, INDEX_II_D, INDEX_II_H, INDEX_II_S)>;
+
+// [193] "insr $Zdn, $Rm";
+def : InstRW<[A64FXWrite_10Cyc_GI02], (instrs INSR_ZR_B, INSR_ZR_D, INSR_ZR_H, INSR_ZR_S)>;
+
+// [194] "insr $Zdn, $Vm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs INSR_ZV_B, INSR_ZV_D, INSR_ZV_H, INSR_ZV_S)>;
+
+// [195] "lasta $Rd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_25Cyc_GI056], (instrs LASTA_RPZ_B, LASTA_RPZ_D, LASTA_RPZ_H, LASTA_RPZ_S)>;
+
+// [196] "lasta $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs LASTA_VPZ_B, LASTA_VPZ_D, LASTA_VPZ_H, LASTA_VPZ_S)>;
+
+// [197] "lastb $Rd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_25Cyc_GI056], (instrs LASTB_RPZ_B, LASTB_RPZ_D, LASTB_RPZ_H, LASTB_RPZ_S)>;
+
+// [198] "lastb $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs LASTB_VPZ_B, LASTB_VPZ_D, LASTB_VPZ_H, LASTB_VPZ_S)>;
+
+// [199] "ld1b $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1B, LD1B_D, LD1B_H, LD1B_S)>;
+
+// [200] "ld1b $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1B_D_REAL, GLD1B_D_SXTW_REAL, GLD1B_D_UXTW_REAL, GLD1B_S_SXTW_REAL, GLD1B_S_UXTW_REAL)>;
+
+// [201] "ld1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1B_D_IMM_REAL, LD1B_H_IMM_REAL, LD1B_IMM_REAL, LD1B_S_IMM_REAL)>;
+
+// [202] "ld1b $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1B_D_IMM_REAL, GLD1B_S_IMM_REAL)>;
+
+// [203] "ld1d $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1D)>;
+
+// [204] "ld1d $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1D_REAL, GLD1D_SCALED_REAL, GLD1D_SXTW_REAL, GLD1D_SXTW_SCALED_REAL, GLD1D_UXTW_REAL, GLD1D_UXTW_SCALED_REAL)>;
+
+// [205] "ld1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1D_IMM_REAL)>;
+
+// [206] "ld1d $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1D_IMM_REAL)>;
+
+// [207] "ld1h $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1H, LD1H_D, LD1H_S)>;
+
+// [208] "ld1h $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1H_D_REAL, GLD1H_D_SCALED_REAL, GLD1H_D_SXTW_REAL, GLD1H_D_SXTW_SCALED_REAL, GLD1H_D_UXTW_REAL, GLD1H_D_UXTW_SCALED_REAL, GLD1H_S_SXTW_REAL, GLD1H_S_SXTW_SCALED_REAL, GLD1H_S_UXTW_REAL, GLD1H_S_UXTW_SCALED_REAL)>;
+
+// [209] "ld1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1H_D_IMM_REAL, LD1H_IMM_REAL, LD1H_S_IMM_REAL)>;
+
+// [210] "ld1h $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1H_D_IMM_REAL, GLD1H_S_IMM_REAL)>;
+
+// [211] "ld1rb $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RB_D_IMM, LD1RB_H_IMM, LD1RB_IMM, LD1RB_S_IMM)>;
+
+// [212] "ld1rd $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RD_IMM)>;
+
+// [213] "ld1rh $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RH_D_IMM, LD1RH_IMM, LD1RH_S_IMM)>;
+
+// [214] "ld1rqb $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_B)>;
+
+// [215] "ld1rqb $Zt, $Pg/z, [$Rn, $imm4]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_B_IMM)>;
+
+// [216] "ld1rqd $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_D)>;
+
+// [217] "ld1rqd $Zt, $Pg/z, [$Rn, $imm4]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_D_IMM)>;
+
+// [218] "ld1rqh $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_H)>;
+
+// [219] "ld1rqh $Zt, $Pg/z, [$Rn, $imm4]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_H_IMM)>;
+
+// [220] "ld1rqw $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_W)>;
+
+// [221] "ld1rqw $Zt, $Pg/z, [$Rn, $imm4]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RQ_W_IMM)>;
+
+// [222] "ld1rsb $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSB_D_IMM, LD1RSB_H_IMM, LD1RSB_S_IMM)>;
+
+// [223] "ld1rsh $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSH_D_IMM, LD1RSH_S_IMM)>;
+
+// [224] "ld1rsw $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RSW_IMM)>;
+
+// [225] "ld1rw $Zt, $Pg/z, [$Rn, $imm6]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1RW_D_IMM, LD1RW_IMM)>;
+
+// [226] "ld1sb $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SB_D, LD1SB_H, LD1SB_S)>;
+
+// [227] "ld1sb $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SB_D_REAL, GLD1SB_D_SXTW_REAL, GLD1SB_D_UXTW_REAL, GLD1SB_S_SXTW_REAL, GLD1SB_S_UXTW_REAL)>;
+
+// [228] "ld1sb $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SB_D_IMM_REAL, LD1SB_H_IMM_REAL, LD1SB_S_IMM_REAL)>;
+
+// [229] "ld1sb $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SB_D_IMM_REAL, GLD1SB_S_IMM_REAL)>;
+
+// [230] "ld1sh $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SH_D, LD1SH_S)>;
+
+// [231] "ld1sh $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SH_D_REAL, GLD1SH_D_SCALED_REAL, GLD1SH_D_SXTW_REAL, GLD1SH_D_SXTW_SCALED_REAL, GLD1SH_D_UXTW_REAL, GLD1SH_D_UXTW_SCALED_REAL, GLD1SH_S_SXTW_REAL, GLD1SH_S_SXTW_SCALED_REAL, GLD1SH_S_UXTW_REAL, GLD1SH_S_UXTW_SCALED_REAL)>;
+
+// [232] "ld1sh $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SH_D_IMM_REAL, LD1SH_S_IMM_REAL)>;
+
+// [233] "ld1sh $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SH_D_IMM_REAL, GLD1SH_S_IMM_REAL)>;
+
+// [234] "ld1sw $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SW_D)>;
+
+// [235] "ld1sw $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1SW_D_REAL, GLD1SW_D_SCALED_REAL, GLD1SW_D_SXTW_REAL, GLD1SW_D_SXTW_SCALED_REAL, GLD1SW_D_UXTW_REAL, GLD1SW_D_UXTW_SCALED_REAL)>;
+
+// [236] "ld1sw $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1SW_D_IMM_REAL)>;
+
+// [237] "ld1sw $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1SW_D_IMM_REAL)>;
+
+// [238] "ld1w $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1W, LD1W_D)>;
+
+// [239] "ld1w $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLD1W_D_REAL, GLD1W_D_SCALED_REAL, GLD1W_D_SXTW_REAL, GLD1W_D_SXTW_SCALED_REAL, GLD1W_D_UXTW_REAL, GLD1W_D_UXTW_SCALED_REAL, GLD1W_SXTW_REAL, GLD1W_SXTW_SCALED_REAL, GLD1W_UXTW_REAL, GLD1W_UXTW_SCALED_REAL)>;
+
+// [240] "ld1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD1W_D_IMM_REAL, LD1W_IMM_REAL)>;
+
+// [241] "ld1w $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLD1W_D_IMM_REAL, GLD1W_IMM_REAL)>;
+
+// [242] "ld2b $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2B)>;
+
+// [243] "ld2b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2B_IMM)>;
+
+// [244] "ld2d $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2D)>;
+
+// [245] "ld2d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2D_IMM)>;
+
+// [246] "ld2h $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2H)>;
+
+// [247] "ld2h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD2H_IMM)>;
+
+// [248] "ld2w $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2W)>;
+
+// [249] "ld2w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD2W_IMM)>;
+
+// [250] "ld3b $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3B)>;
+
+// [251] "ld3b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3B_IMM)>;
+
+// [252] "ld3d $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3D)>;
+
+// [253] "ld3d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3D_IMM)>;
+
+// [254] "ld3h $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3H)>;
+
+// [255] "ld3h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD3H_IMM)>;
+
+// [256] "ld3w $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3W)>;
+
+// [257] "ld3w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD3W_IMM)>;
+
+// [258] "ld4b $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD4B)>;
+
+// [259] "ld4b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_44Cyc_GI56], (instrs LD4B_IMM)>;
+
+// [260] "ld4d $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4D)>;
+
+// [261] "ld4d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4D_IMM)>;
+
+// [262] "ld4h $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4H)>;
+
+// [263] "ld4h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4H_IMM)>;
+
+// [264] "ld4w $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4W)>;
+
+// [265] "ld4w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LD4W_IMM)>;
+
+// [266] "ldff1b $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1B_D_REAL, LDFF1B_H_REAL, LDFF1B_REAL, LDFF1B_S_REAL)>;
+
+// [267] "ldff1b $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1B_D_REAL, GLDFF1B_D_SXTW_REAL, GLDFF1B_D_UXTW_REAL, GLDFF1B_S_SXTW_REAL, GLDFF1B_S_UXTW_REAL)>;
+
+// [268] "ldff1b $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1B_D_IMM_REAL, GLDFF1B_S_IMM_REAL)>;
+
+// [269] "ldff1d $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1D_REAL)>;
+
+// [270] "ldff1d $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1D_REAL, GLDFF1D_SCALED_REAL, GLDFF1D_SXTW_REAL, GLDFF1D_SXTW_SCALED_REAL, GLDFF1D_UXTW_REAL, GLDFF1D_UXTW_SCALED_REAL)>;
+
+// [271] "ldff1d $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1D_IMM_REAL)>;
+
+// [272] "ldff1h $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1H_D_REAL, LDFF1H_REAL, LDFF1H_S_REAL)>;
+
+// [273] "ldff1h $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1H_D_REAL, GLDFF1H_D_SCALED_REAL, GLDFF1H_D_SXTW_REAL, GLDFF1H_D_SXTW_SCALED_REAL, GLDFF1H_D_UXTW_REAL, GLDFF1H_D_UXTW_SCALED_REAL, GLDFF1H_S_SXTW_REAL, GLDFF1H_S_SXTW_SCALED_REAL, GLDFF1H_S_UXTW_REAL, GLDFF1H_S_UXTW_SCALED_REAL)>;
+
+// [274] "ldff1h $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1H_D_IMM_REAL, GLDFF1H_S_IMM_REAL)>;
+
+// [275] "ldff1sb $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SB_D_REAL, LDFF1SB_H_REAL, LDFF1SB_S_REAL)>;
+
+// [276] "ldff1sb $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SB_D_REAL, GLDFF1SB_D_SXTW_REAL, GLDFF1SB_D_UXTW_REAL, GLDFF1SB_S_SXTW_REAL, GLDFF1SB_S_UXTW_REAL)>;
+
+// [277] "ldff1sb $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SB_D_IMM_REAL, GLDFF1SB_S_IMM_REAL)>;
+
+// [278] "ldff1sh $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SH_D_REAL, LDFF1SH_S_REAL)>;
+
+// [279] "ldff1sh $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SH_D_REAL, GLDFF1SH_D_SCALED_REAL, GLDFF1SH_D_SXTW_REAL, GLDFF1SH_D_SXTW_SCALED_REAL, GLDFF1SH_D_UXTW_REAL, GLDFF1SH_D_UXTW_SCALED_REAL, GLDFF1SH_S_SXTW_REAL, GLDFF1SH_S_SXTW_SCALED_REAL, GLDFF1SH_S_UXTW_REAL, GLDFF1SH_S_UXTW_SCALED_REAL)>;
+
+// [280] "ldff1sh $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SH_D_IMM_REAL, GLDFF1SH_S_IMM_REAL)>;
+
+// [281] "ldff1sw $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1SW_D_REAL)>;
+
+// [282] "ldff1sw $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1SW_D_REAL, GLDFF1SW_D_SCALED_REAL, GLDFF1SW_D_SXTW_REAL, GLDFF1SW_D_SXTW_SCALED_REAL, GLDFF1SW_D_UXTW_REAL, GLDFF1SW_D_UXTW_SCALED_REAL)>;
+
+// [283] "ldff1sw $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1SW_D_IMM_REAL)>;
+
+// [284] "ldff1w $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDFF1W_D_REAL, LDFF1W_REAL)>;
+
+// [285] "ldff1w $Zt, $Pg/z, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_19Cyc_GI0256], (instrs GLDFF1W_D_REAL, GLDFF1W_D_SCALED_REAL, GLDFF1W_D_SXTW_REAL, GLDFF1W_D_SXTW_SCALED_REAL, GLDFF1W_D_UXTW_REAL, GLDFF1W_D_UXTW_SCALED_REAL, GLDFF1W_SXTW_REAL, GLDFF1W_SXTW_SCALED_REAL, GLDFF1W_UXTW_REAL, GLDFF1W_UXTW_SCALED_REAL)>;
+
+// [286] "ldff1w $Zt, $Pg/z, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_15Cyc_GI056], (instrs GLDFF1W_D_IMM_REAL, GLDFF1W_IMM_REAL)>;
+
+// [287] "ldnf1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1B_D_IMM_REAL, LDNF1B_H_IMM_REAL, LDNF1B_IMM_REAL, LDNF1B_S_IMM_REAL)>;
+
+// [288] "ldnf1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1D_IMM_REAL)>;
+
+// [289] "ldnf1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1H_D_IMM_REAL, LDNF1H_IMM_REAL, LDNF1H_S_IMM_REAL)>;
+
+// [290] "ldnf1sb $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SB_D_IMM_REAL, LDNF1SB_H_IMM_REAL, LDNF1SB_S_IMM_REAL)>;
+
+// [291] "ldnf1sh $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SH_D_IMM_REAL, LDNF1SH_S_IMM_REAL)>;
+
+// [292] "ldnf1sw $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1SW_D_IMM_REAL)>;
+
+// [293] "ldnf1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNF1W_D_IMM_REAL, LDNF1W_IMM_REAL)>;
+
+// [294] "ldnt1b $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1B_ZRR)>;
+
+// [295] "ldnt1b $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1B_ZRI)>;
+
+// [296] "ldnt1d $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1D_ZRR)>;
+
+// [297] "ldnt1d $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1D_ZRI)>;
+
+// [298] "ldnt1h $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1H_ZRR)>;
+
+// [299] "ldnt1h $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1H_ZRI)>;
+
+// [300] "ldnt1w $Zt, $Pg/z, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1W_ZRR)>;
+
+// [301] "ldnt1w $Zt, $Pg/z, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI56], (instrs LDNT1W_ZRI)>;
+
+// [302] "ldr $Pt, [$Rn, $imm9, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI5], (instrs LDR_PXI)>;
+
+// [303] "ldr $Zt, [$Rn, $imm9, mul vl]";
+def : InstRW<[A64FXWrite_11Cyc_GI5], (instrs LDR_ZXI)>;
+
+// [304] "lsl $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_WIDE_ZZZ_B, LSL_WIDE_ZZZ_H, LSL_WIDE_ZZZ_S)>;
+
+// [305] "lsl $Zd, $Zn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_ZZI_B, LSL_ZZI_D, LSL_ZZI_H, LSL_ZZI_S)>;
+
+// [306] "lsl $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_WIDE_ZPmZ_B, LSL_WIDE_ZPmZ_H, LSL_WIDE_ZPmZ_S, LSL_ZPmZ_B, LSL_ZPmZ_D, LSL_ZPmZ_H, LSL_ZPmZ_S)>;
+
+// [307] "lsl $Zdn, $Pg/m, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSL_ZPmI_B, LSL_ZPmI_D, LSL_ZPmI_H, LSL_ZPmI_S)>;
+
+// [308] "lslr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSLR_ZPmZ_B, LSLR_ZPmZ_D, LSLR_ZPmZ_H, LSLR_ZPmZ_S)>;
+
+// [309] "lsr $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_WIDE_ZZZ_B, LSR_WIDE_ZZZ_H, LSR_WIDE_ZZZ_S)>;
+
+// [310] "lsr $Zd, $Zn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_ZZI_B, LSR_ZZI_D, LSR_ZZI_H, LSR_ZZI_S)>;
+
+// [311] "lsr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_WIDE_ZPmZ_B, LSR_WIDE_ZPmZ_H, LSR_WIDE_ZPmZ_S, LSR_ZPmZ_B, LSR_ZPmZ_D, LSR_ZPmZ_H, LSR_ZPmZ_S)>;
+
+// [312] "lsr $Zdn, $Pg/m, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSR_ZPmI_B, LSR_ZPmI_D, LSR_ZPmI_H, LSR_ZPmI_S)>;
+
+// [313] "lsrr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs LSRR_ZPmZ_B, LSRR_ZPmZ_D, LSRR_ZPmZ_H, LSRR_ZPmZ_S)>;
+
+// [314] "mad $Zdn, $Pg/m, $Zm, $Za";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MAD_ZPmZZ_B, MAD_ZPmZZ_D, MAD_ZPmZZ_H, MAD_ZPmZZ_S)>;
+
+// [315] "mla $Zda, $Pg/m, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MLA_ZPmZZ_B, MLA_ZPmZZ_D, MLA_ZPmZZ_H, MLA_ZPmZZ_S)>;
+
+// [316] "mls $Zda, $Pg/m, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MLS_ZPmZZ_B, MLS_ZPmZZ_D, MLS_ZPmZZ_H, MLS_ZPmZZ_S)>;
+
+// [317] "movprfx $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZPmZ_B, MOVPRFX_ZPmZ_D, MOVPRFX_ZPmZ_H, MOVPRFX_ZPmZ_S)>;
+
+// [318] "movprfx $Zd, $Pg/z, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZPzZ_B, MOVPRFX_ZPzZ_D, MOVPRFX_ZPzZ_H, MOVPRFX_ZPzZ_S)>;
+
+// [319] "movprfx $Zd, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs MOVPRFX_ZZ)>;
+
+// [320] "msb $Zdn, $Pg/m, $Zm, $Za";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MSB_ZPmZZ_B, MSB_ZPmZZ_D, MSB_ZPmZZ_H, MSB_ZPmZZ_S)>;
+
+// [321] "mul $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs MUL_ZPmZ_B, MUL_ZPmZ_D, MUL_ZPmZ_H, MUL_ZPmZ_S)>;
+
+// [322] "mul $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_9Cyc_GI0], (instrs MUL_ZI_B, MUL_ZI_D, MUL_ZI_H, MUL_ZI_S)>;
+
+// [323] "nand $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NAND_PPzPP)>;
+
+// [324] "nands $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NANDS_PPzPP)>;
+
+// [325] "neg $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs NEG_ZPmZ_B, NEG_ZPmZ_D, NEG_ZPmZ_H, NEG_ZPmZ_S)>;
+
+// [326] "nor $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NOR_PPzPP)>;
+
+// [327] "nors $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs NORS_PPzPP)>;
+
+// [328] "not $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs NOT_ZPmZ_B, NOT_ZPmZ_D, NOT_ZPmZ_H, NOT_ZPmZ_S)>;
+
+// [329] "orn $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORN_PPzPP)>;
+
+// [330] "orns $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORNS_PPzPP)>;
+
+// [331] "orr $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORR_PPzPP)>;
+
+// [332] "orr $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ORR_ZZZ)>;
+
+// [333] "orr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs ORR_ZPmZ_B, ORR_ZPmZ_D, ORR_ZPmZ_H, ORR_ZPmZ_S)>;
+
+// [334] "orr $Zdn, $_Zdn, $imms13";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs ORR_ZI)>;
+
+// [335] "orrs $Pd, $Pg/z, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs ORRS_PPzPP)>;
+
+// [336] "orv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs ORV_VPZ_B, ORV_VPZ_D, ORV_VPZ_H, ORV_VPZ_S)>;
+
+// [337] "pfalse $Pd";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PFALSE)>;
+
+// [338] "pnext $Pdn, $Pg, $_Pdn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PNEXT_B, PNEXT_D, PNEXT_H, PNEXT_S)>;
+
+// [339] "prfb $prfop, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFB_PRR)>;
+
+// [340] "prfb $prfop, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFB_D_SCALED, PRFB_D_SXTW_SCALED, PRFB_D_UXTW_SCALED, PRFB_S_SXTW_SCALED, PRFB_S_UXTW_SCALED)>;
+
+// [341] "prfb $prfop, $Pg, [$Rn, $imm6, mul vl]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFB_PRI)>;
+
+// [342] "prfb $prfop, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFB_D_PZI, PRFB_S_PZI)>;
+
+// [343] "prfd $prfop, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFD_PRR)>;
+
+// [344] "prfd $prfop, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFD_D_SCALED, PRFD_D_SXTW_SCALED, PRFD_D_UXTW_SCALED, PRFD_S_SXTW_SCALED, PRFD_S_UXTW_SCALED)>;
+
+// [345] "prfd $prfop, $Pg, [$Rn, $imm6, mul vl]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFD_PRI)>;
+
+// [346] "prfd $prfop, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFD_D_PZI, PRFD_S_PZI)>;
+
+// [347] "prfh $prfop, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRR)>;
+
+// [348] "prfh $prfop, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFH_D_SCALED, PRFH_D_SXTW_SCALED, PRFH_D_UXTW_SCALED, PRFH_S_SXTW_SCALED, PRFH_S_UXTW_SCALED)>;
+
+// [349] "prfh $prfop, $Pg, [$Rn, $imm6, mul vl]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFH_PRI)>;
+
+// [350] "prfh $prfop, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFH_D_PZI, PRFH_S_PZI)>;
+
+// [351] "prfw $prfop, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFS_PRR)>;
+
+// [352] "prfw $prfop, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_14Cyc_GI0256], (instrs PRFW_D_SCALED, PRFW_D_SXTW_SCALED, PRFW_D_UXTW_SCALED, PRFW_S_SXTW_SCALED, PRFW_S_UXTW_SCALED)>;
+
+// [353] "prfw $prfop, $Pg, [$Rn, $imm6, mul vl]";
+def : InstRW<[A64FXWrite_6Cyc_GI56], (instrs PRFW_PRI)>;
+
+// [354] "prfw $prfop, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_10Cyc_GI056], (instrs PRFW_D_PZI, PRFW_S_PZI)>;
+
+// [355] "ptest $Pg, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTEST_PP)>;
+
+// [356] "ptrue $Pd, $pattern";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTRUE_B, PTRUE_D, PTRUE_H, PTRUE_S)>;
+
+// [357] "ptrues $Pd, $pattern";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PTRUES_B, PTRUES_D, PTRUES_H, PTRUES_S)>;
+
+// [358] "punpkhi $Pd, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PUNPKHI_PP)>;
+
+// [359] "punpklo $Pd, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs PUNPKLO_PP)>;
+
+// [360] "rbit $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBIT_ZPmZ_B, RBIT_ZPmZ_D, RBIT_ZPmZ_H, RBIT_ZPmZ_S)>;
+
+// [361] "rdffr $Pd";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFR_P)>;
+
+// [362] "rdffr $Pd, $Pg/z";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFR_PPz)>;
+
+// [363] "rdffrs $Pd, $Pg/z";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs RDFFRS_PPz)>;
+
+// [364] "rdvl $Rd, $imm6";
+def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs RDVLI_XI)>;
+
+// [365] "rev $Pd, $Pn";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs REV_PP_B, REV_PP_D, REV_PP_H, REV_PP_S)>;
+
+// [366] "rev $Zd, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs REV_ZZ_B, REV_ZZ_D, REV_ZZ_H, REV_ZZ_S)>;
+
+// [367] "revb $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVB_ZPmZ_D, REVB_ZPmZ_H, REVB_ZPmZ_S)>;
+
+// [368] "revh $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVH_ZPmZ_D, REVH_ZPmZ_S)>;
+
+// [369] "revw $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs REVW_ZPmZ_D)>;
+
+// [370] "sabd $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SABD_ZPmZ_B, SABD_ZPmZ_D, SABD_ZPmZ_H, SABD_ZPmZ_S)>;
+
+// [371] "saddv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_12Cyc_GI03], (instrs SADDV_VPZ_B, SADDV_VPZ_H, SADDV_VPZ_S)>;
+
+// [372] "scvtf $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SCVTF_ZPmZ_DtoD, SCVTF_ZPmZ_DtoH, SCVTF_ZPmZ_DtoS, SCVTF_ZPmZ_HtoH, SCVTF_ZPmZ_StoD, SCVTF_ZPmZ_StoH, SCVTF_ZPmZ_StoS)>;
+
+// [373] "sdiv $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs SDIV_ZPmZ_D, SDIV_ZPmZ_S)>;
+
+// [374] "sdivr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs SDIVR_ZPmZ_D, SDIVR_ZPmZ_S)>;
+
+// [375] "sdot $Zda, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SDOT_ZZZ_D, SDOT_ZZZ_S)>;
+
+// [376] "sdot $Zda, $Zn, $Zm$iop";
+def : InstRW<[A64FXWrite_15Cyc_NGI03], (instrs SDOT_ZZZI_D, SDOT_ZZZI_S)>;
+
+// [377] "sel $Pd, $Pg, $Pn, $Pm";
+def : InstRW<[A64FXWrite_3Cyc_GI1], (instrs SEL_PPPP)>;
+
+// [378] "sel $Zd, $Pg, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SEL_ZPZZ_B, SEL_ZPZZ_D, SEL_ZPZZ_H, SEL_ZPZZ_S)>;
+
+// [379] "setffr";
+def : InstRW<[A64FXWrite_6Cyc], (instrs SETFFR)>;
+
+// [380] "smax $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SMAX_ZPmZ_B, SMAX_ZPmZ_D, SMAX_ZPmZ_H, SMAX_ZPmZ_S)>;
+
+// [381] "smax $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SMAX_ZI_B, SMAX_ZI_D, SMAX_ZI_H, SMAX_ZI_S)>;
+
+// [382] "smaxv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs SMAXV_VPZ_B, SMAXV_VPZ_D, SMAXV_VPZ_H, SMAXV_VPZ_S)>;
+
+// [383] "smin $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SMIN_ZPmZ_B, SMIN_ZPmZ_D, SMIN_ZPmZ_H, SMIN_ZPmZ_S)>;
+
+// [384] "smin $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SMIN_ZI_B, SMIN_ZI_D, SMIN_ZI_H, SMIN_ZI_S)>;
+
+// [385] "sminv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs SMINV_VPZ_B, SMINV_VPZ_D, SMINV_VPZ_H, SMINV_VPZ_S)>;
+
+// [386] "smulh $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs SMULH_ZPmZ_B, SMULH_ZPmZ_D, SMULH_ZPmZ_H, SMULH_ZPmZ_S)>;
+
+// [387] "splice $Zdn, $Pg, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SPLICE_ZPZ_B, SPLICE_ZPZ_D, SPLICE_ZPZ_H, SPLICE_ZPZ_S)>;
+
+// [388] "sqadd $Zd, $Zn, $Zm";
+
+// [389] "sqadd $Zdn, $_Zdn, $imm";
+
+// [390] "sqdecb $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECB_XPiWdI)>;
+
+// [391] "sqdecb $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECB_XPiI)>;
+
+// [392] "sqdecd $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECD_XPiWdI)>;
+
+// [393] "sqdecd $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECD_XPiI)>;
+
+// [394] "sqdecd $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECD_ZPiI)>;
+
+// [395] "sqdech $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECH_XPiWdI)>;
+
+// [396] "sqdech $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECH_XPiI)>;
+
+// [397] "sqdech $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECH_ZPiI)>;
+
+// [398] "sqdecp $Rdn, $Pg";
+def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQDECP_XP_B, SQDECP_XP_D, SQDECP_XP_H, SQDECP_XP_S)>;
+
+// [399] "sqdecp $Rdn, $Pg, $_Rdn";
+def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQDECP_XPWd_B, SQDECP_XPWd_D, SQDECP_XPWd_H, SQDECP_XPWd_S)>;
+
+// [400] "sqdecp $Zdn, $Pg";
+def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs SQDECP_ZP_D, SQDECP_ZP_H, SQDECP_ZP_S)>;
+
+// [401] "sqdecw $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECW_XPiWdI)>;
+
+// [402] "sqdecw $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQDECW_XPiI)>;
+
+// [403] "sqdecw $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQDECW_ZPiI)>;
+
+// [404] "sqincb $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCB_XPiWdI)>;
+
+// [405] "sqincb $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCB_XPiI)>;
+
+// [406] "sqincd $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCD_XPiWdI)>;
+
+// [407] "sqincd $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCD_XPiI)>;
+
+// [408] "sqincd $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCD_ZPiI)>;
+
+// [409] "sqinch $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCH_XPiWdI)>;
+
+// [410] "sqinch $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCH_XPiI)>;
+
+// [411] "sqinch $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCH_ZPiI)>;
+
+// [412] "sqincp $Rdn, $Pg";
+def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQINCP_XP_B, SQINCP_XP_D, SQINCP_XP_H, SQINCP_XP_S)>;
+
+// [413] "sqincp $Rdn, $Pg, $_Rdn";
+def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs SQINCP_XPWd_B, SQINCP_XPWd_D, SQINCP_XPWd_H, SQINCP_XPWd_S)>;
+
+// [414] "sqincp $Zdn, $Pg";
+def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs SQINCP_ZP_D, SQINCP_ZP_H, SQINCP_ZP_S)>;
+
+// [415] "sqincw $Rdn, $_Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCW_XPiWdI)>;
+
+// [416] "sqincw $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs SQINCW_XPiI)>;
+
+// [417] "sqincw $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SQINCW_ZPiI)>;
+
+// [418] "sqsub $Zd, $Zn, $Zm";
+
+// [419] "sqsub $Zdn, $_Zdn, $imm";
+
+// [420] "st1b $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B, ST1B_D, ST1B_H, ST1B_S)>;
+
+// [421] "st1b $Zt, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1B_D_REAL, SST1B_D_SXTW, SST1B_D_UXTW, SST1B_S_SXTW, SST1B_S_UXTW)>;
+
+// [422] "st1b $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1B_D_IMM, ST1B_H_IMM, ST1B_IMM, ST1B_S_IMM)>;
+
+// [423] "st1b $Zt, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1B_D_IMM, SST1B_S_IMM)>;
+
+// [424] "st1d $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D)>;
+
+// [425] "st1d $Zt, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1D_REAL, SST1D_SCALED_SCALED_REAL, SST1D_SXTW, SST1D_SXTW_SCALED, SST1D_UXTW, SST1D_UXTW_SCALED)>;
+
+// [426] "st1d $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1D_IMM)>;
+
+// [427] "st1d $Zt, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1D_IMM)>;
+
+// [428] "st1h $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H, ST1H_D, ST1H_S)>;
+
+// [429] "st1h $Zt, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1H_D_REAL, SST1H_D_SCALED_SCALED_REAL, SST1H_D_SXTW, SST1H_D_SXTW_SCALED, SST1H_D_UXTW, SST1H_D_UXTW_SCALED, SST1H_S_SXTW, SST1H_S_SXTW_SCALED, SST1H_S_UXTW, SST1H_S_UXTW_SCALED)>;
+
+// [430] "st1h $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1H_D_IMM, ST1H_IMM, ST1H_S_IMM)>;
+
+// [431] "st1h $Zt, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1H_D_IMM, SST1H_S_IMM)>;
+
+// [432] "st1w $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W, ST1W_D)>;
+
+// [433] "st1w $Zt, $Pg, [$Rn, $Zm]";
+def : InstRW<[A64FXWrite_ST1W_19], (instrs SST1W_D_REAL, SST1W_D_SCALED_SCALED_REAL, SST1W_D_SXTW, SST1W_D_SXTW_SCALED, SST1W_D_UXTW, SST1W_D_UXTW_SCALED, SST1W_SXTW, SST1W_SXTW_SCALED, SST1W_UXTW, SST1W_UXTW_SCALED)>;
+
+// [434] "st1w $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs ST1W_D_IMM, ST1W_IMM)>;
+
+// [435] "st1w $Zt, $Pg, [$Zn, $imm5]";
+def : InstRW<[A64FXWrite_ST1W_15], (instrs SST1W_D_IMM, SST1W_IMM)>;
+
+// [436] "st2b $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2B)>;
+
+// [437] "st2b $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2B_IMM)>;
+
+// [438] "st2d $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2D)>;
+
+// [439] "st2d $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2D_IMM)>;
+
+// [440] "st2h $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2H)>;
+
+// [441] "st2h $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2H_IMM)>;
+
+// [442] "st2w $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2W)>;
+
+// [443] "st2w $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST2W_7], (instrs ST2W_IMM)>;
+
+// [444] "st3b $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3B)>;
+
+// [445] "st3b $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3B_IMM)>;
+
+// [446] "st3d $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3D)>;
+
+// [447] "st3d $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3D_IMM)>;
+
+// [448] "st3h $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3H)>;
+
+// [449] "st3h $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3H_IMM)>;
+
+// [450] "st3w $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3W)>;
+
+// [451] "st3w $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST3W_8], (instrs ST3W_IMM)>;
+
+// [452] "st4b $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4B)>;
+
+// [453] "st4b $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4B_IMM)>;
+
+// [454] "st4d $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4D)>;
+
+// [455] "st4d $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4D_IMM)>;
+
+// [456] "st4h $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4H)>;
+
+// [457] "st4h $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4H_IMM)>;
+
+// [458] "st4w $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4W)>;
+
+// [459] "st4w $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST4W_9], (instrs ST4W_IMM)>;
+
+// [460] "stnt1b $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1B_ZRR)>;
+
+// [461] "stnt1b $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1B_ZRI)>;
+
+// [462] "stnt1d $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1D_ZRR)>;
+
+// [463] "stnt1d $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1D_ZRI)>;
+
+// [464] "stnt1h $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1H_ZRR)>;
+
+// [465] "stnt1h $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1H_ZRI)>;
+
+// [466] "stnt1w $Zt, $Pg, [$Rn, $Rm]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1W_ZRR)>;
+
+// [467] "stnt1w $Zt, $Pg, [$Rn, $imm4, mul vl]";
+def : InstRW<[A64FXWrite_ST1W_6], (instrs STNT1W_ZRI)>;
+
+// [468] "str $Pt, [$Rn, $imm9, mul vl]";
+def : InstRW<[A64FXWrite_6Cyc_GI15], (instrs STR_PXI)>;
+
+// [469] "str $Zt, [$Rn, $imm9, mul vl]";
+def : InstRW<[A64FXWrite_6Cyc_GI05], (instrs STR_ZXI)>;
+
+// [470] "sub $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZZZ_B, SUB_ZZZ_D, SUB_ZZZ_H, SUB_ZZZ_S)>;
+
+// [471] "sub $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZPmZ_B, SUB_ZPmZ_D, SUB_ZPmZ_H, SUB_ZPmZ_S)>;
+
+// [472] "sub $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUB_ZI_B, SUB_ZI_D, SUB_ZI_H, SUB_ZI_S)>;
+
+// [473] "subr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SUBR_ZPmZ_B, SUBR_ZPmZ_D, SUBR_ZPmZ_H, SUBR_ZPmZ_S)>;
+
+// [474] "subr $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs SUBR_ZI_B, SUBR_ZI_D, SUBR_ZI_H, SUBR_ZI_S)>;
+
+// [475] "sunpkhi $Zd, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SUNPKHI_ZZ_D, SUNPKHI_ZZ_H, SUNPKHI_ZZ_S)>;
+
+// [476] "sunpklo $Zd, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs SUNPKLO_ZZ_D, SUNPKLO_ZZ_H, SUNPKLO_ZZ_S)>;
+
+// [477] "sxtb $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTB_ZPmZ_D, SXTB_ZPmZ_H, SXTB_ZPmZ_S)>;
+
+// [478] "sxth $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTH_ZPmZ_D, SXTH_ZPmZ_S)>;
+
+// [479] "sxtw $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs SXTW_ZPmZ_D)>;
+
+// [480] "tbl $Zd, $Zn, $Zm";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs TBL_ZZZ_B, TBL_ZZZ_D, TBL_ZZZ_H, TBL_ZZZ_S)>;
+
+// [481] "trn1 $Pd, $Pn, $Pm";
+
+// [482] "trn1 $Zd, $Zn, $Zm";
+
+// [483] "trn2 $Pd, $Pn, $Pm";
+
+// [484] "trn2 $Zd, $Zn, $Zm";
+
+// [486] "uabd $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UABD_ZPmZ_B, UABD_ZPmZ_D, UABD_ZPmZ_H, UABD_ZPmZ_S)>;
+
+// [487] "uaddv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_12Cyc_GI03], (instrs UADDV_VPZ_B, UADDV_VPZ_D, UADDV_VPZ_H, UADDV_VPZ_S)>;
+
+// [488] "ucvtf $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UCVTF_ZPmZ_DtoD, UCVTF_ZPmZ_DtoH, UCVTF_ZPmZ_DtoS, UCVTF_ZPmZ_HtoH, UCVTF_ZPmZ_StoD, UCVTF_ZPmZ_StoH, UCVTF_ZPmZ_StoS)>;
+
+// [489] "udiv $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs UDIV_ZPmZ_D, UDIV_ZPmZ_S)>;
+
+// [490] "udivr $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_37Cyc_GI0], (instrs UDIVR_ZPmZ_D, UDIVR_ZPmZ_S)>;
+
+// [491] "udot $Zda, $Zn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UDOT_ZZZ_D, UDOT_ZZZ_S)>;
+
+// [492] "udot $Zda, $Zn, $Zm$iop";
+def : InstRW<[A64FXWrite_15Cyc_NGI03], (instrs UDOT_ZZZI_D, UDOT_ZZZI_S)>;
+
+// [493] "umax $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UMAX_ZPmZ_B, UMAX_ZPmZ_D, UMAX_ZPmZ_H, UMAX_ZPmZ_S)>;
+
+// [494] "umax $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs UMAX_ZI_B, UMAX_ZI_D, UMAX_ZI_H, UMAX_ZI_S)>;
+
+// [495] "umaxv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs UMAXV_VPZ_B, UMAXV_VPZ_D, UMAXV_VPZ_H, UMAXV_VPZ_S)>;
+
+// [496] "umin $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UMIN_ZPmZ_B, UMIN_ZPmZ_D, UMIN_ZPmZ_H, UMIN_ZPmZ_S)>;
+
+// [497] "umin $Zdn, $_Zdn, $imm";
+def : InstRW<[A64FXWrite_4Cyc_GI0], (instrs UMIN_ZI_B, UMIN_ZI_D, UMIN_ZI_H, UMIN_ZI_S)>;
+
+// [498] "uminv $Vd, $Pg, $Zn";
+def : InstRW<[A64FXWrite_14Cyc_GI03], (instrs UMINV_VPZ_B, UMINV_VPZ_D, UMINV_VPZ_H, UMINV_VPZ_S)>;
+
+// [499] "umulh $Zdn, $Pg/m, $_Zdn, $Zm";
+def : InstRW<[A64FXWrite_9Cyc_GI03], (instrs UMULH_ZPmZ_B, UMULH_ZPmZ_D, UMULH_ZPmZ_H, UMULH_ZPmZ_S)>;
+
+// [500] "uqadd $Zd, $Zn, $Zm";
+
+// [501] "uqadd $Zdn, $_Zdn, $imm";
+
+// [502] "uqdecb $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECB_WPiI, UQDECB_XPiI)>;
+
+// [503] "uqdecd $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECD_WPiI, UQDECD_XPiI)>;
+
+// [504] "uqdecd $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECD_ZPiI)>;
+
+// [505] "uqdech $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECH_WPiI, UQDECH_XPiI)>;
+
+// [506] "uqdech $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECH_ZPiI)>;
+
+// [507] "uqdecp $Rdn, $Pg";
+def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs UQDECP_WP_B, UQDECP_WP_D, UQDECP_WP_H, UQDECP_WP_S, UQDECP_XP_B, UQDECP_XP_D, UQDECP_XP_H, UQDECP_XP_S)>;
+
+// [508] "uqdecp $Zdn, $Pg";
+def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs UQDECP_ZP_D, UQDECP_ZP_H, UQDECP_ZP_S)>;
+
+// [509] "uqdecw $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQDECW_WPiI, UQDECW_XPiI)>;
+
+// [510] "uqdecw $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQDECW_ZPiI)>;
+
+// [511] "uqincb $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCB_WPiI, UQINCB_XPiI)>;
+
+// [512] "uqincd $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCD_WPiI, UQINCD_XPiI)>;
+
+// [513] "uqincd $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCD_ZPiI)>;
+
+// [514] "uqinch $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCH_WPiI, UQINCH_XPiI)>;
+
+// [515] "uqinch $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCH_ZPiI)>;
+
+// [516] "uqincp $Rdn, $Pg";
+def : InstRW<[A64FXWrite_8Cyc_GI124], (instrs UQINCP_WP_B, UQINCP_WP_D, UQINCP_WP_H, UQINCP_WP_S, UQINCP_XP_B, UQINCP_XP_D, UQINCP_XP_H, UQINCP_XP_S)>;
+
+// [517] "uqincp $Zdn, $Pg";
+def : InstRW<[A64FXWrite_12Cyc_GI01], (instrs UQINCP_ZP_D, UQINCP_ZP_H, UQINCP_ZP_S)>;
+
+// [518] "uqincw $Rdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs UQINCW_WPiI, UQINCW_XPiI)>;
+
+// [519] "uqincw $Zdn, $pattern, mul $imm4";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQINCW_ZPiI)>;
+
+// [520] "uqsub $Zd, $Zn, $Zm";
+//@@@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQSUB_ZZZ_B, UQSUB_ZZZ_D, UQSUB_ZZZ_H, UQSUB_ZZZ_S)>;
+
+// [521] "uqsub $Zdn, $_Zdn, $imm";
+//@@@ def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UQSUB_ZI_B, UQSUB_ZI_D, UQSUB_ZI_H, UQSUB_ZI_S)>;
+
+// [522] "uunpkhi $Zd, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs UUNPKHI_ZZ_D, UUNPKHI_ZZ_H, UUNPKHI_ZZ_S)>;
+
+// [523] "uunpklo $Zd, $Zn";
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs UUNPKLO_ZZ_D, UUNPKLO_ZZ_H, UUNPKLO_ZZ_S)>;
+
+// [524] "uxtb $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTB_ZPmZ_D, UXTB_ZPmZ_H, UXTB_ZPmZ_S)>;
+
+// [525] "uxth $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTH_ZPmZ_D, UXTH_ZPmZ_S)>;
+
+// [526] "uxtw $Zd, $Pg/m, $Zn";
+def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs UXTW_ZPmZ_D)>;
+
+// [527] "uzp1 $Pd, $Pn, $Pm";
+
+// [528] "uzp1 $Zd, $Zn, $Zm";
+
+// [529] "uzp2 $Pd, $Pn, $Pm";
+
+// [530] "uzp2 $Zd, $Zn, $Zm";
+
+// [531] "whilele $Pd, $Rn, $Rm";
+def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELE_PWW_B, WHILELE_PWW_D, WHILELE_PWW_H, WHILELE_PWW_S, WHILELE_PXX_B, WHILELE_PXX_D, WHILELE_PXX_H, WHILELE_PXX_S)>;
+
+// [532] "whilelo $Pd, $Rn, $Rm";
+def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELO_PWW_B, WHILELO_PWW_D, WHILELO_PWW_H, WHILELO_PWW_S, WHILELO_PXX_B, WHILELO_PXX_D, WHILELO_PXX_H, WHILELO_PXX_S)>;
+
+// [533] "whilels $Pd, $Rn, $Rm";
+def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELS_PWW_B, WHILELS_PWW_D, WHILELS_PWW_H, WHILELS_PWW_S, WHILELS_PXX_B, WHILELS_PXX_D, WHILELS_PXX_H, WHILELS_PXX_S)>;
+
+// [534] "whilelt $Pd, $Rn, $Rm";
+def : InstRW<[A64FXWrite_4Cyc_GI12], (instrs WHILELT_PWW_B, WHILELT_PWW_D, WHILELT_PWW_H, WHILELT_PWW_S, WHILELT_PXX_B, WHILELT_PXX_D, WHILELT_PXX_H, WHILELT_PXX_S)>;
+
+// [535] "wrffr $Pn";
+def : InstRW<[A64FXWrite_6Cyc_NGI1], (instrs WRFFR)>;
+
+// [536] "zip1 $Pd, $Pn, $Pm";
+
+// [537] "zip1 $Zd, $Zn, $Zm";
+
+// [538] "zip2 $Pd, $Pn, $Pm";
+
+// [539] "zip2 $Zd, $Zn, $Zm";
+
+} // SchedModel = A64FXModel
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedTSV110.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedTSV110.td
index 438371c1b6..0828d8a8c9 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedTSV110.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -1,745 +1,745 @@
-//==- AArch64SchedTSV110.td - Huawei TSV110 Scheduling Definitions -*- tablegen -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the machine model for Huawei TSV110 to support
-// instruction scheduling and other instruction cost heuristics.
-//
-//===----------------------------------------------------------------------===//
-
-// ===---------------------------------------------------------------------===//
-// The following definitions describe the simpler per-operand machine model.
-// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
-
-// Huawei TSV110 scheduling machine model.
-def TSV110Model : SchedMachineModel {
- let IssueWidth = 4; // 4 micro-ops dispatched per cycle.
- let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
- let LoopMicroOpBufferSize = 16;
- let LoadLatency = 4; // Optimistic load latency.
- let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
- let CompleteModel = 1;
-
- list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
- PAUnsupported.F);
-}
-
-// Define each kind of processor resource and the number available on the
-// TSV110, which has 8 execution pipelines, each with its own queue where
-// micro-ops wait for their operands before issuing out-of-order.
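-// (ALU x1 + ALU/BRU x2 + MDU x1 + FSU1 x1 + FSU2 x1 + LdSt x2 = 8 pipelines.)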
-let SchedModel = TSV110Model in {
- def TSV110UnitALU : ProcResource<1>; // Int ALU
- def TSV110UnitAB : ProcResource<2>; // Int ALU/BRU
- def TSV110UnitMDU : ProcResource<1>; // Multi-Cycle
- def TSV110UnitFSU1 : ProcResource<1>; // FP/ASIMD
- def TSV110UnitFSU2 : ProcResource<1>; // FP/ASIMD
- def TSV110UnitLdSt : ProcResource<2>; // Load/Store
-
- def TSV110UnitF : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2]>;
- def TSV110UnitALUAB : ProcResGroup<[TSV110UnitALU, TSV110UnitAB]>;
- def TSV110UnitFLdSt : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2, TSV110UnitLdSt]>;
-}
-
-let SchedModel = TSV110Model in {
-
-//===----------------------------------------------------------------------===//
-// Map the target-defined scheduler read/write resources and latency for
-// TSV110
-
-// Integer ALU
-def : WriteRes<WriteImm, [TSV110UnitALUAB]> { let Latency = 1; }
-def : WriteRes<WriteI, [TSV110UnitALUAB]> { let Latency = 1; }
-def : WriteRes<WriteISReg, [TSV110UnitMDU]> { let Latency = 2; }
-def : WriteRes<WriteIEReg, [TSV110UnitMDU]> { let Latency = 2; }
-def : WriteRes<WriteExtr, [TSV110UnitALUAB]> { let Latency = 1; }
-def : WriteRes<WriteIS, [TSV110UnitALUAB]> { let Latency = 1; }
-
-// Integer Mul/MAC/Div
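-// The ResourceCycles below keep the single MDU pipe busy for the full divide
-// latency, so integer divides are effectively not pipelined.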
-def : WriteRes<WriteID32, [TSV110UnitMDU]> { let Latency = 12;
- let ResourceCycles = [12]; }
-def : WriteRes<WriteID64, [TSV110UnitMDU]> { let Latency = 20;
- let ResourceCycles = [20]; }
-def : WriteRes<WriteIM32, [TSV110UnitMDU]> { let Latency = 3; }
-def : WriteRes<WriteIM64, [TSV110UnitMDU]> { let Latency = 4; }
-
-// Load
-def : WriteRes<WriteLD, [TSV110UnitLdSt]> { let Latency = 4; }
-def : WriteRes<WriteLDIdx, [TSV110UnitLdSt]> { let Latency = 4; }
-def : WriteRes<WriteLDHi, []> { let Latency = 4; }
-
-// Pre/Post Indexing
-def : WriteRes<WriteAdr, [TSV110UnitALUAB]> { let Latency = 1; }
-
-// Store
-def : WriteRes<WriteST, [TSV110UnitLdSt]> { let Latency = 1; }
-def : WriteRes<WriteSTP, [TSV110UnitLdSt]> { let Latency = 1; }
-def : WriteRes<WriteSTIdx, [TSV110UnitLdSt]> { let Latency = 1; }
-
-// FP
-def : WriteRes<WriteF, [TSV110UnitF]> { let Latency = 2; }
-def : WriteRes<WriteFCmp, [TSV110UnitF]> { let Latency = 3; }
-def : WriteRes<WriteFCvt, [TSV110UnitF]> { let Latency = 3; }
-def : WriteRes<WriteFCopy, [TSV110UnitF]> { let Latency = 2; }
-def : WriteRes<WriteFImm, [TSV110UnitF]> { let Latency = 2; }
-def : WriteRes<WriteFMul, [TSV110UnitF]> { let Latency = 5; }
-
-// FP Div, Sqrt
-def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; }
-
-def : WriteRes<WriteV, [TSV110UnitF]> { let Latency = 4; }
-def : WriteRes<WriteVLD, [TSV110UnitFLdSt]> { let Latency = 5; }
-def : WriteRes<WriteVST, [TSV110UnitF]> { let Latency = 1; }
-
-// Branch
-def : WriteRes<WriteBr, [TSV110UnitAB]> { let Latency = 1; }
-def : WriteRes<WriteBrReg, [TSV110UnitAB]> { let Latency = 1; }
-def : WriteRes<WriteSys, []> { let Latency = 1; }
-def : WriteRes<WriteBarrier, []> { let Latency = 1; }
-def : WriteRes<WriteHint, []> { let Latency = 1; }
-
-def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
-
-// Forwarding logic is modeled only for multiply and accumulate.
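-// The ReadAdvance of 2 on ReadIMA lets a multiply-accumulate read an accumulator
-// produced by WriteIM32/WriteIM64 two cycles early, shortening MADD/MSUB chains.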
-def : ReadAdvance<ReadI, 0>;
-def : ReadAdvance<ReadISReg, 0>;
-def : ReadAdvance<ReadIEReg, 0>;
-def : ReadAdvance<ReadIM, 0>;
-def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
-def : ReadAdvance<ReadID, 0>;
-def : ReadAdvance<ReadExtrHi, 0>;
-def : ReadAdvance<ReadAdrBase, 0>;
-def : ReadAdvance<ReadVLD, 0>;
-
-def : InstRW<[WriteI], (instrs COPY)>;
-
-// Detailed Refinements
-//===----------------------------------------------------------------------===//
-
-// Contains all of the TSV110-specific SchedWriteRes types. The approach
-// below is to define a generic SchedWriteRes for every combination of
-// latency and micro-ops. The naming convention is to use a prefix, one field
-// for latency, and one or more micro-op count/type designators.
-// Prefix: TSV110Wr
-// Latency: #cyc
-// MicroOp Count/Types: #(ALU|AB|MDU|FSU1|FSU2|LdSt|ALUAB|F|FLdSt)
-//
-// e.g. TSV110Wr_6cyc_1ALU_6MDU_4LdSt would mean a total latency of 6 cycles and
-// 11 micro-ops: one issued down the ALU pipe, six down MDU pipes and four down
-// LdSt pipes (a hypothetical definition of this shape is sketched below).
-//
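-// As an illustration only (this write type is not used anywhere in the model
-// below), a definition matching that example name could look roughly like:
-//
-// def TSV110Wr_6cyc_1ALU_6MDU_4LdSt
-//     : SchedWriteRes<[TSV110UnitALU,
-//                      TSV110UnitMDU, TSV110UnitMDU, TSV110UnitMDU,
-//                      TSV110UnitMDU, TSV110UnitMDU, TSV110UnitMDU,
-//                      TSV110UnitLdSt, TSV110UnitLdSt,
-//                      TSV110UnitLdSt, TSV110UnitLdSt]> {
-//   let Latency = 6;
-//   let NumMicroOps = 11;
-// }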
-
-//===----------------------------------------------------------------------===//
-// Define Generic 1 micro-op types
-
-def TSV110Wr_1cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 1; }
-def TSV110Wr_1cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 1; }
-def TSV110Wr_1cyc_1ALUAB : SchedWriteRes<[TSV110UnitALUAB]> { let Latency = 1; }
-def TSV110Wr_1cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 1; }
-
-def TSV110Wr_2cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 2; }
-def TSV110Wr_2cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 2; }
-def TSV110Wr_2cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 2; }
-def TSV110Wr_2cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 2; }
-def TSV110Wr_2cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 2; }
-def TSV110Wr_2cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 2; }
-
-def TSV110Wr_3cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 3; }
-def TSV110Wr_3cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 3; }
-def TSV110Wr_3cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 3; }
-
-def TSV110Wr_4cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 4; }
-def TSV110Wr_4cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 4; }
-def TSV110Wr_4cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 4; }
-def TSV110Wr_4cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 4; }
-
-def TSV110Wr_5cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 5; }
-def TSV110Wr_5cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 5; }
-def TSV110Wr_5cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 5; }
-def TSV110Wr_5cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 5; }
-
-def TSV110Wr_6cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 6; }
-
-def TSV110Wr_7cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 7; }
-
-def TSV110Wr_8cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 8; }
-
-def TSV110Wr_11cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 11; }
-
-def TSV110Wr_12cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 12; }
-
-def TSV110Wr_17cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 17; }
-
-def TSV110Wr_18cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 18; }
-
-def TSV110Wr_20cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 20; }
-
-def TSV110Wr_24cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 24; }
-
-def TSV110Wr_31cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 31; }
-
-def TSV110Wr_36cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 36; }
-
-def TSV110Wr_38cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 38; }
-
-def TSV110Wr_64cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 64; }
-
-//===----------------------------------------------------------------------===//
-// Define Generic 2 micro-op types
-
-def TSV110Wr_1cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
- TSV110UnitALUAB]> {
- let Latency = 1;
- let NumMicroOps = 2;
-}
-
-def TSV110Wr_2cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
- TSV110UnitALUAB]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-
-def TSV110Wr_2cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt,
- TSV110UnitLdSt]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-
-def TSV110Wr_2cyc_2F : SchedWriteRes<[TSV110UnitF,
- TSV110UnitF]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-
-def TSV110Wr_2cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
- TSV110UnitFSU2]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-
-def TSV110Wr_4cyc_2F : SchedWriteRes<[TSV110UnitF,
- TSV110UnitF]> {
- let Latency = 4;
- let NumMicroOps = 2;
-}
-
-def TSV110Wr_4cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
- TSV110UnitFSU2]> {
- let Latency = 4;
- let NumMicroOps = 2;
-}
-
-def TSV110Wr_4cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
- TSV110UnitALUAB]> {
- let Latency = 4;
- let NumMicroOps = 2;
-}
-
-def TSV110Wr_5cyc_1ALU_1F : SchedWriteRes<[TSV110UnitALU,
- TSV110UnitF]> {
- let Latency = 5;
- let NumMicroOps = 2;
-}
-
-def TSV110Wr_6cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt,
- TSV110UnitLdSt]> {
- let Latency = 6;
- let NumMicroOps = 2;
-}
-
-def TSV110Wr_6cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
- TSV110UnitALUAB]> {
- let Latency = 6;
- let NumMicroOps = 2;
-}
-
-def TSV110Wr_7cyc_1F_1LdSt : SchedWriteRes<[TSV110UnitF,
- TSV110UnitLdSt]> {
- let Latency = 7;
- let NumMicroOps = 2;
-}
-
-def TSV110Wr_8cyc_2FSU1 : SchedWriteRes<[TSV110UnitFSU1,
- TSV110UnitFSU1]> {
- let Latency = 8;
- let NumMicroOps = 2;
-}
-
-
-def TSV110Wr_8cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
- TSV110UnitFSU2]> {
- let Latency = 8;
- let NumMicroOps = 2;
-}
-
-//===----------------------------------------------------------------------===//
-// Define Generic 3 micro-op types
-
-def TSV110Wr_6cyc_3F : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
- TSV110UnitF]> {
- let Latency = 6;
- let NumMicroOps = 3;
-}
-
-def TSV110Wr_6cyc_3LdSt : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitLdSt,
- TSV110UnitLdSt]> {
- let Latency = 6;
- let NumMicroOps = 3;
-}
-
-def TSV110Wr_7cyc_2F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
- TSV110UnitLdSt]> {
- let Latency = 7;
- let NumMicroOps = 3;
-}
-
-//===----------------------------------------------------------------------===//
-// Define Generic 4 micro-op types
-
-def TSV110Wr_8cyc_4F : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
- TSV110UnitF, TSV110UnitF]> {
- let Latency = 8;
- let NumMicroOps = 4;
-}
-
-def TSV110Wr_8cyc_3F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
- TSV110UnitF, TSV110UnitLdSt]> {
- let Latency = 8;
- let NumMicroOps = 4;
-}
-
-//===----------------------------------------------------------------------===//
-// Define Generic 5 micro-op types
-
-def TSV110Wr_8cyc_3F_2LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF,
- TSV110UnitLdSt, TSV110UnitLdSt]> {
- let Latency = 8;
- let NumMicroOps = 5;
-}
-
-//===----------------------------------------------------------------------===//
-// Define Generic 8 micro-op types
-
-def TSV110Wr_10cyc_4F_4LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
- TSV110UnitF, TSV110UnitF,
- TSV110UnitLdSt, TSV110UnitLdSt,
- TSV110UnitLdSt, TSV110UnitLdSt]> {
- let Latency = 10;
- let NumMicroOps = 8;
-}
-
-
-// Branch Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_1cyc_1AB], (instrs B)>;
-def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BL)>;
-def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BLR)>;
-def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ))$")>;
-
-
-// Cryptography Extensions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AES[DE]")>;
-def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AESI?MC")>;
-def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA1SU1")>;
-def : InstRW<[TSV110Wr_2cyc_2F], (instregex "^SHA1(H|SU0)")>;
-def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA1[CMP]")>;
-def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA256SU0")>;
-def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^SHA256SU1")>;
-def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA256(H|H2)")>;
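-// Back-to-back CRC32 instructions see a 1-cycle forwarding advance on the
-// accumulator value, modeled by the SchedReadAdvance below.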
-def TSV110ReadCRC: SchedReadAdvance<1, [TSV110Wr_2cyc_1MDU]>;
-def : InstRW<[TSV110Wr_2cyc_1MDU, TSV110ReadCRC], (instregex "^CRC32.*$")>;
-
-
-// Arithmetic and Logical Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(BIC|EON|ORN)[WX]rr")>;
-def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(BIC)S[WX]rr")>;
-
-def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(ADD|AND|EOR|ORR|SUB)[WX]r(r|i)")>;
-def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(ADD|AND|EOR|ORR|SUB)S[WX]r(r|i)")>;
-
-def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(ADC|SBC|BIC)[WX]r$")>;
-def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(ADC|SBC)S[WX]r$")>;
-
-def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
-def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)S[WX]rs$")>;
-def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(ADD|SUB)[WX]r(s|x|x64)$")>;
-def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(ADD|SUB)S[WX]r(s|x|x64)$")>;
-
-def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
-def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
-
-
-// Move and Shift Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instrs ADR, ADRP)>;
-def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^MOV[NZK][WX]i")>;
-def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(LSLV|LSRV|ASRV|RORV)(W|X)r")>;
-
-
-// Divide and Multiply Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_12cyc_1MDU], (instregex "^(S|U)DIVWr$")>;
-def : InstRW<[TSV110Wr_20cyc_1MDU], (instregex "^(S|U)DIVXr$")>;
-
-def TSV110ReadMAW : SchedReadAdvance<2, [TSV110Wr_3cyc_1MDU]>;
-def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instrs MADDWrrr, MSUBWrrr)>;
-def TSV110ReadMAQ : SchedReadAdvance<3, [TSV110Wr_4cyc_1MDU]>;
-def : InstRW<[TSV110Wr_4cyc_1MDU, TSV110ReadMAQ], (instrs MADDXrrr, MSUBXrrr)>;
-def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instregex "(S|U)(MADDL|MSUBL)rrr")>;
-def : InstRW<[TSV110Wr_4cyc_1MDU], (instregex "^(S|U)MULHrr$")>;
-
-
-// Miscellaneous Data-Processing Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^EXTR(W|X)rri$")>;
-def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(S|U)?BFM(W|X)ri$")>;
-def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CLS|CLZ|RBIT|REV(16|32)?)(W|X)r$")>;
-
-
-// Load Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(W|X)l$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs LDRSWl)>;
-
-def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(BB|HH|W|X)ui$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
-
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
-
-def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTR(B|H|W|X)i$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDUR(BB|HH|W|X)i$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
-
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDP(W|X)i$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
-
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWi)>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpost)>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpre)>;
-
-def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFMl)>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFUMi)>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMui$")>;
-def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMro(W|X)$")>;
-
-
-// Store Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STN?P(W|X)i$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR(BB|HH|W|X)i$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STTR(B|H|W|X)i$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)ui$")>;
-
-def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
-
-
-// FP Data Processing Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_2cyc_1F], (instregex "F(ABS|NEG)(D|S)r")>;
-def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCCMP(E)?(S|D)rr$")>;
-def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
-def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCSEL(S|D)rrr$")>;
-
-def : InstRW<[TSV110Wr_11cyc_1FSU1], (instrs FDIVSrr)>;
-def : InstRW<[TSV110Wr_18cyc_1FSU1], (instrs FDIVDrr)>;
-def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTSr)>;
-def : InstRW<[TSV110Wr_31cyc_1FSU2], (instrs FSQRTDr)>;
-
-def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN).+rr")>;
-
-def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^FN?M(ADD|SUB)Hrrr")>;
-def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FN?M(ADD|SUB)Srrr")>;
-def : InstRW<[TSV110Wr_7cyc_1F], (instregex "^FN?M(ADD|SUB)Drrr")>;
-
-def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Hrr")>;
-def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|SUB)Srr")>;
-def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Drr")>;
-
-def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(N)?MULHrr$")>;
-def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULSrr$")>;
-def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULDrr$")>;
-
-def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT.+r")>;
-
-
-// FP Miscellaneous Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_5cyc_1ALU_1F], (instregex "^[SU]CVTF[SU][WX][SD]ri")>;
-def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
-def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT[HSD][HSD]r")>;
-
-def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^FMOV(DX|WS|XD|SW|DXHigh|XDHigh)r$")>;
-def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOV[SD][ir]$")>;
-
-
-// FP Load Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[DSQ]l")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDUR[BDHSQ]i")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[BDHSQ]ui")>;
-def : InstRW<[TSV110Wr_6cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDN?P[DQS]i")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi, WriteAdr], (instregex "^LDP[DQS](post|pre)")>;
-
-
-// FP Store Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR[BHSDQ]i")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ](post|pre)")>;
-def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR[BHSDQ]ui")>;
-def : InstRW<[TSV110Wr_2cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]")>;
-def : InstRW<[TSV110Wr_2cyc_2LdSt], (instregex "^STN?P[SDQ]i")>;
-def : InstRW<[TSV110Wr_2cyc_2LdSt, WriteAdr], (instregex "^STP[SDQ](post|pre)")>;
-
-
-// ASIMD Integer Instructions
-// -----------------------------------------------------------------------------
-
-// Reference for forms in this group
-// D form - v8i8, v4i16, v2i32
-// Q form - v16i8, v8i16, v4i32
-// D form - v1i8, v1i16, v1i32, v1i64
-// Q form - v16i8, v8i16, v4i32, v2i64
-// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
-// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
-
-// ASIMD simple arithmetic
-def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(ABS|ADD(P)?|NEG|SUB)v")>;
-def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](ADD(L|LP|W)|SUB(L|W))v")>;
-
-// ASIMD complex arithmetic
-def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]H(ADD|SUB)v")>;
-def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^R?(ADD|SUB)HN2?v")>;
-def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]Q(ADD|SUB)v")>;
-def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^(SU|US)QADDv")>;
-def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]RHADDv")>;
-def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABAL?v")>;
-def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABDL?v")>;
-def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ADALPv")>;
-def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^((SQ)(ABS|NEG))v")>;
-
-// ASIMD compare
-def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT|TST)v")>;
-
-// ASIMD max/min
-def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)P?v")>;
-
-// ASIMD logical
-def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(AND|BIC|BIF|BIT|BSL|EOR|MVN|NOT|ORN|ORR)v")>;
-
-// ASIMD multiply accumulate, D-form
-def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)")>;
-// ASIMD multiply accumulate, Q-form
-def : InstRW<[TSV110Wr_8cyc_2FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v16i8|v8i16|v4i32)")>;
-
-// ASIMD multiply accumulate long
-def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
-def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v8i8|v16i8)")>;
-def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v1i64|v2i64)")>;
-
-// ASIMD shift
-// ASIMD shift accumulate
-def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(S|SR|U|UR)SRA")>;
-// ASIMD shift by immed, basic
-def : InstRW<[TSV110Wr_4cyc_1FSU1],
- (instregex "SHLv","SLIv","SRIv","SHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
-// ASIMD shift by immed, complex
-def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]?(Q|R){1,2}SHR")>;
-def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^SQSHLU")>;
-// ASIMD shift by register, basic, Q-form
-def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
-// ASIMD shift by register, complex, D-form
-def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
-// ASIMD shift by register, complex, Q-form
-def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
-
-// ASIMD reduction
-// ASIMD arith, reduce, 4H/4S
-def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
-// ASIMD arith, reduce, 8B/8H
-def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
-// ASIMD arith, reduce, 16B
-def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?Vv16i8v$")>;
-
-// ASIMD max/min, reduce, 4H/4S
-def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
-// ASIMD max/min, reduce, 8B/8H
-def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
-// ASIMD max/min, reduce, 16B
-def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
-
-
-// Vector - Floating Point
-// -----------------------------------------------------------------------------
-
-// Reference for forms in this group
-// D form - v2f32
-// Q form - v4f32, v2f64
-// D form - 32, 64
-// D form - v1i32, v1i64
-// D form - v2i32
-// Q form - v4i32, v2i64
-
-// ASIMD FP sign manipulation
-def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FABSv")>;
-def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FNEGv")>;
-
-// ASIMD FP compare
-def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v")>;
-
-// ASIMD FP convert
-def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FCVT[AMNPZ][SU]v")>;
-def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT(L)v")>;
-def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FCVT(N|XN)v")>;
-
-// ASIMD FP divide, D-form, F32
-def : InstRW<[TSV110Wr_11cyc_1FSU1], (instregex "FDIVv2f32")>;
-// ASIMD FP divide, Q-form, F32
-def : InstRW<[TSV110Wr_24cyc_1FSU1], (instregex "FDIVv4f32")>;
-// ASIMD FP divide, Q-form, F64
-def : InstRW<[TSV110Wr_38cyc_1FSU1], (instregex "FDIVv2f64")>;
-
-// ASIMD FP SQRT
-def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTv2f32)>;
-def : InstRW<[TSV110Wr_36cyc_1FSU2], (instrs FSQRTv4f32)>;
-def : InstRW<[TSV110Wr_64cyc_1FSU2], (instrs FSQRTv2f64)>;
-
-// ASIMD FP max,min
-def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?v")>;
-def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?Pv")>;
-def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(MAX|MIN)(NM)?Vv")>;
-
-// ASIMD FP add
-def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|ADDP|SUB)v")>;
-
-// ASIMD FP multiply
-def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FMULX?v")>;
-
-
-// ASIMD Miscellaneous Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(CLS|CLZ|CNT)v")>;
-def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(DUP|INS)v.+lane")>;
-def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^REV(16|32|64)v")>;
-def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(UZP|ZIP)[12]v")>;
-
-def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^EXTv")>;
-def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^XTNv")>;
-def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^RBITv")>;
-
-def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^(INS|DUP)v.+gpr")>;
-
-def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^[SU]MOVv")>;
-
-// ASIMD table lookup, D-form
-def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v8i8One")>;
-def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v8i8Two")>;
-def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v8i8Three")>;
-def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v8i8Four")>;
-// ASIMD table lookup, Q-form
-def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v16i8One")>;
-def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v16i8Two")>;
-def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v16i8Three")>;
-def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v16i8Four")>;
-
-def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOVv")>;
-
-def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT[AIMNPXZ]v")>;
-def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[SU]CVTFv")>;
-def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[FU](RECP|RSQRT)(E|X)v")>;
-
-
-// ASIMD Load Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "LD4i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
-
-def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[TSV110Wr_6cyc_3LdSt], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_6cyc_3LdSt, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[TSV110Wr_6cyc_2LdSt], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_6cyc_2LdSt, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_10cyc_4F_4LdSt, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-
-// ASIMD Store Instructions
-// -----------------------------------------------------------------------------
-
-def : InstRW<[TSV110Wr_3cyc_1F], (instregex "ST1i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
-def : InstRW<[TSV110Wr_4cyc_1F], (instregex "ST2i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
-def : InstRW<[TSV110Wr_5cyc_1F], (instregex "ST3i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
-def : InstRW<[TSV110Wr_6cyc_1F], (instregex "ST4i(8|16|32|64)$")>;
-def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
-
-def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[TSV110Wr_6cyc_1F], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-def : InstRW<[TSV110Wr_8cyc_1F], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[TSV110Wr_8cyc_1F, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-
-} // SchedModel = TSV110Model
+//==- AArch64SchedTSV110.td - Huawei TSV110 Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Huawei TSV110 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
+
+// Huawei TSV110 scheduling machine model.
+def TSV110Model : SchedMachineModel {
+ let IssueWidth = 4; // 4 micro-ops dispatched per cycle.
+ let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
+ let LoopMicroOpBufferSize = 16;
+ let LoadLatency = 4; // Optimistic load latency.
+ let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
+ PAUnsupported.F);
+}
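+
+// Illustrative only (not part of this file): a SchedMachineModel like
+// TSV110Model is attached to a CPU definition in AArch64.td through the
+// ProcessorModel class, roughly as sketched below. The feature list here is a
+// hypothetical abbreviation; the real "tsv110" entry carries the CPU's full
+// feature set.
+//
+//   def : ProcessorModel<"tsv110", TSV110Model, [HasV8_2aOps, FeatureCRC]>;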
+
+// Define each kind of processor resource and the number available on the
+// TSV110, which has eight execution pipelines, each with its own queue where
+// micro-ops wait for their operands before issuing out of order.
+let SchedModel = TSV110Model in {
+ def TSV110UnitALU : ProcResource<1>; // Int ALU
+ def TSV110UnitAB : ProcResource<2>; // Int ALU/BRU
+ def TSV110UnitMDU : ProcResource<1>; // Multi-Cycle
+ def TSV110UnitFSU1 : ProcResource<1>; // FP/ASIMD
+ def TSV110UnitFSU2 : ProcResource<1>; // FP/ASIMD
+ def TSV110UnitLdSt : ProcResource<2>; // Load/Store
+
+ def TSV110UnitF : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2]>;
+ def TSV110UnitALUAB : ProcResGroup<[TSV110UnitALU, TSV110UnitAB]>;
+ def TSV110UnitFLdSt : ProcResGroup<[TSV110UnitFSU1, TSV110UnitFSU2, TSV110UnitLdSt]>;
+}
+
+let SchedModel = TSV110Model in {
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for
+// TSV110
+
+// Integer ALU
+def : WriteRes<WriteImm, [TSV110UnitALUAB]> { let Latency = 1; }
+def : WriteRes<WriteI, [TSV110UnitALUAB]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [TSV110UnitMDU]> { let Latency = 2; }
+def : WriteRes<WriteIEReg, [TSV110UnitMDU]> { let Latency = 2; }
+def : WriteRes<WriteExtr, [TSV110UnitALUAB]> { let Latency = 1; }
+def : WriteRes<WriteIS, [TSV110UnitALUAB]> { let Latency = 1; }
+
+// Integer Mul/MAC/Div
+def : WriteRes<WriteID32, [TSV110UnitMDU]> { let Latency = 12;
+ let ResourceCycles = [12]; }
+def : WriteRes<WriteID64, [TSV110UnitMDU]> { let Latency = 20;
+ let ResourceCycles = [20]; }
+def : WriteRes<WriteIM32, [TSV110UnitMDU]> { let Latency = 3; }
+def : WriteRes<WriteIM64, [TSV110UnitMDU]> { let Latency = 4; }
+
+// Load
+def : WriteRes<WriteLD, [TSV110UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteLDIdx, [TSV110UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+// Pre/Post Indexing
+def : WriteRes<WriteAdr, [TSV110UnitALUAB]> { let Latency = 1; }
+
+// Store
+def : WriteRes<WriteST, [TSV110UnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTP, [TSV110UnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTIdx, [TSV110UnitLdSt]> { let Latency = 1; }
+
+// FP
+def : WriteRes<WriteF, [TSV110UnitF]> { let Latency = 2; }
+def : WriteRes<WriteFCmp, [TSV110UnitF]> { let Latency = 3; }
+def : WriteRes<WriteFCvt, [TSV110UnitF]> { let Latency = 3; }
+def : WriteRes<WriteFCopy, [TSV110UnitF]> { let Latency = 2; }
+def : WriteRes<WriteFImm, [TSV110UnitF]> { let Latency = 2; }
+def : WriteRes<WriteFMul, [TSV110UnitF]> { let Latency = 5; }
+
+// FP Div, Sqrt
+def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; }
+
+def : WriteRes<WriteV, [TSV110UnitF]> { let Latency = 4; }
+def : WriteRes<WriteVLD, [TSV110UnitFLdSt]> { let Latency = 5; }
+def : WriteRes<WriteVST, [TSV110UnitF]> { let Latency = 1; }
+
+// Branch
+def : WriteRes<WriteBr, [TSV110UnitAB]> { let Latency = 1; }
+def : WriteRes<WriteBrReg, [TSV110UnitAB]> { let Latency = 1; }
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Forwarding logic is modeled only for multiply and accumulate.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
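+// For example, with the ReadAdvance of 2 on ReadIMA above, the accumulator
+// operand of an integer multiply-accumulate that consumes a preceding
+// WriteIM32 result (latency 3) sees an effective latency of 3 - 2 = 1 cycle
+// (4 - 2 = 2 cycles for WriteIM64), while its other operands, which use ReadIM
+// with an advance of 0, still observe the full multiply latency.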
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// Detailed Refinements
+//===----------------------------------------------------------------------===//
+
+// Contains all of the TSV110-specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and micro-op count. The naming convention is to use a prefix, one
+// field for latency, and one or more micro-op count/type designators.
+// Prefix: TSV110Wr
+// Latency: #cyc
+// MicroOp Count/Types: #(ALU|AB|MDU|FSU1|FSU2|LdSt|ALUAB|F|FLdSt)
+//
+// e.g. TSV110Wr_6cyc_1ALU_6MDU_4LdSt means the total latency is 6 cycles and
+//      there are 11 micro-ops in total: one issued to the ALU resource, six to
+//      the MDU resource, and four to the LdSt resource.
+//
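+// As an illustration of the convention, a hypothetical write type (not used by
+// the model below) occupying one FSU1 pipe and one LdSt pipe with a total
+// latency of 9 cycles would be named and defined as:
+//
+//   def TSV110Wr_9cyc_1FSU1_1LdSt : SchedWriteRes<[TSV110UnitFSU1,
+//                                                  TSV110UnitLdSt]> {
+//     let Latency     = 9;
+//     let NumMicroOps = 2;
+//   }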
+
+//===----------------------------------------------------------------------===//
+// Define Generic 1 micro-op types
+
+def TSV110Wr_1cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 1; }
+def TSV110Wr_1cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 1; }
+def TSV110Wr_1cyc_1ALUAB : SchedWriteRes<[TSV110UnitALUAB]> { let Latency = 1; }
+def TSV110Wr_1cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 1; }
+
+def TSV110Wr_2cyc_1AB : SchedWriteRes<[TSV110UnitAB]> { let Latency = 2; }
+def TSV110Wr_2cyc_1ALU : SchedWriteRes<[TSV110UnitALU]> { let Latency = 2; }
+def TSV110Wr_2cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 2; }
+def TSV110Wr_2cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 2; }
+def TSV110Wr_2cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 2; }
+def TSV110Wr_2cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 2; }
+
+def TSV110Wr_3cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 3; }
+def TSV110Wr_3cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 3; }
+def TSV110Wr_3cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 3; }
+
+def TSV110Wr_4cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 4; }
+def TSV110Wr_4cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 4; }
+def TSV110Wr_4cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 4; }
+def TSV110Wr_4cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 4; }
+
+def TSV110Wr_5cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 5; }
+def TSV110Wr_5cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 5; }
+def TSV110Wr_5cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 5; }
+def TSV110Wr_5cyc_1LdSt : SchedWriteRes<[TSV110UnitLdSt]> { let Latency = 5; }
+
+def TSV110Wr_6cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 6; }
+
+def TSV110Wr_7cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 7; }
+
+def TSV110Wr_8cyc_1F : SchedWriteRes<[TSV110UnitF]> { let Latency = 8; }
+
+def TSV110Wr_11cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 11; }
+
+def TSV110Wr_12cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 12; }
+
+def TSV110Wr_17cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 17; }
+
+def TSV110Wr_18cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 18; }
+
+def TSV110Wr_20cyc_1MDU : SchedWriteRes<[TSV110UnitMDU]> { let Latency = 20; }
+
+def TSV110Wr_24cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 24; }
+
+def TSV110Wr_31cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 31; }
+
+def TSV110Wr_36cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 36; }
+
+def TSV110Wr_38cyc_1FSU1 : SchedWriteRes<[TSV110UnitFSU1]> { let Latency = 38; }
+
+def TSV110Wr_64cyc_1FSU2 : SchedWriteRes<[TSV110UnitFSU2]> { let Latency = 64; }
+
+//===----------------------------------------------------------------------===//
+// Define Generic 2 micro-op types
+
+def TSV110Wr_1cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
+ TSV110UnitALUAB]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_2cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
+ TSV110UnitALUAB]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_2cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt,
+ TSV110UnitLdSt]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_2cyc_2F : SchedWriteRes<[TSV110UnitF,
+ TSV110UnitF]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_2cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
+ TSV110UnitFSU2]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_4cyc_2F : SchedWriteRes<[TSV110UnitF,
+ TSV110UnitF]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_4cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
+ TSV110UnitFSU2]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_4cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
+ TSV110UnitALUAB]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_5cyc_1ALU_1F : SchedWriteRes<[TSV110UnitALU,
+ TSV110UnitF]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_6cyc_2LdSt : SchedWriteRes<[TSV110UnitLdSt,
+ TSV110UnitLdSt]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_6cyc_1LdSt_1ALUAB : SchedWriteRes<[TSV110UnitLdSt,
+ TSV110UnitALUAB]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_7cyc_1F_1LdSt : SchedWriteRes<[TSV110UnitF,
+ TSV110UnitLdSt]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def TSV110Wr_8cyc_2FSU1 : SchedWriteRes<[TSV110UnitFSU1,
+ TSV110UnitFSU1]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+
+def TSV110Wr_8cyc_1FSU1_1FSU2 : SchedWriteRes<[TSV110UnitFSU1,
+ TSV110UnitFSU2]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 3 micro-op types
+
+def TSV110Wr_6cyc_3F : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
+ TSV110UnitF]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def TSV110Wr_6cyc_3LdSt : SchedWriteRes<[TSV110UnitLdSt, TSV110UnitLdSt,
+ TSV110UnitLdSt]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def TSV110Wr_7cyc_2F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
+ TSV110UnitLdSt]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 4 micro-op types
+
+def TSV110Wr_8cyc_4F : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
+ TSV110UnitF, TSV110UnitF]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def TSV110Wr_8cyc_3F_1LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
+ TSV110UnitF, TSV110UnitLdSt]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 5 micro-op types
+
+def TSV110Wr_8cyc_3F_2LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF, TSV110UnitF,
+ TSV110UnitLdSt, TSV110UnitLdSt]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 8 micro-op types
+
+def TSV110Wr_10cyc_4F_4LdSt : SchedWriteRes<[TSV110UnitF, TSV110UnitF,
+ TSV110UnitF, TSV110UnitF,
+ TSV110UnitLdSt, TSV110UnitLdSt,
+ TSV110UnitLdSt, TSV110UnitLdSt]> {
+ let Latency = 10;
+ let NumMicroOps = 8;
+}
+
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_1cyc_1AB], (instrs B)>;
+def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BL)>;
+def : InstRW<[TSV110Wr_1cyc_1AB], (instrs BLR)>;
+def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ))$")>;
+
+
+// Cryptography Extensions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AES[DE]")>;
+def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^AESI?MC")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA1SU1")>;
+def : InstRW<[TSV110Wr_2cyc_2F], (instregex "^SHA1(H|SU0)")>;
+def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA1[CMP]")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^SHA256SU0")>;
+def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^SHA256SU1")>;
+def : InstRW<[TSV110Wr_5cyc_1FSU1], (instregex "^SHA256(H|H2)")>;
+def TSV110ReadCRC: SchedReadAdvance<1, [TSV110Wr_2cyc_1MDU]>;
+def : InstRW<[TSV110Wr_2cyc_1MDU, TSV110ReadCRC], (instregex "^CRC32.*$")>;
+
+
+// Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(BIC|EON|ORN)[WX]rr")>;
+def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(BIC)S[WX]rr")>;
+
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(ADD|AND|EOR|ORR|SUB)[WX]r(r|i)")>;
+def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "(ADD|AND|EOR|ORR|SUB)S[WX]r(r|i)")>;
+
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(ADC|SBC|BIC)[WX]r$")>;
+def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(ADC|SBC)S[WX]r$")>;
+
+def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)S[WX]rs$")>;
+def : InstRW<[TSV110Wr_2cyc_1MDU], (instregex "^(ADD|SUB)[WX]r(s|x|x64)$")>;
+def : InstRW<[TSV110Wr_2cyc_1AB], (instregex "^(ADD|SUB)S[WX]r(s|x|x64)$")>;
+
+def : InstRW<[TSV110Wr_1cyc_1AB], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
+
+
+// Move and Shift Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instrs ADR, ADRP)>;
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^MOV[NZK][WX]i")>;
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "(LSLV|LSRV|ASRV|RORV)(W|X)r")>;
+
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_12cyc_1MDU], (instregex "^(S|U)DIVWr$")>;
+def : InstRW<[TSV110Wr_20cyc_1MDU], (instregex "^(S|U)DIVXr$")>;
+
+def TSV110ReadMAW : SchedReadAdvance<2, [TSV110Wr_3cyc_1MDU]>;
+def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instrs MADDWrrr, MSUBWrrr)>;
+def TSV110ReadMAQ : SchedReadAdvance<3, [TSV110Wr_4cyc_1MDU]>;
+def : InstRW<[TSV110Wr_4cyc_1MDU, TSV110ReadMAQ], (instrs MADDXrrr, MSUBXrrr)>;
+def : InstRW<[TSV110Wr_3cyc_1MDU, TSV110ReadMAW], (instregex "(S|U)(MADDL|MSUBL)rrr")>;
+def : InstRW<[TSV110Wr_4cyc_1MDU], (instregex "^(S|U)MULHrr$")>;
+
+
+// Miscellaneous Data-Processing Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^EXTR(W|X)rri$")>;
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(S|U)?BFM(W|X)ri$")>;
+def : InstRW<[TSV110Wr_1cyc_1ALUAB], (instregex "^(CLS|CLZ|RBIT|REV(16|32)?)(W|X)r$")>;
+
+
+// Load Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(W|X)l$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs LDRSWl)>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDR(BB|HH|W|X)ui$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteAdr], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTR(B|H|W|X)i$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDUR(BB|HH|W|X)i$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instregex "^LDP(W|X)i$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt_1ALUAB, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpost)>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt, WriteLDHi, WriteAdr], (instrs LDPSWpre)>;
+
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFMl)>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instrs PRFUMi)>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMui$")>;
+def : InstRW<[TSV110Wr_4cyc_1LdSt], (instregex "^PRFMro(W|X)$")>;
+
+
+// Store Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STN?P(W|X)i$")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR(BB|HH|W|X)i$")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STTR(B|H|W|X)i$")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR(BB|HH|W|X)ui$")>;
+
+def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt, WriteAdr], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
+
+
+// FP Data Processing Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "F(ABS|NEG)(D|S)r")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCCMP(E)?(S|D)rr$")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCSEL(S|D)rrr$")>;
+
+def : InstRW<[TSV110Wr_11cyc_1FSU1], (instrs FDIVSrr)>;
+def : InstRW<[TSV110Wr_18cyc_1FSU1], (instrs FDIVDrr)>;
+def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTSr)>;
+def : InstRW<[TSV110Wr_31cyc_1FSU2], (instrs FSQRTDr)>;
+
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN).+rr")>;
+
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^FN?M(ADD|SUB)Hrrr")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FN?M(ADD|SUB)Srrr")>;
+def : InstRW<[TSV110Wr_7cyc_1F], (instregex "^FN?M(ADD|SUB)Drrr")>;
+
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Hrr")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|SUB)Srr")>;
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(ADD|SUB)Drr")>;
+
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(N)?MULHrr$")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULSrr$")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(N)?MULDrr$")>;
+
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT.+r")>;
+
+
+// FP Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_5cyc_1ALU_1F], (instregex "^[SU]CVTF[SU][WX][SD]ri")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT[HSD][HSD]r")>;
+
+def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^FMOV(DX|WS|XD|SW|DXHigh|XDHigh)r$")>;
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOV[SD][ir]$")>;
+
+
+// FP Load Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[DSQ]l")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDUR[BDHSQ]i")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LDR[BDHSQ](post|pre)")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LDR[BDHSQ]ui")>;
+def : InstRW<[TSV110Wr_6cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi], (instregex "^LDN?P[DQS]i")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteLDHi, WriteAdr], (instregex "^LDP[DQS](post|pre)")>;
+
+
+// FP Store Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STUR[BHSDQ]i")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ](post|pre)")>;
+def : InstRW<[TSV110Wr_1cyc_1LdSt], (instregex "^STR[BHSDQ]ui")>;
+def : InstRW<[TSV110Wr_2cyc_1LdSt_1ALUAB, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]")>;
+def : InstRW<[TSV110Wr_2cyc_2LdSt], (instregex "^STN?P[SDQ]i")>;
+def : InstRW<[TSV110Wr_2cyc_2LdSt, WriteAdr], (instregex "^STP[SDQ](post|pre)")>;
+
+
+// ASIMD Integer Instructions
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v8i8, v4i16, v2i32
+// Q form - v16i8, v8i16, v4i32
+// D form - v1i8, v1i16, v1i32, v1i64
+// Q form - v16i8, v8i16, v4i32, v2i64
+// D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64
+// Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64
+
+// ASIMD simple arithmetic
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(ABS|ADD(P)?|NEG|SUB)v")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](ADD(L|LP|W)|SUB(L|W))v")>;
+
+// ASIMD complex arithmetic
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]H(ADD|SUB)v")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^R?(ADD|SUB)HN2?v")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]Q(ADD|SUB)v")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^(SU|US)QADDv")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]RHADDv")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABAL?v")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ABDL?v")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]ADALPv")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^((SQ)(ABS|NEG))v")>;
+
+// ASIMD compare
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT|TST)v")>;
+
+// ASIMD max/min
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)P?v")>;
+
+// ASIMD logical
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(AND|BIC|BIF|BIT|BSL|EOR|MVN|NOT|ORN|ORR)v")>;
+
+// ASIMD multiply accumulate, D-form
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[TSV110Wr_8cyc_2FSU1], (instregex "^(MUL|ML[AS]|SQR?D(MULH))(v16i8|v8i16|v4i32)")>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v8i8|v16i8)")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1], (instregex "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD shift
+// ASIMD shift accumulate
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^(S|SR|U|UR)SRA")>;
+// ASIMD shift by immed, basic
+def : InstRW<[TSV110Wr_4cyc_1FSU1],
+ (instregex "SHLv","SLIv","SRIv","SHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
+// ASIMD shift by immed, complex
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]?(Q|R){1,2}SHR")>;
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^SQSHLU")>;
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+// ASIMD shift by register, complex, D-form
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[TSV110Wr_4cyc_1FSU1], (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD reduction
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8B/8H
+def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B
+def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU]?ADDL?Vv16i8v$")>;
+
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[TSV110Wr_4cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B
+def : InstRW<[TSV110Wr_8cyc_1FSU1_1FSU2], (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+
+
+// Vector - Floating Point
+// -----------------------------------------------------------------------------
+
+// Reference for forms in this group
+// D form - v2f32
+// Q form - v4f32, v2f64
+// D form - 32, 64
+// D form - v1i32, v1i64
+// D form - v2i32
+// Q form - v4i32, v2i64
+
+// ASIMD FP sign manipulation
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FABSv")>;
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FNEGv")>;
+
+// ASIMD FP compare
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v")>;
+
+// ASIMD FP convert
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FCVT[AMNPZ][SU]v")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FCVT(L)v")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FCVT(N|XN)v")>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[TSV110Wr_11cyc_1FSU1], (instregex "FDIVv2f32")>;
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[TSV110Wr_24cyc_1FSU1], (instregex "FDIVv4f32")>;
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[TSV110Wr_38cyc_1FSU1], (instregex "FDIVv2f64")>;
+
+// ASIMD FP SQRT
+def : InstRW<[TSV110Wr_17cyc_1FSU2], (instrs FSQRTv2f32)>;
+def : InstRW<[TSV110Wr_36cyc_1FSU2], (instrs FSQRTv4f32)>;
+def : InstRW<[TSV110Wr_64cyc_1FSU2], (instrs FSQRTv2f64)>;
+
+// ASIMD FP max,min
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?v")>;
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^F(MAX|MIN)(NM)?Pv")>;
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^F(MAX|MIN)(NM)?Vv")>;
+
+// ASIMD FP add
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^F(ADD|ADDP|SUB)v")>;
+
+// ASIMD FP multiply
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^FMULX?v")>;
+
+
+// ASIMD Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(CLS|CLZ|CNT)v")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(DUP|INS)v.+lane")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^REV(16|32|64)v")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^(UZP|ZIP)[12]v")>;
+
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^EXTv")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^XTNv")>;
+def : InstRW<[TSV110Wr_2cyc_1FSU1_1FSU2], (instregex "^RBITv")>;
+
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^(INS|DUP)v.+gpr")>;
+
+def : InstRW<[TSV110Wr_3cyc_1FSU1], (instregex "^[SU]MOVv")>;
+
+// ASIMD table lookup, D-form
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v8i8One")>;
+def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v8i8Two")>;
+def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v8i8Three")>;
+def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v8i8Four")>;
+// ASIMD table lookup, Q-form
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^TB[LX]v16i8One")>;
+def : InstRW<[TSV110Wr_4cyc_2F], (instregex "^TB[LX]v16i8Two")>;
+def : InstRW<[TSV110Wr_6cyc_3F], (instregex "^TB[LX]v16i8Three")>;
+def : InstRW<[TSV110Wr_8cyc_4F], (instregex "^TB[LX]v16i8Four")>;
+
+def : InstRW<[TSV110Wr_2cyc_1F], (instregex "^FMOVv")>;
+
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^FRINT[AIMNPXZ]v")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[SU]CVTFv")>;
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^[FU](RECP|RSQRT)(E|X)v")>;
+
+
+// ASIMD Load Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_7cyc_1F_1LdSt], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_7cyc_1F_1LdSt, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_2LdSt], (instregex "LD4i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_2LdSt, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+
+def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_5cyc_1LdSt, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_6cyc_3LdSt], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_6cyc_3LdSt, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_6cyc_2LdSt], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_6cyc_2LdSt, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_7cyc_2F_1LdSt, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_8cyc_3F_1LdSt, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_10cyc_4F_4LdSt], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_10cyc_4F_4LdSt, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+
+// ASIMD Store Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[TSV110Wr_6cyc_1F], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+
+def : InstRW<[TSV110Wr_3cyc_1F], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_3cyc_1F, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[TSV110Wr_6cyc_1F], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_6cyc_1F, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_4cyc_1F], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_4cyc_1F, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_5cyc_1F], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_5cyc_1F, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[TSV110Wr_8cyc_1F], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[TSV110Wr_8cyc_1F, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+} // SchedModel = TSV110Model
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index a5bc3668ed..38ab512c56 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -82,8 +82,8 @@ static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
unsigned OffsetScaled = 0;
while (OffsetScaled < ObjSizeScaled) {
if (ObjSizeScaled - OffsetScaled >= 2) {
- SDValue AddrNode =
- DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl);
+ SDValue AddrNode =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl);
SDValue St = DAG.getMemIntrinsicNode(
OpCode2, dl, DAG.getVTList(MVT::Other),
{Chain, TagSrc, AddrNode},
@@ -95,8 +95,8 @@ static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
}
if (ObjSizeScaled - OffsetScaled > 0) {
- SDValue AddrNode =
- DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl);
+ SDValue AddrNode =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(OffsetScaled * 16), dl);
SDValue St = DAG.getMemIntrinsicNode(
OpCode1, dl, DAG.getVTList(MVT::Other),
{Chain, TagSrc, AddrNode},
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTagging.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTagging.cpp
index ab49e0c3f9..93dfda439d 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -59,7 +59,7 @@
using namespace llvm;
-#define DEBUG_TYPE "aarch64-stack-tagging"
+#define DEBUG_TYPE "aarch64-stack-tagging"
static cl::opt<bool> ClMergeInit(
"stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
@@ -73,10 +73,10 @@ static cl::opt<bool>
static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
cl::init(40), cl::Hidden);
-static cl::opt<unsigned>
- ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
- cl::Hidden);
-
+static cl::opt<unsigned>
+ ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
+ cl::Hidden);
+
static const Align kTagGranuleSize = Align(16);
namespace {
@@ -107,10 +107,10 @@ public:
SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}
bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
- auto I =
- llvm::lower_bound(Ranges, Start, [](const Range &LHS, uint64_t RHS) {
- return LHS.End <= RHS;
- });
+ auto I =
+ llvm::lower_bound(Ranges, Start, [](const Range &LHS, uint64_t RHS) {
+ return LHS.End <= RHS;
+ });
if (I != Ranges.end() && End > I->Start) {
// Overlap - bail.
return false;
@@ -439,8 +439,8 @@ void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
bool LittleEndian =
Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
// Current implementation of initializer merging assumes little endianness.
- if (MergeInit && !F->hasOptNone() && LittleEndian &&
- Size < ClMergeInitSizeLimit) {
+ if (MergeInit && !F->hasOptNone() && LittleEndian &&
+ Size < ClMergeInitSizeLimit) {
LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
<< ", size = " << Size << "\n");
InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
@@ -571,7 +571,7 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
auto *II = dyn_cast<IntrinsicInst>(I);
if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end)) {
- AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
+ AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
if (!AI) {
UnrecognizedLifetimes.push_back(I);
continue;
@@ -659,7 +659,7 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
IntrinsicInst *Start = Info.LifetimeStart[0];
IntrinsicInst *End = Info.LifetimeEnd[0];
uint64_t Size =
- cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
+ cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
Size = alignTo(Size, kTagGranuleSize);
tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
// We need to ensure that if we tag some object, we certainly untag it
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
index 41096a9613..4e64b6116e 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
@@ -49,12 +49,12 @@ cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
"apply unchecked-ld-st when the target is definitely within range"),
clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
-static cl::opt<bool>
- ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
- cl::ZeroOrMore,
- cl::desc("Apply first slot optimization for stack tagging "
- "(eliminate ADDG Rt, Rn, 0, 0)."));
-
+static cl::opt<bool>
+ ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
+ cl::ZeroOrMore,
+ cl::desc("Apply first slot optimization for stack tagging "
+ "(eliminate ADDG Rt, Rn, 0, 0)."));
+
namespace {
class AArch64StackTaggingPreRA : public MachineFunctionPass {
@@ -76,7 +76,7 @@ public:
bool mayUseUncheckedLoadStore();
void uncheckUsesOf(unsigned TaggedReg, int FI);
void uncheckLoadsAndStores();
- Optional<int> findFirstSlotCandidate();
+ Optional<int> findFirstSlotCandidate();
bool runOnMachineFunction(MachineFunction &Func) override;
StringRef getPassName() const override {
@@ -203,141 +203,141 @@ void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
}
}
-struct SlotWithTag {
- int FI;
- int Tag;
- SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
- explicit SlotWithTag(const MachineInstr &MI)
- : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
- bool operator==(const SlotWithTag &Other) const {
- return FI == Other.FI && Tag == Other.Tag;
- }
-};
-
-namespace llvm {
-template <> struct DenseMapInfo<SlotWithTag> {
- static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
- static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
- static unsigned getHashValue(const SlotWithTag &V) {
- return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
- DenseMapInfo<int>::getHashValue(V.Tag));
- }
- static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
- return A == B;
- }
-};
-} // namespace llvm
-
-static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
- return MFI->getUseLocalStackAllocationBlock() &&
- MFI->isObjectPreAllocated(FI);
-}
-
-// Pin one of the tagged slots to offset 0 from the tagged base pointer.
-// This would make its address available in a virtual register (IRG's def), as
-// opposed to requiring an ADDG instruction to materialize. This effectively
-// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
-// live almost everywhere anyway), and therefore needs to happen before
-// regalloc.
-Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
- // Find the best (FI, Tag) pair to pin to offset 0.
- // Looking at the possible uses of a tagged address, the advantage of pinning
- // is:
- // - COPY to physical register.
- // Does not matter, this would trade a MOV instruction for an ADDG.
- // - ST*G matter, but those mostly appear near the function prologue where all
- // the tagged addresses need to be materialized anyway; also, counting ST*G
- // uses would overweight large allocas that require more than one ST*G
- // instruction.
- // - Load/Store instructions in the address operand do not require a tagged
- // pointer, so they also do not benefit. These operands have already been
- // eliminated (see uncheckLoadsAndStores) so all remaining load/store
- // instructions count.
- // - Any other instruction may benefit from being pinned to offset 0.
- LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
- if (!ClFirstSlot)
- return None;
-
- DenseMap<SlotWithTag, int> RetagScore;
- SlotWithTag MaxScoreST{-1, -1};
- int MaxScore = -1;
- for (auto *I : ReTags) {
- SlotWithTag ST{*I};
- if (isSlotPreAllocated(MFI, ST.FI))
- continue;
-
- Register RetagReg = I->getOperand(0).getReg();
- if (!Register::isVirtualRegister(RetagReg))
- continue;
-
- int Score = 0;
- SmallVector<Register, 8> WorkList;
- WorkList.push_back(RetagReg);
-
- while (!WorkList.empty()) {
- Register UseReg = WorkList.back();
- WorkList.pop_back();
- for (auto &UseI : MRI->use_instructions(UseReg)) {
- unsigned Opcode = UseI.getOpcode();
- if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
- Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset ||
- Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
- Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
- Opcode == AArch64::STZGloop_wback)
- continue;
- if (UseI.isCopy()) {
- Register DstReg = UseI.getOperand(0).getReg();
- if (Register::isVirtualRegister(DstReg))
- WorkList.push_back(DstReg);
- continue;
- }
- LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
- << Register::virtReg2Index(UseReg) << " in " << UseI
- << "\n");
- Score++;
- }
- }
-
- int TotalScore = RetagScore[ST] += Score;
- if (TotalScore > MaxScore ||
- (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
- MaxScore = TotalScore;
- MaxScoreST = ST;
- }
- }
-
- if (MaxScoreST.FI < 0)
- return None;
-
- // If FI's tag is already 0, we are done.
- if (MaxScoreST.Tag == 0)
- return MaxScoreST.FI;
-
- // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
- SlotWithTag SwapST{-1, -1};
- for (auto *I : ReTags) {
- SlotWithTag ST{*I};
- if (ST.Tag == 0) {
- SwapST = ST;
- break;
- }
- }
-
- // Swap tags between the victim and the highest scoring pair.
- // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
- // the highest score slot without changing anything else.
- for (auto *&I : ReTags) {
- SlotWithTag ST{*I};
- MachineOperand &TagOp = I->getOperand(4);
- if (ST == MaxScoreST) {
- TagOp.setImm(0);
- } else if (ST == SwapST) {
- TagOp.setImm(MaxScoreST.Tag);
- }
- }
- return MaxScoreST.FI;
-}
-
+struct SlotWithTag {
+ int FI;
+ int Tag;
+ SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
+ explicit SlotWithTag(const MachineInstr &MI)
+ : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
+ bool operator==(const SlotWithTag &Other) const {
+ return FI == Other.FI && Tag == Other.Tag;
+ }
+};
+
+namespace llvm {
+template <> struct DenseMapInfo<SlotWithTag> {
+ static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
+ static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
+ static unsigned getHashValue(const SlotWithTag &V) {
+ return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
+ DenseMapInfo<int>::getHashValue(V.Tag));
+ }
+ static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
+ return A == B;
+ }
+};
+} // namespace llvm
+
+static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
+ return MFI->getUseLocalStackAllocationBlock() &&
+ MFI->isObjectPreAllocated(FI);
+}
+
+// Pin one of the tagged slots to offset 0 from the tagged base pointer.
+// This would make its address available in a virtual register (IRG's def), as
+// opposed to requiring an ADDG instruction to materialize. This effectively
+// eliminates a vreg (by replacing it with direct uses of IRG, which is usually
+// live almost everywhere anyway), and therefore needs to happen before
+// regalloc.
+Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
+ // Find the best (FI, Tag) pair to pin to offset 0.
+ // Looking at the possible uses of a tagged address, the advantage of pinning
+ // is:
+ // - COPY to physical register.
+ // Does not matter, this would trade a MOV instruction for an ADDG.
+ // - ST*G matter, but those mostly appear near the function prologue where all
+ // the tagged addresses need to be materialized anyway; also, counting ST*G
+ // uses would overweight large allocas that require more than one ST*G
+ // instruction.
+ // - Load/Store instructions in the address operand do not require a tagged
+ // pointer, so they also do not benefit. These operands have already been
+ // eliminated (see uncheckLoadsAndStores) so all remaining load/store
+ // instructions count.
+ // - Any other instruction may benefit from being pinned to offset 0.
+ LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
+ if (!ClFirstSlot)
+ return None;
+
+ DenseMap<SlotWithTag, int> RetagScore;
+ SlotWithTag MaxScoreST{-1, -1};
+ int MaxScore = -1;
+ for (auto *I : ReTags) {
+ SlotWithTag ST{*I};
+ if (isSlotPreAllocated(MFI, ST.FI))
+ continue;
+
+ Register RetagReg = I->getOperand(0).getReg();
+ if (!Register::isVirtualRegister(RetagReg))
+ continue;
+
+ int Score = 0;
+ SmallVector<Register, 8> WorkList;
+ WorkList.push_back(RetagReg);
+
+ while (!WorkList.empty()) {
+ Register UseReg = WorkList.back();
+ WorkList.pop_back();
+ for (auto &UseI : MRI->use_instructions(UseReg)) {
+ unsigned Opcode = UseI.getOpcode();
+ if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
+ Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset ||
+ Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
+ Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
+ Opcode == AArch64::STZGloop_wback)
+ continue;
+ if (UseI.isCopy()) {
+ Register DstReg = UseI.getOperand(0).getReg();
+ if (Register::isVirtualRegister(DstReg))
+ WorkList.push_back(DstReg);
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
+ << Register::virtReg2Index(UseReg) << " in " << UseI
+ << "\n");
+ Score++;
+ }
+ }
+
+ int TotalScore = RetagScore[ST] += Score;
+ if (TotalScore > MaxScore ||
+ (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
+ MaxScore = TotalScore;
+ MaxScoreST = ST;
+ }
+ }
+
+ if (MaxScoreST.FI < 0)
+ return None;
+
+ // If FI's tag is already 0, we are done.
+ if (MaxScoreST.Tag == 0)
+ return MaxScoreST.FI;
+
+ // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
+ SlotWithTag SwapST{-1, -1};
+ for (auto *I : ReTags) {
+ SlotWithTag ST{*I};
+ if (ST.Tag == 0) {
+ SwapST = ST;
+ break;
+ }
+ }
+
+ // Swap tags between the victim and the highest scoring pair.
+ // If SwapST is still (-1, -1), that's fine, too - we'll simply set tag 0 on
+ // the highest score slot without changing anything else.
+ for (auto *&I : ReTags) {
+ SlotWithTag ST{*I};
+ MachineOperand &TagOp = I->getOperand(4);
+ if (ST == MaxScoreST) {
+ TagOp.setImm(0);
+ } else if (ST == SwapST) {
+ TagOp.setImm(MaxScoreST.Tag);
+ }
+ }
+ return MaxScoreST.FI;
+}
+
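To make the tag-swap step of the heuristic above concrete, here is a small standalone sketch (plain C++, invented values, no LLVM types): the best-scoring slot is forced to tag 0, and the slot that previously carried tag 0 inherits the winner's old tag. The real pass performs the same rewrite on operand 4 of each collected retag instruction, as in the loop above.

// Toy model of the tag swap performed above (illustration only).
#include <cstdio>
#include <vector>

struct Retag { int FI; int Tag; };

int main() {
  // Suppose scoring picked (FI=3, Tag=2) and slot 7 currently uses tag 0.
  std::vector<Retag> ReTags = {{3, 2}, {7, 0}, {5, 1}};
  Retag MaxScore{3, 2}, Swap{7, 0};

  for (Retag &R : ReTags) {
    if (R.FI == MaxScore.FI && R.Tag == MaxScore.Tag)
      R.Tag = 0;                 // pin the winner to tag 0
    else if (R.FI == Swap.FI && R.Tag == Swap.Tag)
      R.Tag = MaxScore.Tag;      // the old tag-0 slot takes the winner's tag
  }

  for (const Retag &R : ReTags)
    std::printf("FI=%d Tag=%d\n", R.FI, R.Tag); // 3/0, 7/2, 5/1
}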
bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
MRI = &MF->getRegInfo();
@@ -366,35 +366,35 @@ bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
}
}
- // Take over from SSP. It does nothing for tagged slots, and should not really
- // have been enabled in the first place.
- for (int FI : TaggedSlots)
- MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);
-
+ // Take over from SSP. It does nothing for tagged slots, and should not really
+ // have been enabled in the first place.
+ for (int FI : TaggedSlots)
+ MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);
+
if (ReTags.empty())
return false;
if (mayUseUncheckedLoadStore())
uncheckLoadsAndStores();
- // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
- // If the base tagged pointer is set up to the address of this slot,
- // the ADDG instruction can be eliminated.
- Optional<int> BaseSlot = findFirstSlotCandidate();
- if (BaseSlot)
- AFI->setTaggedBasePointerIndex(*BaseSlot);
-
- for (auto *I : ReTags) {
- int FI = I->getOperand(1).getIndex();
- int Tag = I->getOperand(4).getImm();
- Register Base = I->getOperand(3).getReg();
- if (Tag == 0 && FI == BaseSlot) {
- BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
- I->getOperand(0).getReg())
- .addReg(Base);
- I->eraseFromParent();
- }
- }
-
+ // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
+ // If the base tagged pointer is set up to the address of this slot,
+ // the ADDG instruction can be eliminated.
+ Optional<int> BaseSlot = findFirstSlotCandidate();
+ if (BaseSlot)
+ AFI->setTaggedBasePointerIndex(*BaseSlot);
+
+ for (auto *I : ReTags) {
+ int FI = I->getOperand(1).getIndex();
+ int Tag = I->getOperand(4).getImm();
+ Register Base = I->getOperand(3).getReg();
+ if (Tag == 0 && FI == BaseSlot) {
+ BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
+ I->getOperand(0).getReg())
+ .addReg(Base);
+ I->eraseFromParent();
+ }
+ }
+
return true;
}
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.cpp
index 71b2bb1964..f78643d8e7 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -67,7 +67,7 @@ AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
if (CPUString.empty())
CPUString = "generic";
- ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS);
+ ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS);
initializeProperties();
return *this;
@@ -103,26 +103,26 @@ void AArch64Subtarget::initializeProperties() {
case CortexA76:
case CortexA77:
case CortexA78:
- case CortexA78C:
- case CortexR82:
+ case CortexA78C:
+ case CortexR82:
case CortexX1:
PrefFunctionLogAlignment = 4;
break;
case A64FX:
CacheLineSize = 256;
- PrefFunctionLogAlignment = 3;
- PrefLoopLogAlignment = 2;
- MaxInterleaveFactor = 4;
- PrefetchDistance = 128;
- MinPrefetchStride = 1024;
- MaxPrefetchIterationsAhead = 4;
+ PrefFunctionLogAlignment = 3;
+ PrefLoopLogAlignment = 2;
+ MaxInterleaveFactor = 4;
+ PrefetchDistance = 128;
+ MinPrefetchStride = 1024;
+ MaxPrefetchIterationsAhead = 4;
break;
case AppleA7:
case AppleA10:
case AppleA11:
case AppleA12:
case AppleA13:
- case AppleA14:
+ case AppleA14:
CacheLineSize = 64;
PrefetchDistance = 280;
MinPrefetchStride = 2048;
@@ -157,8 +157,8 @@ void AArch64Subtarget::initializeProperties() {
PrefFunctionLogAlignment = 3;
break;
case NeoverseN1:
- case NeoverseN2:
- case NeoverseV1:
+ case NeoverseN2:
+ case NeoverseV1:
PrefFunctionLogAlignment = 4;
break;
case Saphira:
@@ -209,7 +209,7 @@ void AArch64Subtarget::initializeProperties() {
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
- : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
+ : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian),
@@ -375,8 +375,8 @@ unsigned AArch64Subtarget::getMinSVEVectorSizeInBits() const {
return (SVEVectorBitsMin / 128) * 128;
return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
}
-
-bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
- // Prefer NEON unless larger SVE registers are available.
- return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
-}
+
+bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
+ // Prefer NEON unless larger SVE registers are available.
+ return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
+}
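The two helpers above combine as follows; the sketch below (illustration only, assumed flag values) mirrors the rounding to whole 128-bit SVE granules and the 256-bit threshold at which fixed-length vectors switch from NEON to SVE. The real getMinSVEVectorSizeInBits also handles the case where only a minimum is configured.

// Illustration only: assumed min/max flag values.
#include <algorithm>
#include <cstdio>

static unsigned minSVEBits(unsigned MinFlag, unsigned MaxFlag) {
  return (std::min(MinFlag, MaxFlag) / 128) * 128; // whole 128-bit granules
}

int main() {
  unsigned MinBits = minSVEBits(/*MinFlag=*/384, /*MaxFlag=*/512); // -> 384
  bool UseSVEForFixed = MinBits >= 256; // wide enough to prefer SVE over NEON
  std::printf("%u %d\n", MinBits, UseSVEForFixed);
}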
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.h
index 8fe2f12598..ce401f4986 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64Subtarget.h
@@ -45,7 +45,7 @@ public:
AppleA11,
AppleA12,
AppleA13,
- AppleA14,
+ AppleA14,
Carmel,
CortexA35,
CortexA53,
@@ -58,24 +58,24 @@ public:
CortexA76,
CortexA77,
CortexA78,
- CortexA78C,
- CortexR82,
+ CortexA78C,
+ CortexR82,
CortexX1,
ExynosM3,
Falkor,
Kryo,
NeoverseE1,
NeoverseN1,
- NeoverseN2,
- NeoverseV1,
+ NeoverseN2,
+ NeoverseV1,
Saphira,
ThunderX2T99,
ThunderX,
ThunderXT81,
ThunderXT83,
ThunderXT88,
- ThunderX3T110,
- TSV110
+ ThunderX3T110,
+ TSV110
};
protected:
@@ -88,11 +88,11 @@ protected:
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
- bool HasV8_7aOps = false;
-
- bool HasV8_0rOps = false;
- bool HasCONTEXTIDREL2 = false;
+ bool HasV8_7aOps = false;
+ bool HasV8_0rOps = false;
+ bool HasCONTEXTIDREL2 = false;
+
bool HasFPARMv8 = false;
bool HasNEON = false;
bool HasCrypto = false;
@@ -127,7 +127,7 @@ protected:
bool HasAES = false;
// ARMv8.3 extensions
- bool HasPAuth = false;
+ bool HasPAuth = false;
bool HasJS = false;
bool HasCCIDX = false;
bool HasComplxNum = false;
@@ -141,7 +141,7 @@ protected:
bool HasSEL2 = false;
bool HasPMU = false;
bool HasTLB_RMI = false;
- bool HasFlagM = false;
+ bool HasFlagM = false;
bool HasRCPC_IMMO = false;
bool HasLSLFast = false;
@@ -170,12 +170,12 @@ protected:
bool HasFineGrainedTraps = false;
bool HasEnhancedCounterVirtualization = false;
- // Armv8.7-A Extensions
- bool HasXS = false;
- bool HasWFxT = false;
- bool HasHCX = false;
- bool HasLS64 = false;
-
+ // Armv8.7-A Extensions
+ bool HasXS = false;
+ bool HasWFxT = false;
+ bool HasHCX = false;
+ bool HasLS64 = false;
+
// Arm SVE2 extensions
bool HasSVE2 = false;
bool HasSVE2AES = false;
@@ -186,9 +186,9 @@ protected:
// Future architecture extensions.
bool HasETE = false;
bool HasTRBE = false;
- bool HasBRBE = false;
- bool HasPAUTH = false;
- bool HasSPE_EEF = false;
+ bool HasBRBE = false;
+ bool HasPAUTH = false;
+ bool HasSPE_EEF = false;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
@@ -208,7 +208,7 @@ protected:
// Enable 64-bit vectorization in SLP.
unsigned MinVectorRegisterBitWidth = 64;
- bool OutlineAtomics = false;
+ bool OutlineAtomics = false;
bool UseAA = false;
bool PredictableSelectIsExpensive = false;
bool BalanceFPOps = false;
@@ -221,7 +221,7 @@ protected:
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
- bool HasCmpBccFusion = false;
+ bool HasCmpBccFusion = false;
bool HasFuseAddress = false;
bool HasFuseAES = false;
bool HasFuseArithmeticLogic = false;
@@ -325,7 +325,7 @@ public:
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
bool hasV8_5aOps() const { return HasV8_5aOps; }
- bool hasV8_0rOps() const { return HasV8_0rOps; }
+ bool hasV8_0rOps() const { return HasV8_0rOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
@@ -363,7 +363,7 @@ public:
bool hasSHA3() const { return HasSHA3; }
bool hasSHA2() const { return HasSHA2; }
bool hasAES() const { return HasAES; }
- bool hasCONTEXTIDREL2() const { return HasCONTEXTIDREL2; }
+ bool hasCONTEXTIDREL2() const { return HasCONTEXTIDREL2; }
bool balanceFPOps() const { return BalanceFPOps; }
bool predictableSelectIsExpensive() const {
return PredictableSelectIsExpensive;
@@ -378,7 +378,7 @@ public:
}
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
- bool hasCmpBccFusion() const { return HasCmpBccFusion; }
+ bool hasCmpBccFusion() const { return HasCmpBccFusion; }
bool hasFuseAddress() const { return HasFuseAddress; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; }
@@ -454,7 +454,7 @@ public:
bool hasRandGen() const { return HasRandGen; }
bool hasMTE() const { return HasMTE; }
bool hasTME() const { return HasTME; }
- bool hasPAUTH() const { return HasPAUTH; }
+ bool hasPAUTH() const { return HasPAUTH; }
// Arm SVE2 extensions
bool hasSVE2AES() const { return HasSVE2AES; }
bool hasSVE2SM4() const { return HasSVE2SM4; }
@@ -484,15 +484,15 @@ public:
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
- bool isTargetILP32() const {
- return TargetTriple.isArch32Bit() ||
- TargetTriple.getEnvironment() == Triple::GNUILP32;
- }
+ bool isTargetILP32() const {
+ return TargetTriple.isArch32Bit() ||
+ TargetTriple.getEnvironment() == Triple::GNUILP32;
+ }
bool useAA() const override { return UseAA; }
- bool outlineAtomics() const { return OutlineAtomics; }
-
+ bool outlineAtomics() const { return OutlineAtomics; }
+
bool hasVH() const { return HasVH; }
bool hasPAN() const { return HasPAN; }
bool hasLOR() const { return HasLOR; }
@@ -501,7 +501,7 @@ public:
bool hasPAN_RWV() const { return HasPAN_RWV; }
bool hasCCPP() const { return HasCCPP; }
- bool hasPAuth() const { return HasPAuth; }
+ bool hasPAuth() const { return HasPAuth; }
bool hasJS() const { return HasJS; }
bool hasCCIDX() const { return HasCCIDX; }
bool hasComplxNum() const { return HasComplxNum; }
@@ -512,14 +512,14 @@ public:
bool hasTRACEV8_4() const { return HasTRACEV8_4; }
bool hasAM() const { return HasAM; }
bool hasAMVS() const { return HasAMVS; }
- bool hasXS() const { return HasXS; }
- bool hasWFxT() const { return HasWFxT; }
- bool hasHCX() const { return HasHCX; }
- bool hasLS64() const { return HasLS64; }
+ bool hasXS() const { return HasXS; }
+ bool hasWFxT() const { return HasWFxT; }
+ bool hasHCX() const { return HasHCX; }
+ bool hasLS64() const { return HasLS64; }
bool hasSEL2() const { return HasSEL2; }
bool hasPMU() const { return HasPMU; }
bool hasTLB_RMI() const { return HasTLB_RMI; }
- bool hasFlagM() const { return HasFlagM; }
+ bool hasFlagM() const { return HasFlagM; }
bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
bool addrSinkUsingGEPs() const override {
@@ -542,7 +542,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
@@ -581,7 +581,7 @@ public:
// implied by the architecture.
unsigned getMaxSVEVectorSizeInBits() const;
unsigned getMinSVEVectorSizeInBits() const;
- bool useSVEForFixedLengthVectors() const;
+ bool useSVEForFixedLengthVectors() const;
};
} // End llvm namespace
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SystemOperands.td b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SystemOperands.td
index 01ac52bd87..0b9c53a72f 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64SystemOperands.td
@@ -32,11 +32,11 @@ def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">,
AssemblerPredicate<(all_of FeaturePAN_RWV),
"ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">;
-def HasCONTEXTIDREL2
- : Predicate<"Subtarget->hasCONTEXTIDREL2()">,
- AssemblerPredicate<(all_of FeatureCONTEXTIDREL2),
- "Target contains CONTEXTIDR_EL2 RW operand">;
-
+def HasCONTEXTIDREL2
+ : Predicate<"Subtarget->hasCONTEXTIDREL2()">,
+ AssemblerPredicate<(all_of FeatureCONTEXTIDREL2),
+ "Target contains CONTEXTIDR_EL2 RW operand">;
+
//===----------------------------------------------------------------------===//
// AT (address translate) instruction options.
//===----------------------------------------------------------------------===//
@@ -98,21 +98,21 @@ def : DB<"ld", 0xd>;
def : DB<"st", 0xe>;
def : DB<"sy", 0xf>;
-class DBnXS<string name, bits<4> encoding, bits<5> immValue> : SearchableTable {
- let SearchableFields = ["Name", "Encoding", "ImmValue"];
- let EnumValueField = "Encoding";
-
- string Name = name;
- bits<4> Encoding = encoding;
- bits<5> ImmValue = immValue;
- code Requires = [{ {AArch64::FeatureXS} }];
-}
-
-def : DBnXS<"oshnxs", 0x3, 0x10>;
-def : DBnXS<"nshnxs", 0x7, 0x14>;
-def : DBnXS<"ishnxs", 0xb, 0x18>;
-def : DBnXS<"synxs", 0xf, 0x1c>;
-
+class DBnXS<string name, bits<4> encoding, bits<5> immValue> : SearchableTable {
+ let SearchableFields = ["Name", "Encoding", "ImmValue"];
+ let EnumValueField = "Encoding";
+
+ string Name = name;
+ bits<4> Encoding = encoding;
+ bits<5> ImmValue = immValue;
+ code Requires = [{ {AArch64::FeatureXS} }];
+}
+
+def : DBnXS<"oshnxs", 0x3, 0x10>;
+def : DBnXS<"nshnxs", 0x7, 0x14>;
+def : DBnXS<"ishnxs", 0xb, 0x18>;
+def : DBnXS<"synxs", 0xf, 0x1c>;
+
//===----------------------------------------------------------------------===//
// DC (data cache maintenance) instruction options.
//===----------------------------------------------------------------------===//
@@ -404,8 +404,8 @@ def : BTI<"jc", 0b11>;
// TLBI (translation lookaside buffer invalidate) instruction options.
//===----------------------------------------------------------------------===//
-class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2, bit needsreg> {
+class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm,
+ bits<3> op2, bit needsreg> {
string Name = name;
bits<14> Encoding;
let Encoding{13-11} = op1;
@@ -413,122 +413,122 @@ class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm,
let Encoding{6-3} = crm;
let Encoding{2-0} = op2;
bit NeedsReg = needsreg;
- list<string> Requires = [];
- list<string> ExtraRequires = [];
- code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }];
-}
-
-def TLBITable : GenericTable {
- let FilterClass = "TLBIEntry";
- let CppTypeName = "TLBI";
- let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
-}
-
-def lookupTLBIByName : SearchIndex {
- let Table = TLBITable;
- let Key = ["Name"];
+ list<string> Requires = [];
+ list<string> ExtraRequires = [];
+ code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }];
}
-def lookupTLBIByEncoding : SearchIndex {
- let Table = TLBITable;
- let Key = ["Encoding"];
-}
-
-multiclass TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
- bits<3> op2, bit needsreg = 1> {
- def : TLBIEntry<name, op1, crn, crm, op2, needsreg>;
- def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> {
- let Encoding{7} = 1;
- let ExtraRequires = ["AArch64::FeatureXS"];
- }
-}
-
-defm : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
-defm : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
-defm : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
-defm : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
-defm : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
-defm : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
-defm : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
-defm : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
-defm : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
-defm : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
-defm : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
-defm : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
-defm : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
-defm : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
-defm : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
-defm : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
-defm : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
-defm : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
-defm : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
-defm : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
-defm : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
-defm : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
-defm : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
-defm : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
-defm : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
-defm : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
-defm : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
-defm : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
-defm : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
-defm : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
-defm : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
-defm : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
-
+def TLBITable : GenericTable {
+ let FilterClass = "TLBIEntry";
+ let CppTypeName = "TLBI";
+ let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"];
+}
+
+def lookupTLBIByName : SearchIndex {
+ let Table = TLBITable;
+ let Key = ["Name"];
+}
+
+def lookupTLBIByEncoding : SearchIndex {
+ let Table = TLBITable;
+ let Key = ["Encoding"];
+}
+
+multiclass TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
+ bits<3> op2, bit needsreg = 1> {
+ def : TLBIEntry<name, op1, crn, crm, op2, needsreg>;
+ def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> {
+ let Encoding{7} = 1;
+ let ExtraRequires = ["AArch64::FeatureXS"];
+ }
+}
+
+defm : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
+defm : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
+defm : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
+defm : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
+defm : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
+defm : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
+defm : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
+defm : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
+defm : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
+defm : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
+defm : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
+defm : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
+defm : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
+defm : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
+defm : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
+defm : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
+defm : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
+defm : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
+defm : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
+defm : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
+defm : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
+defm : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
+defm : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
+defm : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
+defm : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
+defm : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
+defm : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
+defm : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
+defm : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
+defm : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
+defm : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
+defm : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
+
// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI)
-let Requires = ["AArch64::FeatureTLB_RMI"] in {
+let Requires = ["AArch64::FeatureTLB_RMI"] in {
// Armv8.4-A Outer Sharable TLB Maintenance instructions:
// op1 CRn CRm op2
-defm : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
-defm : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
-defm : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>;
-defm : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>;
-defm : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>;
-defm : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>;
-defm : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>;
-defm : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>;
-defm : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>;
-defm : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>;
-defm : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>;
-defm : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>;
-defm : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>;
-defm : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>;
-defm : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>;
-defm : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>;
+defm : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>;
+defm : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>;
+defm : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>;
+defm : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>;
+defm : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>;
+defm : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>;
+defm : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>;
+defm : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>;
+defm : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>;
+defm : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>;
+defm : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>;
+defm : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>;
+defm : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>;
+defm : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>;
+defm : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>;
+defm : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>;
// Armv8.4-A TLB Range Maintenance instructions:
// op1 CRn CRm op2
-defm : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>;
-defm : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>;
-defm : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>;
-defm : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>;
-defm : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>;
-defm : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>;
-defm : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>;
-defm : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>;
-defm : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>;
-defm : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>;
-defm : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>;
-defm : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>;
-defm : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>;
-defm : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>;
-defm : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>;
-defm : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>;
-defm : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>;
-defm : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>;
-defm : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>;
-defm : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>;
-defm : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>;
-defm : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>;
-defm : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>;
-defm : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>;
-defm : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>;
-defm : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>;
-defm : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
-defm : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
-defm : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
-defm : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
+defm : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>;
+defm : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>;
+defm : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>;
+defm : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>;
+defm : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>;
+defm : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>;
+defm : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>;
+defm : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>;
+defm : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>;
+defm : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>;
+defm : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>;
+defm : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>;
+defm : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>;
+defm : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>;
+defm : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>;
+defm : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>;
+defm : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>;
+defm : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>;
+defm : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>;
+defm : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>;
+defm : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>;
+defm : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>;
+defm : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>;
+defm : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>;
+defm : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>;
+defm : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>;
+defm : TLBI<"RVAE3IS", 0b110, 0b1000, 0b0010, 0b001>;
+defm : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>;
+defm : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>;
+defm : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>;
} //FeatureTLB_RMI
// Armv8.5-A Prediction Restriction by Context instruction options:
@@ -643,7 +643,7 @@ def : ROSysReg<"ID_AA64AFR0_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b100>;
def : ROSysReg<"ID_AA64AFR1_EL1", 0b11, 0b000, 0b0000, 0b0101, 0b101>;
def : ROSysReg<"ID_AA64ISAR0_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b000>;
def : ROSysReg<"ID_AA64ISAR1_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b001>;
-def : ROSysReg<"ID_AA64ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b010>;
+def : ROSysReg<"ID_AA64ISAR2_EL1", 0b11, 0b000, 0b0000, 0b0110, 0b010>;
def : ROSysReg<"ID_AA64MMFR0_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b000>;
def : ROSysReg<"ID_AA64MMFR1_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b001>;
def : ROSysReg<"ID_AA64MMFR2_EL1", 0b11, 0b000, 0b0000, 0b0111, 0b010>;
@@ -859,9 +859,9 @@ def : RWSysReg<"ACTLR_EL1", 0b11, 0b000, 0b0001, 0b0000, 0b001>;
def : RWSysReg<"ACTLR_EL2", 0b11, 0b100, 0b0001, 0b0000, 0b001>;
def : RWSysReg<"ACTLR_EL3", 0b11, 0b110, 0b0001, 0b0000, 0b001>;
def : RWSysReg<"HCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b000>;
-def : RWSysReg<"HCRX_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b010> {
- let Requires = [{ {AArch64::FeatureHCX} }];
-}
+def : RWSysReg<"HCRX_EL2", 0b11, 0b100, 0b0001, 0b0010, 0b010> {
+ let Requires = [{ {AArch64::FeatureHCX} }];
+}
def : RWSysReg<"SCR_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b000>;
def : RWSysReg<"MDCR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b001>;
def : RWSysReg<"SDER32_EL3", 0b11, 0b110, 0b0001, 0b0001, 0b001>;
@@ -1293,10 +1293,10 @@ def : RWSysReg<"CNTV_CTL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b001>;
def : RWSysReg<"CNTV_CVAL_EL02", 0b11, 0b101, 0b1110, 0b0011, 0b010>;
def : RWSysReg<"SPSR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b000>;
def : RWSysReg<"ELR_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b001>;
-let Requires = [{ {AArch64::FeatureCONTEXTIDREL2} }] in {
- def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
-}
+let Requires = [{ {AArch64::FeatureCONTEXTIDREL2} }] in {
+ def : RWSysReg<"CONTEXTIDR_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b001>;
}
+}
// v8.2a registers
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::FeaturePsUAO} }] in
@@ -1336,7 +1336,7 @@ def : RWSysReg<"VSESR_EL2", 0b11, 0b100, 0b0101, 0b0010, 0b011>;
// v8.3a "Pointer authentication extension" registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::FeaturePAuth} }] in {
+let Requires = [{ {AArch64::FeaturePAuth} }] in {
def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>;
def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>;
def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>;
@@ -1570,33 +1570,33 @@ def : RWSysReg<"CNTPCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b101>;
def : RWSysReg<"CNTVCTSS_EL0", 0b11, 0b011, 0b1110, 0b0000, 0b110>;
}
-// v8.7a LD64B/ST64B Accelerator Extension system register
-let Requires = [{ {AArch64::FeatureLS64} }] in
-def : RWSysReg<"ACCDATA_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b101>;
-
-// Branch Record Buffer system registers
-let Requires = [{ {AArch64::FeatureBRBE} }] in {
-def : RWSysReg<"BRBCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b000>;
-def : RWSysReg<"BRBCR_EL12", 0b10, 0b101, 0b1001, 0b0000, 0b000>;
-def : RWSysReg<"BRBCR_EL2", 0b10, 0b100, 0b1001, 0b0000, 0b000>;
-def : RWSysReg<"BRBFCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b001>;
-def : ROSysReg<"BRBIDR0_EL1", 0b10, 0b001, 0b1001, 0b0010, 0b000>;
-def : RWSysReg<"BRBINFINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b000>;
-def : RWSysReg<"BRBSRCINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b001>;
-def : RWSysReg<"BRBTGTINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b010>;
-def : RWSysReg<"BRBTS_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b010>;
-foreach n = 0-31 in {
- defvar nb = !cast<bits<5>>(n);
- def : ROSysReg<"BRBINF"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b00}>;
- def : ROSysReg<"BRBSRC"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b01}>;
- def : ROSysReg<"BRBTGT"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b10}>;
-}
-}
-
-// Statistical Profiling Extension system register
-let Requires = [{ {AArch64::FeatureSPE_EEF} }] in
-def : RWSysReg<"PMSNEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b001>;
-
+// v8.7a LD64B/ST64B Accelerator Extension system register
+let Requires = [{ {AArch64::FeatureLS64} }] in
+def : RWSysReg<"ACCDATA_EL1", 0b11, 0b000, 0b1101, 0b0000, 0b101>;
+
+// Branch Record Buffer system registers
+let Requires = [{ {AArch64::FeatureBRBE} }] in {
+def : RWSysReg<"BRBCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b000>;
+def : RWSysReg<"BRBCR_EL12", 0b10, 0b101, 0b1001, 0b0000, 0b000>;
+def : RWSysReg<"BRBCR_EL2", 0b10, 0b100, 0b1001, 0b0000, 0b000>;
+def : RWSysReg<"BRBFCR_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b001>;
+def : ROSysReg<"BRBIDR0_EL1", 0b10, 0b001, 0b1001, 0b0010, 0b000>;
+def : RWSysReg<"BRBINFINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b000>;
+def : RWSysReg<"BRBSRCINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b001>;
+def : RWSysReg<"BRBTGTINJ_EL1", 0b10, 0b001, 0b1001, 0b0001, 0b010>;
+def : RWSysReg<"BRBTS_EL1", 0b10, 0b001, 0b1001, 0b0000, 0b010>;
+foreach n = 0-31 in {
+ defvar nb = !cast<bits<5>>(n);
+ def : ROSysReg<"BRBINF"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b00}>;
+ def : ROSysReg<"BRBSRC"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b01}>;
+ def : ROSysReg<"BRBTGT"#n#"_EL1", 0b10, 0b001, 0b1000, nb{3-0}, {nb{4},0b10}>;
+}
+}
+
+// Statistical Profiling Extension system register
+let Requires = [{ {AArch64::FeatureSPE_EEF} }] in
+def : RWSysReg<"PMSNEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b001>;
+
// Cyclone specific system registers
// Op0 Op1 CRn CRm Op2
let Requires = [{ {AArch64::ProcAppleA7} }] in
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.cpp
index bec1758a93..5635b07fd6 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -148,10 +148,10 @@ static cl::opt<int> EnableGlobalISelAtO(
cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
cl::init(0));
-static cl::opt<bool>
- EnableSVEIntrinsicOpts("aarch64-enable-sve-intrinsic-opts", cl::Hidden,
- cl::desc("Enable SVE intrinsic opts"),
- cl::init(true));
+static cl::opt<bool>
+ EnableSVEIntrinsicOpts("aarch64-enable-sve-intrinsic-opts", cl::Hidden,
+ cl::desc("Enable SVE intrinsic opts"),
+ cl::init(true));
static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
cl::init(true), cl::Hidden);
@@ -184,8 +184,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64SIMDInstrOptPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);
initializeAArch64PostLegalizerCombinerPass(*PR);
- initializeAArch64PostLegalizerLoweringPass(*PR);
- initializeAArch64PostSelectOptimizePass(*PR);
+ initializeAArch64PostLegalizerLoweringPass(*PR);
+ initializeAArch64PostSelectOptimizePass(*PR);
initializeAArch64PromoteConstantPass(*PR);
initializeAArch64RedundantCopyEliminationPass(*PR);
initializeAArch64StorePairSuppressPass(*PR);
@@ -222,18 +222,18 @@ static std::string computeDataLayout(const Triple &TT,
}
if (TT.isOSBinFormatCOFF())
return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128";
- std::string Endian = LittleEndian ? "e" : "E";
- std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : "";
- return Endian + "-m:e" + Ptr32 +
- "-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
-}
-
-static StringRef computeDefaultCPU(const Triple &TT, StringRef CPU) {
- if (CPU.empty() && TT.isArm64e())
- return "apple-a12";
- return CPU;
+ std::string Endian = LittleEndian ? "e" : "E";
+ std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : "";
+ return Endian + "-m:e" + Ptr32 +
+ "-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
}
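For the common little-endian ELF case the string assembled above is "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; with a GNUILP32 environment the extra "-p:32:32" yields "e-m:e-p:32:32-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", and big-endian targets start with "E" instead of "e".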
+static StringRef computeDefaultCPU(const Triple &TT, StringRef CPU) {
+ if (CPU.empty() && TT.isArm64e())
+ return "apple-a12";
+ return CPU;
+}
+
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
// AArch64 Darwin and Windows are always PIC.
@@ -281,8 +281,8 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
bool LittleEndian)
: LLVMTargetMachine(T,
computeDataLayout(TT, Options.MCOptions, LittleEndian),
- TT, computeDefaultCPU(TT, CPU), FS, Options,
- getEffectiveRelocModel(TT, RM),
+ TT, computeDefaultCPU(TT, CPU), FS, Options,
+ getEffectiveRelocModel(TT, RM),
getEffectiveAArch64CodeModel(TT, CM, JIT), OL),
TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
initAsmInfo();
@@ -317,7 +317,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
// MachO/CodeModel::Large, which GlobalISel does not support.
if (getOptLevel() <= EnableGlobalISelAtO &&
TT.getArch() != Triple::aarch64_32 &&
- TT.getEnvironment() != Triple::GNUILP32 &&
+ TT.getEnvironment() != Triple::GNUILP32 &&
!(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) {
setGlobalISel(true);
setGlobalISelAbort(GlobalISelAbortMode::Disable);
@@ -340,10 +340,10 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
- std::string CPU =
- CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
- std::string FS =
- FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
auto &I = SubtargetMap[CPU + FS];
if (!I) {
@@ -460,12 +460,12 @@ void AArch64PassConfig::addIRPasses() {
// determine whether it succeeded. We can exploit existing control-flow in
// ldrex/strex loops to simplify this, but it needs tidying up.
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
- addPass(createCFGSimplificationPass(SimplifyCFGOptions()
- .forwardSwitchCondToPhi(true)
- .convertSwitchToLookupTable(true)
- .needCanonicalLoops(false)
- .hoistCommonInsts(true)
- .sinkCommonInsts(true)));
+ addPass(createCFGSimplificationPass(SimplifyCFGOptions()
+ .forwardSwitchCondToPhi(true)
+ .convertSwitchToLookupTable(true)
+ .needCanonicalLoops(false)
+ .hoistCommonInsts(true)
+ .sinkCommonInsts(true)));
// Run LoopDataPrefetch
//
@@ -553,13 +553,13 @@ bool AArch64PassConfig::addInstSelector() {
}
bool AArch64PassConfig::addIRTranslator() {
- addPass(new IRTranslator(getOptLevel()));
+ addPass(new IRTranslator(getOptLevel()));
return false;
}
void AArch64PassConfig::addPreLegalizeMachineIR() {
bool IsOptNone = getOptLevel() == CodeGenOpt::None;
- addPass(createAArch64PreLegalizerCombiner(IsOptNone));
+ addPass(createAArch64PreLegalizerCombiner(IsOptNone));
}
bool AArch64PassConfig::addLegalizeMachineIR() {
@@ -570,8 +570,8 @@ bool AArch64PassConfig::addLegalizeMachineIR() {
void AArch64PassConfig::addPreRegBankSelect() {
bool IsOptNone = getOptLevel() == CodeGenOpt::None;
if (!IsOptNone)
- addPass(createAArch64PostLegalizerCombiner(IsOptNone));
- addPass(createAArch64PostLegalizerLowering());
+ addPass(createAArch64PostLegalizerCombiner(IsOptNone));
+ addPass(createAArch64PostLegalizerLowering());
}
bool AArch64PassConfig::addRegBankSelect() {
@@ -585,8 +585,8 @@ void AArch64PassConfig::addPreGlobalInstructionSelect() {
bool AArch64PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
- if (getOptLevel() != CodeGenOpt::None)
- addPass(createAArch64PostSelectOptimize());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createAArch64PostSelectOptimize());
return false;
}
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.h
index 25e6261343..2420658743 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetMachine.h
@@ -57,12 +57,12 @@ public:
SMDiagnostic &Error,
SMRange &SourceRange) const override;
- /// Returns true if a cast between SrcAS and DestAS is a noop.
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Addrspacecasts are always noops.
- return true;
- }
-
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
+
private:
bool isLittle;
};
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7fda6b8fb6..d9f700a966 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64TargetTransformInfo.h"
+#include "AArch64TargetTransformInfo.h"
#include "AArch64ExpandImm.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -16,11 +16,11 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
-#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
using namespace llvm;
-using namespace llvm::PatternMatch;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64tti"
@@ -86,8 +86,8 @@ int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind,
- Instruction *Inst) {
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -195,10 +195,10 @@ int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
return TTI::TCC_Free;
break;
- case Intrinsic::experimental_gc_statepoint:
- if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
- return TTI::TCC_Free;
- break;
+ case Intrinsic::experimental_gc_statepoint:
+ if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
}
return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
@@ -212,43 +212,43 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-unsigned
-AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
- TTI::TargetCostKind CostKind) {
- auto *RetTy = ICA.getReturnType();
- switch (ICA.getID()) {
- case Intrinsic::umin:
- case Intrinsic::umax: {
- auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
- // umin(x,y) -> sub(x,usubsat(x,y))
- // umax(x,y) -> add(x,usubsat(y,x))
- if (LT.second == MVT::v2i64)
- return LT.first * 2;
- LLVM_FALLTHROUGH;
- }
- case Intrinsic::smin:
- case Intrinsic::smax: {
- static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
- MVT::v8i16, MVT::v2i32, MVT::v4i32};
- auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
- if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
- return LT.first;
- break;
- }
- default:
- break;
- }
- return BaseT::getIntrinsicInstrCost(ICA, CostKind);
-}
-
+unsigned
+AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ auto *RetTy = ICA.getReturnType();
+ switch (ICA.getID()) {
+ case Intrinsic::umin:
+ case Intrinsic::umax: {
+ auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ // umin(x,y) -> sub(x,usubsat(x,y))
+ // umax(x,y) -> add(x,usubsat(y,x))
+ if (LT.second == MVT::v2i64)
+ return LT.first * 2;
+ LLVM_FALLTHROUGH;
+ }
+ case Intrinsic::smin:
+ case Intrinsic::smax: {
+ static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
+ MVT::v8i16, MVT::v2i32, MVT::v4i32};
+ auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
+ return LT.first;
+ break;
+ }
+ default:
+ break;
+ }
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+}
+
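The umin/umax rewrites noted in the comments above rely on the saturating-subtraction identities umin(x, y) = x - usubsat(x, y) and umax(x, y) = x + usubsat(y, x). A tiny scalar self-check (illustration only, not LLVM code):

// Scalar check of the rewrites mentioned above.
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint32_t usubsat(uint32_t a, uint32_t b) { return a > b ? a - b : 0; }

int main() {
  for (uint32_t x : {0u, 7u, 100u, 0xffffffffu})
    for (uint32_t y : {0u, 3u, 100u, 0xfffffff0u}) {
      assert(std::min(x, y) == x - usubsat(x, y)); // umin(x,y) == sub(x, usubsat(x,y))
      assert(std::max(x, y) == x + usubsat(y, x)); // umax(x,y) == add(x, usubsat(y,x))
    }
}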
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
ArrayRef<const Value *> Args) {
// A helper that returns a vector type from the given type. The number of
// elements in type Ty determine the vector width.
auto toVectorTy = [&](Type *ArgTy) {
- return VectorType::get(ArgTy->getScalarType(),
- cast<VectorType>(DstTy)->getElementCount());
+ return VectorType::get(ArgTy->getScalarType(),
+ cast<VectorType>(DstTy)->getElementCount());
};
// Exit early if DstTy is not a vector type whose elements are at least
@@ -297,8 +297,8 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
return false;
// Get the total number of vector elements in the legalized types.
- unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorMinNumElements();
- unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
+ unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorMinNumElements();
+ unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
// Return true if the legalized types have the same number of vector elements
// and the destination element type size is twice that of the source type.
@@ -306,7 +306,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
}
int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::CastContextHint CCH,
+ TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -343,8 +343,8 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
EVT DstTy = TLI->getValueType(DL, Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
- return AdjustCost(
- BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
+ return AdjustCost(
+ BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
static const TypeConversionCostTblEntry
ConversionTbl[] = {
@@ -448,8 +448,8 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
- return AdjustCost(
- BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
+ return AdjustCost(
+ BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
}
int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
@@ -481,14 +481,14 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
// we may get the extension for free. If not, get the default cost for the
// extend.
if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
- return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
- CostKind);
+ return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
+ CostKind);
// The destination type should be larger than the element type. If not, get
// the default cost for the extend.
- if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
- return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
- CostKind);
+ if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
+ return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
+ CostKind);
switch (Opcode) {
default:
@@ -507,8 +507,8 @@ int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
}
// If we are unable to perform the extend for free, get the default cost.
- return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
- CostKind);
+ return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
+ CostKind);
}
unsigned AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
@@ -644,19 +644,19 @@ int AArch64TTIImpl::getArithmeticInstrCost(
}
return Cost;
- case ISD::MUL:
- if (LT.second != MVT::v2i64)
- return (Cost + 1) * LT.first;
- // Since we do not have a MUL.2d instruction, a mul <2 x i64> is expensive
- // as elements are extracted from the vectors and the muls scalarized.
- // As getScalarizationOverhead is a bit too pessimistic, we estimate the
- // cost for a i64 vector directly here, which is:
- // - four i64 extracts,
- // - two i64 inserts, and
- // - two muls.
- // So, for a v2i64 with LT.First = 1 the cost is 8, and for a v4i64 with
- // LT.first = 2 the cost is 16.
- return LT.first * 8;
+ case ISD::MUL:
+ if (LT.second != MVT::v2i64)
+ return (Cost + 1) * LT.first;
+ // Since we do not have a MUL.2d instruction, a mul <2 x i64> is expensive
+ // as elements are extracted from the vectors and the muls scalarized.
+ // As getScalarizationOverhead is a bit too pessimistic, we estimate the
+ // cost for a i64 vector directly here, which is:
+ // - four i64 extracts,
+ // - two i64 inserts, and
+ // - two muls.
+ // So, for a v2i64 with LT.First = 1 the cost is 8, and for a v4i64 with
+ // LT.first = 2 the cost is 16.
+ return LT.first * 8;
case ISD::ADD:
case ISD::XOR:
case ISD::OR:
@@ -696,40 +696,40 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
}
int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy, CmpInst::Predicate VecPred,
+ Type *CondTy, CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
- I);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
+ I);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// We don't lower some vector selects well that are wider than the register
// width.
- if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
+ if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
// We would need this many instructions to hide the scalarization happening.
const int AmortizationCost = 20;
-
- // If VecPred is not set, check if we can get a predicate from the context
- // instruction, if its type matches the requested ValTy.
- if (VecPred == CmpInst::BAD_ICMP_PREDICATE && I && I->getType() == ValTy) {
- CmpInst::Predicate CurrentPred;
- if (match(I, m_Select(m_Cmp(CurrentPred, m_Value(), m_Value()), m_Value(),
- m_Value())))
- VecPred = CurrentPred;
- }
- // Check if we have a compare/select chain that can be lowered using CMxx &
- // BFI pair.
- if (CmpInst::isIntPredicate(VecPred)) {
- static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
- MVT::v8i16, MVT::v2i32, MVT::v4i32,
- MVT::v2i64};
- auto LT = TLI->getTypeLegalizationCost(DL, ValTy);
- if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
- return LT.first;
- }
-
+
+ // If VecPred is not set, check if we can get a predicate from the context
+ // instruction, if its type matches the requested ValTy.
+ if (VecPred == CmpInst::BAD_ICMP_PREDICATE && I && I->getType() == ValTy) {
+ CmpInst::Predicate CurrentPred;
+ if (match(I, m_Select(m_Cmp(CurrentPred, m_Value(), m_Value()), m_Value(),
+ m_Value())))
+ VecPred = CurrentPred;
+ }
+ // Check if we have a compare/select chain that can be lowered using CMxx &
+ // BFI pair.
+ if (CmpInst::isIntPredicate(VecPred)) {
+ static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
+ MVT::v8i16, MVT::v2i32, MVT::v4i32,
+ MVT::v2i64};
+ auto LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
+ return LT.first;
+ }
+
static const TypeConversionCostTblEntry
VectorSelectTbl[] = {
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
@@ -749,9 +749,9 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return Entry->Cost;
}
}
- // The base case handles scalable vectors fine for now, since it treats the
- // cost as 1 * legalization cost.
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
+ // The base case handles scalable vectors fine for now, since it treats the
+ // cost as 1 * legalization cost.
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
AArch64TTIImpl::TTI::MemCmpExpansionOptions
@@ -772,30 +772,30 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
return Options;
}
-unsigned AArch64TTIImpl::getGatherScatterOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
-
- if (!isa<ScalableVectorType>(DataTy))
- return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment, CostKind, I);
- auto *VT = cast<VectorType>(DataTy);
- auto LT = TLI->getTypeLegalizationCost(DL, DataTy);
- ElementCount LegalVF = LT.second.getVectorElementCount();
- Optional<unsigned> MaxNumVScale = getMaxVScale();
- assert(MaxNumVScale && "Expected valid max vscale value");
-
- unsigned MemOpCost =
- getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
- unsigned MaxNumElementsPerGather =
- MaxNumVScale.getValue() * LegalVF.getKnownMinValue();
- return LT.first * MaxNumElementsPerGather * MemOpCost;
-}
-
-bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
- return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
-}
-
+unsigned AArch64TTIImpl::getGatherScatterOpCost(
+ unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
+
+ if (!isa<ScalableVectorType>(DataTy))
+ return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment, CostKind, I);
+ auto *VT = cast<VectorType>(DataTy);
+ auto LT = TLI->getTypeLegalizationCost(DL, DataTy);
+ ElementCount LegalVF = LT.second.getVectorElementCount();
+ Optional<unsigned> MaxNumVScale = getMaxVScale();
+ assert(MaxNumVScale && "Expected valid max vscale value");
+
+ unsigned MemOpCost =
+ getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
+ unsigned MaxNumElementsPerGather =
+ MaxNumVScale.getValue() * LegalVF.getKnownMinValue();
+ return LT.first * MaxNumElementsPerGather * MemOpCost;
+}
+
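As a rough worked instance of the gather/scatter formula above (assumed, illustrative numbers): taking the architectural SVE maximum of 2048 bits and 128-bit blocks, getMaxVScale() yields 16, so a gather of a scalable vector with a known-minimum of 4 lanes that legalizes in one step is charged 16 * 4 = 64 times the per-element memory-op cost.

// Illustrative arithmetic only (assumed values, mirroring the formula above).
#include <cstdio>

int main() {
  unsigned MaxVScale = 2048 / 128;      // assumed architectural max / block size = 16
  unsigned LegalVFMin = 4;              // assumed known-min lanes of the legal type
  unsigned LTFirst = 1, MemOpCost = 1;  // assumed legalization steps and element cost
  unsigned Cost = LTFirst * (MaxVScale * LegalVFMin) * MemOpCost;
  std::printf("%u\n", Cost);            // 64
}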
+bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
+ return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
+}
+
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
@@ -823,7 +823,7 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
return LT.first * 2 * AmortizationCost;
}
- if (useNeonVector(Ty) &&
+ if (useNeonVector(Ty) &&
cast<VectorType>(Ty)->getElementType()->isIntegerTy(8)) {
unsigned ProfitableNumElements;
if (Opcode == Instruction::Store)
@@ -1098,70 +1098,70 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
return false;
}
-int AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsPairwise, bool IsUnsigned,
- TTI::TargetCostKind CostKind) {
- if (!isa<ScalableVectorType>(Ty))
- return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
- CostKind);
- assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) &&
- "Both vectors need to be scalable");
-
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
- int LegalizationCost = 0;
- if (LT.first > 1) {
- Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
- unsigned CmpOpcode =
- Ty->isFPOrFPVectorTy() ? Instruction::FCmp : Instruction::ICmp;
- LegalizationCost =
- getCmpSelInstrCost(CmpOpcode, LegalVTy, LegalVTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind) +
- getCmpSelInstrCost(Instruction::Select, LegalVTy, LegalVTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
- LegalizationCost *= LT.first - 1;
- }
-
- return LegalizationCost + /*Cost of horizontal reduction*/ 2;
-}
-
-int AArch64TTIImpl::getArithmeticReductionCostSVE(
- unsigned Opcode, VectorType *ValTy, bool IsPairwise,
- TTI::TargetCostKind CostKind) {
- assert(!IsPairwise && "Cannot be pair wise to continue");
-
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
- int LegalizationCost = 0;
- if (LT.first > 1) {
- Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
- LegalizationCost = getArithmeticInstrCost(Opcode, LegalVTy, CostKind);
- LegalizationCost *= LT.first - 1;
- }
-
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
- // Add the final reduction cost for the legal horizontal reduction
- switch (ISD) {
- case ISD::ADD:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- case ISD::FADD:
- return LegalizationCost + 2;
- default:
- // TODO: Replace for invalid when InstructionCost is used
- // cases not supported by SVE
- return 16;
- }
-}
-
+int AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwise, bool IsUnsigned,
+ TTI::TargetCostKind CostKind) {
+ if (!isa<ScalableVectorType>(Ty))
+ return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
+ CostKind);
+ assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) &&
+ "Both vectors need to be scalable");
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ int LegalizationCost = 0;
+ if (LT.first > 1) {
+ Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
+ unsigned CmpOpcode =
+ Ty->isFPOrFPVectorTy() ? Instruction::FCmp : Instruction::ICmp;
+ LegalizationCost =
+ getCmpSelInstrCost(CmpOpcode, LegalVTy, LegalVTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind) +
+ getCmpSelInstrCost(Instruction::Select, LegalVTy, LegalVTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ LegalizationCost *= LT.first - 1;
+ }
+
+ return LegalizationCost + /*Cost of horizontal reduction*/ 2;
+}
+
+int AArch64TTIImpl::getArithmeticReductionCostSVE(
+ unsigned Opcode, VectorType *ValTy, bool IsPairwise,
+ TTI::TargetCostKind CostKind) {
+ assert(!IsPairwise && "Cannot be pair wise to continue");
+
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ int LegalizationCost = 0;
+ if (LT.first > 1) {
+ Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
+ LegalizationCost = getArithmeticInstrCost(Opcode, LegalVTy, CostKind);
+ LegalizationCost *= LT.first - 1;
+ }
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+ // Add the final reduction cost for the legal horizontal reduction
+ switch (ISD) {
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::FADD:
+ return LegalizationCost + 2;
+ default:
+ // TODO: Replace for invalid when InstructionCost is used
+ // cases not supported by SVE
+ return 16;
+ }
+}
+
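As a quick worked instance of the SVE reduction costs above (assumed numbers): if the value type legalizes into LT.first = 4 registers and each legal per-register add costs 1, the arithmetic-reduction estimate is (4 - 1) * 1 + 2 = 5, while opcodes the SVE path does not model fall back to the flat 16; the min/max variant is analogous but pays a compare plus a select for each extra legalized register before adding the final cost of 2 for the horizontal reduction.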
int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
VectorType *ValTy,
bool IsPairwiseForm,
TTI::TargetCostKind CostKind) {
- if (isa<ScalableVectorType>(ValTy))
- return getArithmeticReductionCostSVE(Opcode, ValTy, IsPairwiseForm,
- CostKind);
+ if (isa<ScalableVectorType>(ValTy))
+ return getArithmeticReductionCostSVE(Opcode, ValTy, IsPairwiseForm,
+ CostKind);
if (IsPairwiseForm)
return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
CostKind);
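
The SVE reduction costs restored in the hunks above reduce to a simple formula: (LT.first - 1) copies of the per-part cost for legalization, plus a flat 2 for the final horizontal reduction, with opcodes SVE cannot reduce pinned at 16. A minimal standalone C++ sketch of that arithmetic follows; the enum and function names are illustrative only and are not LLVM API.

#include <iostream>

// Standalone mirror of the arithmetic in getArithmeticReductionCostSVE above:
// NumLegalParts plays the role of LT.first, PerPartCost the cost of one
// legalized vector operation.
enum class ReduceOp { Add, And, Or, Xor, FAdd, Mul /* not reducible on SVE */ };

int sveArithmeticReductionCost(ReduceOp Op, int NumLegalParts, int PerPartCost) {
  int LegalizationCost = 0;
  if (NumLegalParts > 1)
    LegalizationCost = PerPartCost * (NumLegalParts - 1);

  switch (Op) {
  case ReduceOp::Add:
  case ReduceOp::And:
  case ReduceOp::Or:
  case ReduceOp::Xor:
  case ReduceOp::FAdd:
    return LegalizationCost + 2; // cost of the legal horizontal reduction
  default:
    return 16;                   // placeholder for cases SVE does not support
  }
}

int main() {
  // A reduction whose type legalizes into two parts, each costing 1.
  std::cout << sveArithmeticReductionCost(ReduceOp::Add, 2, 1) << "\n"; // 3
  std::cout << sveArithmeticReductionCost(ReduceOp::Mul, 2, 1) << "\n"; // 16
}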
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 7c9360ada9..f669e3f595 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -74,8 +74,8 @@ public:
int getIntImmCost(int64_t Val);
int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty, TTI::TargetCostKind CostKind,
- Instruction *Inst = nullptr);
+ Type *Ty, TTI::TargetCostKind CostKind,
+ Instruction *Inst = nullptr);
int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty, TTI::TargetCostKind CostKind);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
@@ -97,9 +97,9 @@ public:
return 31;
}
- unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
- TTI::TargetCostKind CostKind);
-
+ unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
+
unsigned getRegisterBitWidth(bool Vector) const {
if (Vector) {
if (ST->hasSVE())
@@ -115,21 +115,21 @@ public:
return ST->getMinVectorRegisterBitWidth();
}
- Optional<unsigned> getMaxVScale() const {
- if (ST->hasSVE())
- return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
- return BaseT::getMaxVScale();
- }
-
+ Optional<unsigned> getMaxVScale() const {
+ if (ST->hasSVE())
+ return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
+ return BaseT::getMaxVScale();
+ }
+
unsigned getMaxInterleaveFactor(unsigned VF);
- unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
- const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr);
-
+ unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ const Value *Ptr, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
+ TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
@@ -139,14 +139,14 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsPairwise, bool IsUnsigned,
- TTI::TargetCostKind CostKind);
-
- int getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy,
- bool IsPairwiseForm,
- TTI::TargetCostKind CostKind);
-
+ int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwise, bool IsUnsigned,
+ TTI::TargetCostKind CostKind);
+
+ int getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy,
+ bool IsPairwiseForm,
+ TTI::TargetCostKind CostKind);
+
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
@@ -160,13 +160,13 @@ public:
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- CmpInst::Predicate VecPred,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
- bool useNeonVector(const Type *Ty) const;
+ bool useNeonVector(const Type *Ty) const;
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
@@ -191,9 +191,9 @@ public:
return false;
Type *Ty = cast<ScalableVectorType>(DataType)->getElementType();
- if (Ty->isPointerTy())
- return true;
-
+ if (Ty->isPointerTy())
+ return true;
+
if (Ty->isBFloatTy() || Ty->isHalfTy() ||
Ty->isFloatTy() || Ty->isDoubleTy())
return true;
@@ -241,14 +241,14 @@ public:
shouldConsiderAddressTypePromotion(const Instruction &I,
bool &AllowPromotionWithoutCommonHeader);
- bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
+ bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
unsigned getGISelRematGlobalCost() const {
return 2;
}
- bool supportsScalableVectors() const { return ST->hasSVE(); }
-
+ bool supportsScalableVectors() const { return ST->hasSVE(); }
+
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
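
The header hunk above brings back getMaxVScale(), which divides the largest architectural SVE vector length by the 128-bit block size. A standalone sketch of that computation, with SVEMaxBitsPerVector = 2048 and SVEBitsPerBlock = 128 treated as assumed values of the AArch64 constants rather than real includes:

#include <iostream>
#include <optional>

// Assumed values of AArch64::SVEMaxBitsPerVector and AArch64::SVEBitsPerBlock.
constexpr unsigned SVEMaxBitsPerVector = 2048;
constexpr unsigned SVEBitsPerBlock = 128;

std::optional<unsigned> getMaxVScale(bool HasSVE) {
  if (HasSVE)
    return SVEMaxBitsPerVector / SVEBitsPerBlock; // 16 blocks of 128 bits
  return std::nullopt; // mirrors deferring to the base implementation
}

int main() {
  if (auto V = getMaxVScale(/*HasSVE=*/true))
    std::cout << "max vscale = " << *V << "\n"; // prints 16
}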
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 96c50ff3f8..d69e2b127c 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AArch64AddressingModes.h"
-#include "MCTargetDesc/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64InstPrinter.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
@@ -159,13 +159,13 @@ private:
bool parseSymbolicImmVal(const MCExpr *&ImmVal);
bool parseNeonVectorList(OperandVector &Operands);
bool parseOptionalMulOperand(OperandVector &Operands);
- bool parseKeywordOperand(OperandVector &Operands);
+ bool parseKeywordOperand(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, bool isCondCode,
bool invertCondCode);
- bool parseImmExpr(int64_t &Out);
- bool parseComma();
- bool parseRegisterInRange(unsigned &Out, unsigned Base, unsigned First,
- unsigned Last);
+ bool parseImmExpr(int64_t &Out);
+ bool parseComma();
+ bool parseRegisterInRange(unsigned &Out, unsigned Base, unsigned First,
+ unsigned Last);
bool showMatchError(SMLoc Loc, unsigned ErrCode, uint64_t ErrorInfo,
OperandVector &Operands);
@@ -187,31 +187,31 @@ private:
bool parseDirectiveVariantPCS(SMLoc L);
- bool parseDirectiveSEHAllocStack(SMLoc L);
- bool parseDirectiveSEHPrologEnd(SMLoc L);
- bool parseDirectiveSEHSaveR19R20X(SMLoc L);
- bool parseDirectiveSEHSaveFPLR(SMLoc L);
- bool parseDirectiveSEHSaveFPLRX(SMLoc L);
- bool parseDirectiveSEHSaveReg(SMLoc L);
- bool parseDirectiveSEHSaveRegX(SMLoc L);
- bool parseDirectiveSEHSaveRegP(SMLoc L);
- bool parseDirectiveSEHSaveRegPX(SMLoc L);
- bool parseDirectiveSEHSaveLRPair(SMLoc L);
- bool parseDirectiveSEHSaveFReg(SMLoc L);
- bool parseDirectiveSEHSaveFRegX(SMLoc L);
- bool parseDirectiveSEHSaveFRegP(SMLoc L);
- bool parseDirectiveSEHSaveFRegPX(SMLoc L);
- bool parseDirectiveSEHSetFP(SMLoc L);
- bool parseDirectiveSEHAddFP(SMLoc L);
- bool parseDirectiveSEHNop(SMLoc L);
- bool parseDirectiveSEHSaveNext(SMLoc L);
- bool parseDirectiveSEHEpilogStart(SMLoc L);
- bool parseDirectiveSEHEpilogEnd(SMLoc L);
- bool parseDirectiveSEHTrapFrame(SMLoc L);
- bool parseDirectiveSEHMachineFrame(SMLoc L);
- bool parseDirectiveSEHContext(SMLoc L);
- bool parseDirectiveSEHClearUnwoundToCall(SMLoc L);
-
+ bool parseDirectiveSEHAllocStack(SMLoc L);
+ bool parseDirectiveSEHPrologEnd(SMLoc L);
+ bool parseDirectiveSEHSaveR19R20X(SMLoc L);
+ bool parseDirectiveSEHSaveFPLR(SMLoc L);
+ bool parseDirectiveSEHSaveFPLRX(SMLoc L);
+ bool parseDirectiveSEHSaveReg(SMLoc L);
+ bool parseDirectiveSEHSaveRegX(SMLoc L);
+ bool parseDirectiveSEHSaveRegP(SMLoc L);
+ bool parseDirectiveSEHSaveRegPX(SMLoc L);
+ bool parseDirectiveSEHSaveLRPair(SMLoc L);
+ bool parseDirectiveSEHSaveFReg(SMLoc L);
+ bool parseDirectiveSEHSaveFRegX(SMLoc L);
+ bool parseDirectiveSEHSaveFRegP(SMLoc L);
+ bool parseDirectiveSEHSaveFRegPX(SMLoc L);
+ bool parseDirectiveSEHSetFP(SMLoc L);
+ bool parseDirectiveSEHAddFP(SMLoc L);
+ bool parseDirectiveSEHNop(SMLoc L);
+ bool parseDirectiveSEHSaveNext(SMLoc L);
+ bool parseDirectiveSEHEpilogStart(SMLoc L);
+ bool parseDirectiveSEHEpilogEnd(SMLoc L);
+ bool parseDirectiveSEHTrapFrame(SMLoc L);
+ bool parseDirectiveSEHMachineFrame(SMLoc L);
+ bool parseDirectiveSEHContext(SMLoc L);
+ bool parseDirectiveSEHClearUnwoundToCall(SMLoc L);
+
bool validateInstruction(MCInst &Inst, SMLoc &IDLoc,
SmallVectorImpl<SMLoc> &Loc);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -231,7 +231,7 @@ private:
RegKind MatchKind);
OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands);
OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
- OperandMatchResultTy tryParseBarriernXSOperand(OperandVector &Operands);
+ OperandMatchResultTy tryParseBarriernXSOperand(OperandVector &Operands);
OperandMatchResultTy tryParseMRSSystemRegister(OperandVector &Operands);
OperandMatchResultTy tryParseSysReg(OperandVector &Operands);
OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands);
@@ -258,7 +258,7 @@ private:
OperandMatchResultTy tryParseVectorList(OperandVector &Operands,
bool ExpectMatch = false);
OperandMatchResultTy tryParseSVEPattern(OperandVector &Operands);
- OperandMatchResultTy tryParseGPR64x8(OperandVector &Operands);
+ OperandMatchResultTy tryParseGPR64x8(OperandVector &Operands);
public:
enum AArch64MatchResultTy {
@@ -271,7 +271,7 @@ public:
AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI, MII) {
- IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32;
+ IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32;
MCAsmParserExtension::Initialize(Parser);
MCStreamer &S = getParser().getStreamer();
if (S.getTargetStreamer() == nullptr)
@@ -404,7 +404,7 @@ private:
const char *Data;
unsigned Length;
unsigned Val; // Not the enum since not all values have names.
- bool HasnXSModifier;
+ bool HasnXSModifier;
};
struct SysRegOp {
@@ -574,11 +574,11 @@ public:
return StringRef(Barrier.Data, Barrier.Length);
}
- bool getBarriernXSModifier() const {
- assert(Kind == k_Barrier && "Invalid access!");
- return Barrier.HasnXSModifier;
- }
-
+ bool getBarriernXSModifier() const {
+ assert(Kind == k_Barrier && "Invalid access!");
+ return Barrier.HasnXSModifier;
+ }
+
unsigned getReg() const override {
assert(Kind == k_Register && "Invalid access!");
return Reg.RegNum;
@@ -750,8 +750,8 @@ public:
ELFRefKind == AArch64MCExpr::VK_GOTTPREL_LO12_NC ||
ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12 ||
ELFRefKind == AArch64MCExpr::VK_SECREL_LO12 ||
- ELFRefKind == AArch64MCExpr::VK_SECREL_HI12 ||
- ELFRefKind == AArch64MCExpr::VK_GOT_PAGE_LO15) {
+ ELFRefKind == AArch64MCExpr::VK_SECREL_HI12 ||
+ ELFRefKind == AArch64MCExpr::VK_GOT_PAGE_LO15) {
// Note that we don't range-check the addend. It's adjusted modulo page
// size when converted, so there is no "out of range" condition when using
// @pageoff.
@@ -897,8 +897,8 @@ public:
if (!isShiftedImm() && (!isImm() || !isa<MCConstantExpr>(getImm())))
return DiagnosticPredicateTy::NoMatch;
- bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value ||
- std::is_same<int8_t, T>::value;
+ bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value ||
+ std::is_same<int8_t, T>::value;
if (auto ShiftedImm = getShiftedVal<8>())
if (!(IsByte && ShiftedImm->second) &&
AArch64_AM::isSVECpyImm<T>(uint64_t(ShiftedImm->first)
@@ -915,8 +915,8 @@ public:
if (!isShiftedImm() && (!isImm() || !isa<MCConstantExpr>(getImm())))
return DiagnosticPredicateTy::NoMatch;
- bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value ||
- std::is_same<int8_t, T>::value;
+ bool IsByte = std::is_same<int8_t, std::make_signed_t<T>>::value ||
+ std::is_same<int8_t, T>::value;
if (auto ShiftedImm = getShiftedVal<8>())
if (!(IsByte && ShiftedImm->second) &&
AArch64_AM::isSVEAddSubImm<T>(ShiftedImm->first
@@ -1041,12 +1041,12 @@ public:
AArch64_AM::getFP64Imm(getFPImm().bitcastToAPInt()) != -1;
}
- bool isBarrier() const {
- return Kind == k_Barrier && !getBarriernXSModifier();
- }
- bool isBarriernXS() const {
- return Kind == k_Barrier && getBarriernXSModifier();
- }
+ bool isBarrier() const {
+ return Kind == k_Barrier && !getBarriernXSModifier();
+ }
+ bool isBarriernXS() const {
+ return Kind == k_Barrier && getBarriernXSModifier();
+ }
bool isSysReg() const { return Kind == k_SysReg; }
bool isMRSSystemRegister() const {
@@ -1173,12 +1173,12 @@ public:
AArch64MCRegisterClasses[AArch64::GPR32RegClassID].contains(Reg.RegNum);
}
- bool isGPR64x8() const {
- return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
- AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID].contains(
- Reg.RegNum);
- }
-
+ bool isGPR64x8() const {
+ return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
+ AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID].contains(
+ Reg.RegNum);
+ }
+
bool isWSeqPair() const {
return Kind == k_Register && Reg.Kind == RegKind::Scalar &&
AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains(
@@ -1742,11 +1742,11 @@ public:
Inst.addOperand(MCOperand::createImm(getBarrier()));
}
- void addBarriernXSOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createImm(getBarrier()));
- }
-
+ void addBarriernXSOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getBarrier()));
+ }
+
void addMRSSystemRegisterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
@@ -1982,13 +1982,13 @@ public:
static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val,
StringRef Str,
SMLoc S,
- MCContext &Ctx,
- bool HasnXSModifier) {
+ MCContext &Ctx,
+ bool HasnXSModifier) {
auto Op = std::make_unique<AArch64Operand>(k_Barrier, Ctx);
Op->Barrier.Val = Val;
Op->Barrier.Data = Str.data();
Op->Barrier.Length = Str.size();
- Op->Barrier.HasnXSModifier = HasnXSModifier;
+ Op->Barrier.HasnXSModifier = HasnXSModifier;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -2133,9 +2133,9 @@ void AArch64Operand::print(raw_ostream &OS) const {
case k_PSBHint:
OS << getPSBHintName();
break;
- case k_BTIHint:
- OS << getBTIHintName();
- break;
+ case k_BTIHint:
+ OS << getBTIHintName();
+ break;
case k_Register:
OS << "<register " << getReg() << ">";
if (!getShiftExtendAmount() && !hasShiftExtendAmount())
@@ -2570,7 +2570,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE &&
ELFRefKind != AArch64MCExpr::VK_ABS_PAGE_NC &&
ELFRefKind != AArch64MCExpr::VK_GOT_PAGE &&
- ELFRefKind != AArch64MCExpr::VK_GOT_PAGE_LO15 &&
+ ELFRefKind != AArch64MCExpr::VK_GOT_PAGE_LO15 &&
ELFRefKind != AArch64MCExpr::VK_GOTTPREL_PAGE &&
ELFRefKind != AArch64MCExpr::VK_TLSDESC_PAGE) {
// The operand must be an @page or @gotpage qualified symbolref.
@@ -2904,7 +2904,7 @@ static const struct Extension {
{"predres", {AArch64::FeaturePredRes}},
{"ccdp", {AArch64::FeatureCacheDeepPersist}},
{"mte", {AArch64::FeatureMTE}},
- {"memtag", {AArch64::FeatureMTE}},
+ {"memtag", {AArch64::FeatureMTE}},
{"tlb-rmi", {AArch64::FeatureTLB_RMI}},
{"pan-rwv", {AArch64::FeaturePAN_RWV}},
{"ccpp", {AArch64::FeatureCCPP}},
@@ -2915,10 +2915,10 @@ static const struct Extension {
{"sve2-sm4", {AArch64::FeatureSVE2SM4}},
{"sve2-sha3", {AArch64::FeatureSVE2SHA3}},
{"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}},
- {"ls64", {AArch64::FeatureLS64}},
- {"xs", {AArch64::FeatureXS}},
- {"pauth", {AArch64::FeaturePAuth}},
- {"flagm", {AArch64::FeatureFlagM}},
+ {"ls64", {AArch64::FeatureLS64}},
+ {"xs", {AArch64::FeatureXS}},
+ {"pauth", {AArch64::FeaturePAuth}},
+ {"flagm", {AArch64::FeatureFlagM}},
// FIXME: Unsupported extensions
{"pan", {}},
{"lor", {}},
@@ -2939,16 +2939,16 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv8.5a";
else if (FBS[AArch64::HasV8_6aOps])
Str += "ARMv8.6a";
- else if (FBS[AArch64::HasV8_7aOps])
- Str += "ARMv8.7a";
+ else if (FBS[AArch64::HasV8_7aOps])
+ Str += "ARMv8.7a";
else {
- SmallVector<std::string, 2> ExtMatches;
- for (const auto& Ext : ExtensionMap) {
+ SmallVector<std::string, 2> ExtMatches;
+ for (const auto& Ext : ExtensionMap) {
// Use & in case multiple features are enabled
- if ((FBS & Ext.Features) != FeatureBitset())
- ExtMatches.push_back(Ext.Name);
- }
- Str += !ExtMatches.empty() ? llvm::join(ExtMatches, ", ") : "(unknown)";
+ if ((FBS & Ext.Features) != FeatureBitset())
+ ExtMatches.push_back(Ext.Name);
+ }
+ Str += !ExtMatches.empty() ? llvm::join(ExtMatches, ", ") : "(unknown)";
}
}
@@ -2993,7 +2993,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
if (!IC)
return TokError("invalid operand for IC instruction");
else if (!IC->haveFeatures(getSTI().getFeatureBits())) {
- std::string Str("IC " + std::string(IC->Name) + " requires: ");
+ std::string Str("IC " + std::string(IC->Name) + " requires: ");
setRequiredFeatureString(IC->getRequiredFeatures(), Str);
return TokError(Str.c_str());
}
@@ -3003,7 +3003,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
if (!DC)
return TokError("invalid operand for DC instruction");
else if (!DC->haveFeatures(getSTI().getFeatureBits())) {
- std::string Str("DC " + std::string(DC->Name) + " requires: ");
+ std::string Str("DC " + std::string(DC->Name) + " requires: ");
setRequiredFeatureString(DC->getRequiredFeatures(), Str);
return TokError(Str.c_str());
}
@@ -3013,7 +3013,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
if (!AT)
return TokError("invalid operand for AT instruction");
else if (!AT->haveFeatures(getSTI().getFeatureBits())) {
- std::string Str("AT " + std::string(AT->Name) + " requires: ");
+ std::string Str("AT " + std::string(AT->Name) + " requires: ");
setRequiredFeatureString(AT->getRequiredFeatures(), Str);
return TokError(Str.c_str());
}
@@ -3023,7 +3023,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
if (!TLBI)
return TokError("invalid operand for TLBI instruction");
else if (!TLBI->haveFeatures(getSTI().getFeatureBits())) {
- std::string Str("TLBI " + std::string(TLBI->Name) + " requires: ");
+ std::string Str("TLBI " + std::string(TLBI->Name) + " requires: ");
setRequiredFeatureString(TLBI->getRequiredFeatures(), Str);
return TokError(Str.c_str());
}
@@ -3034,7 +3034,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
return TokError("invalid operand for prediction restriction instruction");
else if (!PRCTX->haveFeatures(getSTI().getFeatureBits())) {
std::string Str(
- Mnemonic.upper() + std::string(PRCTX->Name) + " requires: ");
+ Mnemonic.upper() + std::string(PRCTX->Name) + " requires: ");
setRequiredFeatureString(PRCTX->getRequiredFeatures(), Str);
return TokError(Str.c_str());
}
@@ -3082,7 +3082,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
// Immediate operand.
const MCExpr *ImmVal;
SMLoc ExprLoc = getLoc();
- AsmToken IntTok = Tok;
+ AsmToken IntTok = Tok;
if (getParser().parseExpression(ImmVal))
return MatchOperand_ParseFail;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
@@ -3090,22 +3090,22 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
Error(ExprLoc, "immediate value expected for barrier operand");
return MatchOperand_ParseFail;
}
- int64_t Value = MCE->getValue();
- if (Mnemonic == "dsb" && Value > 15) {
- // This case is a no match here, but it might be matched by the nXS
- // variant. Deliberately not unlex the optional '#' as it is not necessary
- // to characterize an integer immediate.
- Parser.getLexer().UnLex(IntTok);
- return MatchOperand_NoMatch;
- }
- if (Value < 0 || Value > 15) {
+ int64_t Value = MCE->getValue();
+ if (Mnemonic == "dsb" && Value > 15) {
+ // This case is a no match here, but it might be matched by the nXS
+ // variant. Deliberately not unlex the optional '#' as it is not necessary
+ // to characterize an integer immediate.
+ Parser.getLexer().UnLex(IntTok);
+ return MatchOperand_NoMatch;
+ }
+ if (Value < 0 || Value > 15) {
Error(ExprLoc, "barrier operand out of range");
return MatchOperand_ParseFail;
}
- auto DB = AArch64DB::lookupDBByEncoding(Value);
- Operands.push_back(AArch64Operand::CreateBarrier(Value, DB ? DB->Name : "",
- ExprLoc, getContext(),
- false /*hasnXSModifier*/));
+ auto DB = AArch64DB::lookupDBByEncoding(Value);
+ Operands.push_back(AArch64Operand::CreateBarrier(Value, DB ? DB->Name : "",
+ ExprLoc, getContext(),
+ false /*hasnXSModifier*/));
return MatchOperand_Success;
}
@@ -3114,9 +3114,9 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- StringRef Operand = Tok.getString();
- auto TSB = AArch64TSB::lookupTSBByName(Operand);
- auto DB = AArch64DB::lookupDBByName(Operand);
+ StringRef Operand = Tok.getString();
+ auto TSB = AArch64TSB::lookupTSBByName(Operand);
+ auto DB = AArch64DB::lookupDBByName(Operand);
// The only valid named option for ISB is 'sy'
if (Mnemonic == "isb" && (!DB || DB->Encoding != AArch64DB::sy)) {
TokError("'sy' or #imm operand expected");
@@ -3126,79 +3126,79 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
TokError("'csync' operand expected");
return MatchOperand_ParseFail;
} else if (!DB && !TSB) {
- if (Mnemonic == "dsb") {
- // This case is a no match here, but it might be matched by the nXS
- // variant.
- return MatchOperand_NoMatch;
- }
+ if (Mnemonic == "dsb") {
+ // This case is a no match here, but it might be matched by the nXS
+ // variant.
+ return MatchOperand_NoMatch;
+ }
TokError("invalid barrier option name");
return MatchOperand_ParseFail;
}
Operands.push_back(AArch64Operand::CreateBarrier(
- DB ? DB->Encoding : TSB->Encoding, Tok.getString(), getLoc(),
- getContext(), false /*hasnXSModifier*/));
- Parser.Lex(); // Consume the option
-
- return MatchOperand_Success;
-}
-
-OperandMatchResultTy
-AArch64AsmParser::tryParseBarriernXSOperand(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
-
- assert(Mnemonic == "dsb" && "Instruction does not accept nXS operands");
- if (Mnemonic != "dsb")
- return MatchOperand_ParseFail;
-
- if (parseOptionalToken(AsmToken::Hash) || Tok.is(AsmToken::Integer)) {
- // Immediate operand.
- const MCExpr *ImmVal;
- SMLoc ExprLoc = getLoc();
- if (getParser().parseExpression(ImmVal))
- return MatchOperand_ParseFail;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- Error(ExprLoc, "immediate value expected for barrier operand");
- return MatchOperand_ParseFail;
- }
- int64_t Value = MCE->getValue();
- // v8.7-A DSB in the nXS variant accepts only the following immediate
- // values: 16, 20, 24, 28.
- if (Value != 16 && Value != 20 && Value != 24 && Value != 28) {
- Error(ExprLoc, "barrier operand out of range");
- return MatchOperand_ParseFail;
- }
- auto DB = AArch64DBnXS::lookupDBnXSByImmValue(Value);
- Operands.push_back(AArch64Operand::CreateBarrier(DB->Encoding, DB->Name,
- ExprLoc, getContext(),
- true /*hasnXSModifier*/));
- return MatchOperand_Success;
- }
-
- if (Tok.isNot(AsmToken::Identifier)) {
- TokError("invalid operand for instruction");
- return MatchOperand_ParseFail;
- }
-
- StringRef Operand = Tok.getString();
- auto DB = AArch64DBnXS::lookupDBnXSByName(Operand);
-
- if (!DB) {
- TokError("invalid barrier option name");
- return MatchOperand_ParseFail;
- }
-
- Operands.push_back(
- AArch64Operand::CreateBarrier(DB->Encoding, Tok.getString(), getLoc(),
- getContext(), true /*hasnXSModifier*/));
+ DB ? DB->Encoding : TSB->Encoding, Tok.getString(), getLoc(),
+ getContext(), false /*hasnXSModifier*/));
Parser.Lex(); // Consume the option
return MatchOperand_Success;
}
OperandMatchResultTy
+AArch64AsmParser::tryParseBarriernXSOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+
+ assert(Mnemonic == "dsb" && "Instruction does not accept nXS operands");
+ if (Mnemonic != "dsb")
+ return MatchOperand_ParseFail;
+
+ if (parseOptionalToken(AsmToken::Hash) || Tok.is(AsmToken::Integer)) {
+ // Immediate operand.
+ const MCExpr *ImmVal;
+ SMLoc ExprLoc = getLoc();
+ if (getParser().parseExpression(ImmVal))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ Error(ExprLoc, "immediate value expected for barrier operand");
+ return MatchOperand_ParseFail;
+ }
+ int64_t Value = MCE->getValue();
+ // v8.7-A DSB in the nXS variant accepts only the following immediate
+ // values: 16, 20, 24, 28.
+ if (Value != 16 && Value != 20 && Value != 24 && Value != 28) {
+ Error(ExprLoc, "barrier operand out of range");
+ return MatchOperand_ParseFail;
+ }
+ auto DB = AArch64DBnXS::lookupDBnXSByImmValue(Value);
+ Operands.push_back(AArch64Operand::CreateBarrier(DB->Encoding, DB->Name,
+ ExprLoc, getContext(),
+ true /*hasnXSModifier*/));
+ return MatchOperand_Success;
+ }
+
+ if (Tok.isNot(AsmToken::Identifier)) {
+ TokError("invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ StringRef Operand = Tok.getString();
+ auto DB = AArch64DBnXS::lookupDBnXSByName(Operand);
+
+ if (!DB) {
+ TokError("invalid barrier option name");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(
+ AArch64Operand::CreateBarrier(DB->Encoding, Tok.getString(), getLoc(),
+ getContext(), true /*hasnXSModifier*/));
+ Parser.Lex(); // Consume the option
+
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy
AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
@@ -3438,7 +3438,7 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
.Case("tprel_lo12_nc", AArch64MCExpr::VK_TPREL_LO12_NC)
.Case("tlsdesc_lo12", AArch64MCExpr::VK_TLSDESC_LO12)
.Case("got", AArch64MCExpr::VK_GOT_PAGE)
- .Case("gotpage_lo15", AArch64MCExpr::VK_GOT_PAGE_LO15)
+ .Case("gotpage_lo15", AArch64MCExpr::VK_GOT_PAGE_LO15)
.Case("got_lo12", AArch64MCExpr::VK_GOT_LO12)
.Case("gottprel", AArch64MCExpr::VK_GOTTPREL_PAGE)
.Case("gottprel_lo12", AArch64MCExpr::VK_GOTTPREL_LO12_NC)
@@ -3707,17 +3707,17 @@ bool AArch64AsmParser::parseOptionalMulOperand(OperandVector &Operands) {
return Error(getLoc(), "expected 'vl' or '#<imm>'");
}
-bool AArch64AsmParser::parseKeywordOperand(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- auto Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::Identifier))
- return true;
- Operands.push_back(AArch64Operand::CreateToken(Tok.getString(), false,
- Tok.getLoc(), getContext()));
- Parser.Lex();
- return false;
-}
-
+bool AArch64AsmParser::parseKeywordOperand(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ auto Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier))
+ return true;
+ Operands.push_back(AArch64Operand::CreateToken(Tok.getString(), false,
+ Tok.getLoc(), getContext()));
+ Parser.Lex();
+ return false;
+}
+
/// parseOperand - Parse an arm instruction operand. For now this parses the
/// operand regardless of the mnemonic.
bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
@@ -3782,11 +3782,11 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
if (GotShift != MatchOperand_NoMatch)
return GotShift;
- // If this is a two-word mnemonic, parse its special keyword
- // operand as an identifier.
- if (Mnemonic == "brb")
- return parseKeywordOperand(Operands);
-
+ // If this is a two-word mnemonic, parse its special keyword
+ // operand as an identifier.
+ if (Mnemonic == "brb")
+ return parseKeywordOperand(Operands);
+
// This was not a register so parse other operands that start with an
// identifier (like labels) as expressions and create them as immediates.
const MCExpr *IdVal;
@@ -3895,66 +3895,66 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
}
}
-bool AArch64AsmParser::parseImmExpr(int64_t &Out) {
- const MCExpr *Expr = nullptr;
- SMLoc L = getLoc();
- if (check(getParser().parseExpression(Expr), L, "expected expression"))
- return true;
- const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
- if (check(!Value, L, "expected constant expression"))
- return true;
- Out = Value->getValue();
- return false;
-}
-
-bool AArch64AsmParser::parseComma() {
- if (check(getParser().getTok().isNot(AsmToken::Comma), getLoc(),
- "expected comma"))
- return true;
- // Eat the comma
- getParser().Lex();
- return false;
-}
-
-bool AArch64AsmParser::parseRegisterInRange(unsigned &Out, unsigned Base,
- unsigned First, unsigned Last) {
- unsigned Reg;
- SMLoc Start, End;
- if (check(ParseRegister(Reg, Start, End), getLoc(), "expected register"))
- return true;
-
- // Special handling for FP and LR; they aren't linearly after x28 in
- // the registers enum.
- unsigned RangeEnd = Last;
- if (Base == AArch64::X0) {
- if (Last == AArch64::FP) {
- RangeEnd = AArch64::X28;
- if (Reg == AArch64::FP) {
- Out = 29;
- return false;
- }
- }
- if (Last == AArch64::LR) {
- RangeEnd = AArch64::X28;
- if (Reg == AArch64::FP) {
- Out = 29;
- return false;
- } else if (Reg == AArch64::LR) {
- Out = 30;
- return false;
- }
- }
- }
-
- if (check(Reg < First || Reg > RangeEnd, Start,
- Twine("expected register in range ") +
- AArch64InstPrinter::getRegisterName(First) + " to " +
- AArch64InstPrinter::getRegisterName(Last)))
- return true;
- Out = Reg - Base;
- return false;
-}
-
+bool AArch64AsmParser::parseImmExpr(int64_t &Out) {
+ const MCExpr *Expr = nullptr;
+ SMLoc L = getLoc();
+ if (check(getParser().parseExpression(Expr), L, "expected expression"))
+ return true;
+ const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
+ if (check(!Value, L, "expected constant expression"))
+ return true;
+ Out = Value->getValue();
+ return false;
+}
+
+bool AArch64AsmParser::parseComma() {
+ if (check(getParser().getTok().isNot(AsmToken::Comma), getLoc(),
+ "expected comma"))
+ return true;
+ // Eat the comma
+ getParser().Lex();
+ return false;
+}
+
+bool AArch64AsmParser::parseRegisterInRange(unsigned &Out, unsigned Base,
+ unsigned First, unsigned Last) {
+ unsigned Reg;
+ SMLoc Start, End;
+ if (check(ParseRegister(Reg, Start, End), getLoc(), "expected register"))
+ return true;
+
+ // Special handling for FP and LR; they aren't linearly after x28 in
+ // the registers enum.
+ unsigned RangeEnd = Last;
+ if (Base == AArch64::X0) {
+ if (Last == AArch64::FP) {
+ RangeEnd = AArch64::X28;
+ if (Reg == AArch64::FP) {
+ Out = 29;
+ return false;
+ }
+ }
+ if (Last == AArch64::LR) {
+ RangeEnd = AArch64::X28;
+ if (Reg == AArch64::FP) {
+ Out = 29;
+ return false;
+ } else if (Reg == AArch64::LR) {
+ Out = 30;
+ return false;
+ }
+ }
+ }
+
+ if (check(Reg < First || Reg > RangeEnd, Start,
+ Twine("expected register in range ") +
+ AArch64InstPrinter::getRegisterName(First) + " to " +
+ AArch64InstPrinter::getRegisterName(Last)))
+ return true;
+ Out = Reg - Base;
+ return false;
+}
+
bool AArch64AsmParser::regsEqual(const MCParsedAsmOperand &Op1,
const MCParsedAsmOperand &Op2) const {
auto &AOp1 = static_cast<const AArch64Operand&>(Op1);
@@ -5273,7 +5273,7 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
const MCObjectFileInfo::Environment Format =
getContext().getObjectFileInfo()->getObjectFileType();
bool IsMachO = Format == MCObjectFileInfo::IsMachO;
- bool IsCOFF = Format == MCObjectFileInfo::IsCOFF;
+ bool IsCOFF = Format == MCObjectFileInfo::IsCOFF;
auto IDVal = DirectiveID.getIdentifier().lower();
SMLoc Loc = DirectiveID.getLoc();
@@ -5302,57 +5302,57 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveLOH(IDVal, Loc);
else
return true;
- } else if (IsCOFF) {
- if (IDVal == ".seh_stackalloc")
- parseDirectiveSEHAllocStack(Loc);
- else if (IDVal == ".seh_endprologue")
- parseDirectiveSEHPrologEnd(Loc);
- else if (IDVal == ".seh_save_r19r20_x")
- parseDirectiveSEHSaveR19R20X(Loc);
- else if (IDVal == ".seh_save_fplr")
- parseDirectiveSEHSaveFPLR(Loc);
- else if (IDVal == ".seh_save_fplr_x")
- parseDirectiveSEHSaveFPLRX(Loc);
- else if (IDVal == ".seh_save_reg")
- parseDirectiveSEHSaveReg(Loc);
- else if (IDVal == ".seh_save_reg_x")
- parseDirectiveSEHSaveRegX(Loc);
- else if (IDVal == ".seh_save_regp")
- parseDirectiveSEHSaveRegP(Loc);
- else if (IDVal == ".seh_save_regp_x")
- parseDirectiveSEHSaveRegPX(Loc);
- else if (IDVal == ".seh_save_lrpair")
- parseDirectiveSEHSaveLRPair(Loc);
- else if (IDVal == ".seh_save_freg")
- parseDirectiveSEHSaveFReg(Loc);
- else if (IDVal == ".seh_save_freg_x")
- parseDirectiveSEHSaveFRegX(Loc);
- else if (IDVal == ".seh_save_fregp")
- parseDirectiveSEHSaveFRegP(Loc);
- else if (IDVal == ".seh_save_fregp_x")
- parseDirectiveSEHSaveFRegPX(Loc);
- else if (IDVal == ".seh_set_fp")
- parseDirectiveSEHSetFP(Loc);
- else if (IDVal == ".seh_add_fp")
- parseDirectiveSEHAddFP(Loc);
- else if (IDVal == ".seh_nop")
- parseDirectiveSEHNop(Loc);
- else if (IDVal == ".seh_save_next")
- parseDirectiveSEHSaveNext(Loc);
- else if (IDVal == ".seh_startepilogue")
- parseDirectiveSEHEpilogStart(Loc);
- else if (IDVal == ".seh_endepilogue")
- parseDirectiveSEHEpilogEnd(Loc);
- else if (IDVal == ".seh_trap_frame")
- parseDirectiveSEHTrapFrame(Loc);
- else if (IDVal == ".seh_pushframe")
- parseDirectiveSEHMachineFrame(Loc);
- else if (IDVal == ".seh_context")
- parseDirectiveSEHContext(Loc);
- else if (IDVal == ".seh_clear_unwound_to_call")
- parseDirectiveSEHClearUnwoundToCall(Loc);
- else
- return true;
+ } else if (IsCOFF) {
+ if (IDVal == ".seh_stackalloc")
+ parseDirectiveSEHAllocStack(Loc);
+ else if (IDVal == ".seh_endprologue")
+ parseDirectiveSEHPrologEnd(Loc);
+ else if (IDVal == ".seh_save_r19r20_x")
+ parseDirectiveSEHSaveR19R20X(Loc);
+ else if (IDVal == ".seh_save_fplr")
+ parseDirectiveSEHSaveFPLR(Loc);
+ else if (IDVal == ".seh_save_fplr_x")
+ parseDirectiveSEHSaveFPLRX(Loc);
+ else if (IDVal == ".seh_save_reg")
+ parseDirectiveSEHSaveReg(Loc);
+ else if (IDVal == ".seh_save_reg_x")
+ parseDirectiveSEHSaveRegX(Loc);
+ else if (IDVal == ".seh_save_regp")
+ parseDirectiveSEHSaveRegP(Loc);
+ else if (IDVal == ".seh_save_regp_x")
+ parseDirectiveSEHSaveRegPX(Loc);
+ else if (IDVal == ".seh_save_lrpair")
+ parseDirectiveSEHSaveLRPair(Loc);
+ else if (IDVal == ".seh_save_freg")
+ parseDirectiveSEHSaveFReg(Loc);
+ else if (IDVal == ".seh_save_freg_x")
+ parseDirectiveSEHSaveFRegX(Loc);
+ else if (IDVal == ".seh_save_fregp")
+ parseDirectiveSEHSaveFRegP(Loc);
+ else if (IDVal == ".seh_save_fregp_x")
+ parseDirectiveSEHSaveFRegPX(Loc);
+ else if (IDVal == ".seh_set_fp")
+ parseDirectiveSEHSetFP(Loc);
+ else if (IDVal == ".seh_add_fp")
+ parseDirectiveSEHAddFP(Loc);
+ else if (IDVal == ".seh_nop")
+ parseDirectiveSEHNop(Loc);
+ else if (IDVal == ".seh_save_next")
+ parseDirectiveSEHSaveNext(Loc);
+ else if (IDVal == ".seh_startepilogue")
+ parseDirectiveSEHEpilogStart(Loc);
+ else if (IDVal == ".seh_endepilogue")
+ parseDirectiveSEHEpilogEnd(Loc);
+ else if (IDVal == ".seh_trap_frame")
+ parseDirectiveSEHTrapFrame(Loc);
+ else if (IDVal == ".seh_pushframe")
+ parseDirectiveSEHMachineFrame(Loc);
+ else if (IDVal == ".seh_context")
+ parseDirectiveSEHContext(Loc);
+ else if (IDVal == ".seh_clear_unwound_to_call")
+ parseDirectiveSEHClearUnwoundToCall(Loc);
+ else
+ return true;
} else
return true;
return false;
@@ -5360,8 +5360,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
SmallVector<StringRef, 4> &RequestedExtensions) {
- const bool NoCrypto = llvm::is_contained(RequestedExtensions, "nocrypto");
- const bool Crypto = llvm::is_contained(RequestedExtensions, "crypto");
+ const bool NoCrypto = llvm::is_contained(RequestedExtensions, "nocrypto");
+ const bool Crypto = llvm::is_contained(RequestedExtensions, "crypto");
if (!NoCrypto && Crypto) {
switch (ArchKind) {
@@ -5377,8 +5377,8 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
case AArch64::ArchKind::ARMV8_4A:
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
- case AArch64::ArchKind::ARMV8_7A:
- case AArch64::ArchKind::ARMV8R:
+ case AArch64::ArchKind::ARMV8_7A:
+ case AArch64::ArchKind::ARMV8R:
RequestedExtensions.push_back("sm4");
RequestedExtensions.push_back("sha3");
RequestedExtensions.push_back("sha2");
@@ -5399,7 +5399,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
case AArch64::ArchKind::ARMV8_4A:
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
- case AArch64::ArchKind::ARMV8_7A:
+ case AArch64::ArchKind::ARMV8_7A:
RequestedExtensions.push_back("nosm4");
RequestedExtensions.push_back("nosha3");
RequestedExtensions.push_back("nosha2");
@@ -5433,8 +5433,8 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
MCSubtargetInfo &STI = copySTI();
std::vector<std::string> ArchFeatures(AArch64Features.begin(), AArch64Features.end());
- STI.setDefaultFeatures("generic", /*TuneCPU*/ "generic",
- join(ArchFeatures.begin(), ArchFeatures.end(), ","));
+ STI.setDefaultFeatures("generic", /*TuneCPU*/ "generic",
+ join(ArchFeatures.begin(), ArchFeatures.end(), ","));
SmallVector<StringRef, 4> RequestedExtensions;
if (!ExtensionString.empty())
@@ -5536,7 +5536,7 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) {
}
MCSubtargetInfo &STI = copySTI();
- STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, "");
+ STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, "");
CurLoc = incrementLoc(CurLoc, CPU.size());
ExpandCryptoAEK(llvm::AArch64::getCPUArchKind(CPU), RequestedExtensions);
@@ -5804,238 +5804,238 @@ bool AArch64AsmParser::parseDirectiveVariantPCS(SMLoc L) {
return false;
}
-/// parseDirectiveSEHAllocStack
-/// ::= .seh_stackalloc
-bool AArch64AsmParser::parseDirectiveSEHAllocStack(SMLoc L) {
- int64_t Size;
- if (parseImmExpr(Size))
- return true;
- getTargetStreamer().EmitARM64WinCFIAllocStack(Size);
- return false;
-}
-
-/// parseDirectiveSEHPrologEnd
-/// ::= .seh_endprologue
-bool AArch64AsmParser::parseDirectiveSEHPrologEnd(SMLoc L) {
- getTargetStreamer().EmitARM64WinCFIPrologEnd();
- return false;
-}
-
-/// parseDirectiveSEHSaveR19R20X
-/// ::= .seh_save_r19r20_x
-bool AArch64AsmParser::parseDirectiveSEHSaveR19R20X(SMLoc L) {
- int64_t Offset;
- if (parseImmExpr(Offset))
- return true;
- getTargetStreamer().EmitARM64WinCFISaveR19R20X(Offset);
- return false;
-}
-
-/// parseDirectiveSEHSaveFPLR
-/// ::= .seh_save_fplr
-bool AArch64AsmParser::parseDirectiveSEHSaveFPLR(SMLoc L) {
- int64_t Offset;
- if (parseImmExpr(Offset))
- return true;
- getTargetStreamer().EmitARM64WinCFISaveFPLR(Offset);
- return false;
-}
-
-/// parseDirectiveSEHSaveFPLRX
-/// ::= .seh_save_fplr_x
-bool AArch64AsmParser::parseDirectiveSEHSaveFPLRX(SMLoc L) {
- int64_t Offset;
- if (parseImmExpr(Offset))
- return true;
- getTargetStreamer().EmitARM64WinCFISaveFPLRX(Offset);
- return false;
-}
-
-/// parseDirectiveSEHSaveReg
-/// ::= .seh_save_reg
-bool AArch64AsmParser::parseDirectiveSEHSaveReg(SMLoc L) {
- unsigned Reg;
- int64_t Offset;
- if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) ||
- parseComma() || parseImmExpr(Offset))
- return true;
- getTargetStreamer().EmitARM64WinCFISaveReg(Reg, Offset);
- return false;
-}
-
-/// parseDirectiveSEHSaveRegX
-/// ::= .seh_save_reg_x
-bool AArch64AsmParser::parseDirectiveSEHSaveRegX(SMLoc L) {
- unsigned Reg;
- int64_t Offset;
- if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) ||
- parseComma() || parseImmExpr(Offset))
- return true;
- getTargetStreamer().EmitARM64WinCFISaveRegX(Reg, Offset);
- return false;
-}
-
-/// parseDirectiveSEHSaveRegP
-/// ::= .seh_save_regp
-bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) {
- unsigned Reg;
- int64_t Offset;
- if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) ||
- parseComma() || parseImmExpr(Offset))
- return true;
- getTargetStreamer().EmitARM64WinCFISaveRegP(Reg, Offset);
- return false;
-}
-
-/// parseDirectiveSEHSaveRegPX
-/// ::= .seh_save_regp_x
-bool AArch64AsmParser::parseDirectiveSEHSaveRegPX(SMLoc L) {
- unsigned Reg;
- int64_t Offset;
- if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) ||
- parseComma() || parseImmExpr(Offset))
- return true;
- getTargetStreamer().EmitARM64WinCFISaveRegPX(Reg, Offset);
- return false;
-}
-
-/// parseDirectiveSEHSaveLRPair
-/// ::= .seh_save_lrpair
-bool AArch64AsmParser::parseDirectiveSEHSaveLRPair(SMLoc L) {
- unsigned Reg;
- int64_t Offset;
- L = getLoc();
- if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) ||
- parseComma() || parseImmExpr(Offset))
- return true;
- if (check(((Reg - 19) % 2 != 0), L,
- "expected register with even offset from x19"))
- return true;
- getTargetStreamer().EmitARM64WinCFISaveLRPair(Reg, Offset);
- return false;
-}
-
-/// parseDirectiveSEHSaveFReg
-/// ::= .seh_save_freg
-bool AArch64AsmParser::parseDirectiveSEHSaveFReg(SMLoc L) {
- unsigned Reg;
- int64_t Offset;
- if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) ||
- parseComma() || parseImmExpr(Offset))
- return true;
- getTargetStreamer().EmitARM64WinCFISaveFReg(Reg, Offset);
- return false;
-}
-
-/// parseDirectiveSEHSaveFRegX
-/// ::= .seh_save_freg_x
-bool AArch64AsmParser::parseDirectiveSEHSaveFRegX(SMLoc L) {
- unsigned Reg;
- int64_t Offset;
- if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) ||
- parseComma() || parseImmExpr(Offset))
- return true;
- getTargetStreamer().EmitARM64WinCFISaveFRegX(Reg, Offset);
- return false;
-}
-
-/// parseDirectiveSEHSaveFRegP
-/// ::= .seh_save_fregp
-bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) {
- unsigned Reg;
- int64_t Offset;
- if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) ||
- parseComma() || parseImmExpr(Offset))
- return true;
- getTargetStreamer().EmitARM64WinCFISaveFRegP(Reg, Offset);
- return false;
-}
-
-/// parseDirectiveSEHSaveFRegPX
-/// ::= .seh_save_fregp_x
-bool AArch64AsmParser::parseDirectiveSEHSaveFRegPX(SMLoc L) {
- unsigned Reg;
- int64_t Offset;
- if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) ||
- parseComma() || parseImmExpr(Offset))
- return true;
- getTargetStreamer().EmitARM64WinCFISaveFRegPX(Reg, Offset);
- return false;
-}
-
-/// parseDirectiveSEHSetFP
-/// ::= .seh_set_fp
-bool AArch64AsmParser::parseDirectiveSEHSetFP(SMLoc L) {
- getTargetStreamer().EmitARM64WinCFISetFP();
- return false;
-}
-
-/// parseDirectiveSEHAddFP
-/// ::= .seh_add_fp
-bool AArch64AsmParser::parseDirectiveSEHAddFP(SMLoc L) {
- int64_t Size;
- if (parseImmExpr(Size))
- return true;
- getTargetStreamer().EmitARM64WinCFIAddFP(Size);
- return false;
-}
-
-/// parseDirectiveSEHNop
-/// ::= .seh_nop
-bool AArch64AsmParser::parseDirectiveSEHNop(SMLoc L) {
- getTargetStreamer().EmitARM64WinCFINop();
- return false;
-}
-
-/// parseDirectiveSEHSaveNext
-/// ::= .seh_save_next
-bool AArch64AsmParser::parseDirectiveSEHSaveNext(SMLoc L) {
- getTargetStreamer().EmitARM64WinCFISaveNext();
- return false;
-}
-
-/// parseDirectiveSEHEpilogStart
-/// ::= .seh_startepilogue
-bool AArch64AsmParser::parseDirectiveSEHEpilogStart(SMLoc L) {
- getTargetStreamer().EmitARM64WinCFIEpilogStart();
- return false;
-}
-
-/// parseDirectiveSEHEpilogEnd
-/// ::= .seh_endepilogue
-bool AArch64AsmParser::parseDirectiveSEHEpilogEnd(SMLoc L) {
- getTargetStreamer().EmitARM64WinCFIEpilogEnd();
- return false;
-}
-
-/// parseDirectiveSEHTrapFrame
-/// ::= .seh_trap_frame
-bool AArch64AsmParser::parseDirectiveSEHTrapFrame(SMLoc L) {
- getTargetStreamer().EmitARM64WinCFITrapFrame();
- return false;
-}
-
-/// parseDirectiveSEHMachineFrame
-/// ::= .seh_pushframe
-bool AArch64AsmParser::parseDirectiveSEHMachineFrame(SMLoc L) {
- getTargetStreamer().EmitARM64WinCFIMachineFrame();
- return false;
-}
-
-/// parseDirectiveSEHContext
-/// ::= .seh_context
-bool AArch64AsmParser::parseDirectiveSEHContext(SMLoc L) {
- getTargetStreamer().EmitARM64WinCFIContext();
- return false;
-}
-
-/// parseDirectiveSEHClearUnwoundToCall
-/// ::= .seh_clear_unwound_to_call
-bool AArch64AsmParser::parseDirectiveSEHClearUnwoundToCall(SMLoc L) {
- getTargetStreamer().EmitARM64WinCFIClearUnwoundToCall();
- return false;
-}
-
+/// parseDirectiveSEHAllocStack
+/// ::= .seh_stackalloc
+bool AArch64AsmParser::parseDirectiveSEHAllocStack(SMLoc L) {
+ int64_t Size;
+ if (parseImmExpr(Size))
+ return true;
+ getTargetStreamer().EmitARM64WinCFIAllocStack(Size);
+ return false;
+}
+
+/// parseDirectiveSEHPrologEnd
+/// ::= .seh_endprologue
+bool AArch64AsmParser::parseDirectiveSEHPrologEnd(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFIPrologEnd();
+ return false;
+}
+
+/// parseDirectiveSEHSaveR19R20X
+/// ::= .seh_save_r19r20_x
+bool AArch64AsmParser::parseDirectiveSEHSaveR19R20X(SMLoc L) {
+ int64_t Offset;
+ if (parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveR19R20X(Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFPLR
+/// ::= .seh_save_fplr
+bool AArch64AsmParser::parseDirectiveSEHSaveFPLR(SMLoc L) {
+ int64_t Offset;
+ if (parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveFPLR(Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFPLRX
+/// ::= .seh_save_fplr_x
+bool AArch64AsmParser::parseDirectiveSEHSaveFPLRX(SMLoc L) {
+ int64_t Offset;
+ if (parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveFPLRX(Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveReg
+/// ::= .seh_save_reg
+bool AArch64AsmParser::parseDirectiveSEHSaveReg(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveReg(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveRegX
+/// ::= .seh_save_reg_x
+bool AArch64AsmParser::parseDirectiveSEHSaveRegX(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveRegX(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveRegP
+/// ::= .seh_save_regp
+bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveRegP(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveRegPX
+/// ::= .seh_save_regp_x
+bool AArch64AsmParser::parseDirectiveSEHSaveRegPX(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveRegPX(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveLRPair
+/// ::= .seh_save_lrpair
+bool AArch64AsmParser::parseDirectiveSEHSaveLRPair(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ L = getLoc();
+ if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ if (check(((Reg - 19) % 2 != 0), L,
+ "expected register with even offset from x19"))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveLRPair(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFReg
+/// ::= .seh_save_freg
+bool AArch64AsmParser::parseDirectiveSEHSaveFReg(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveFReg(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFRegX
+/// ::= .seh_save_freg_x
+bool AArch64AsmParser::parseDirectiveSEHSaveFRegX(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveFRegX(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFRegP
+/// ::= .seh_save_fregp
+bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveFRegP(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSaveFRegPX
+/// ::= .seh_save_fregp_x
+bool AArch64AsmParser::parseDirectiveSEHSaveFRegPX(SMLoc L) {
+ unsigned Reg;
+ int64_t Offset;
+ if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) ||
+ parseComma() || parseImmExpr(Offset))
+ return true;
+ getTargetStreamer().EmitARM64WinCFISaveFRegPX(Reg, Offset);
+ return false;
+}
+
+/// parseDirectiveSEHSetFP
+/// ::= .seh_set_fp
+bool AArch64AsmParser::parseDirectiveSEHSetFP(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFISetFP();
+ return false;
+}
+
+/// parseDirectiveSEHAddFP
+/// ::= .seh_add_fp
+bool AArch64AsmParser::parseDirectiveSEHAddFP(SMLoc L) {
+ int64_t Size;
+ if (parseImmExpr(Size))
+ return true;
+ getTargetStreamer().EmitARM64WinCFIAddFP(Size);
+ return false;
+}
+
+/// parseDirectiveSEHNop
+/// ::= .seh_nop
+bool AArch64AsmParser::parseDirectiveSEHNop(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFINop();
+ return false;
+}
+
+/// parseDirectiveSEHSaveNext
+/// ::= .seh_save_next
+bool AArch64AsmParser::parseDirectiveSEHSaveNext(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFISaveNext();
+ return false;
+}
+
+/// parseDirectiveSEHEpilogStart
+/// ::= .seh_startepilogue
+bool AArch64AsmParser::parseDirectiveSEHEpilogStart(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFIEpilogStart();
+ return false;
+}
+
+/// parseDirectiveSEHEpilogEnd
+/// ::= .seh_endepilogue
+bool AArch64AsmParser::parseDirectiveSEHEpilogEnd(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFIEpilogEnd();
+ return false;
+}
+
+/// parseDirectiveSEHTrapFrame
+/// ::= .seh_trap_frame
+bool AArch64AsmParser::parseDirectiveSEHTrapFrame(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFITrapFrame();
+ return false;
+}
+
+/// parseDirectiveSEHMachineFrame
+/// ::= .seh_pushframe
+bool AArch64AsmParser::parseDirectiveSEHMachineFrame(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFIMachineFrame();
+ return false;
+}
+
+/// parseDirectiveSEHContext
+/// ::= .seh_context
+bool AArch64AsmParser::parseDirectiveSEHContext(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFIContext();
+ return false;
+}
+
+/// parseDirectiveSEHClearUnwoundToCall
+/// ::= .seh_clear_unwound_to_call
+bool AArch64AsmParser::parseDirectiveSEHClearUnwoundToCall(SMLoc L) {
+ getTargetStreamer().EmitARM64WinCFIClearUnwoundToCall();
+ return false;
+}
+
bool
AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
AArch64MCExpr::VariantKind &ELFRefKind,
@@ -6323,26 +6323,26 @@ AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
return MatchOperand_Success;
}
-
-OperandMatchResultTy
-AArch64AsmParser::tryParseGPR64x8(OperandVector &Operands) {
- SMLoc SS = getLoc();
-
- unsigned XReg;
- if (tryParseScalarRegister(XReg) != MatchOperand_Success)
- return MatchOperand_NoMatch;
-
- MCContext &ctx = getContext();
- const MCRegisterInfo *RI = ctx.getRegisterInfo();
- int X8Reg = RI->getMatchingSuperReg(
- XReg, AArch64::x8sub_0,
- &AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID]);
- if (!X8Reg) {
- Error(SS, "expected an even-numbered x-register in the range [x0,x22]");
- return MatchOperand_ParseFail;
- }
-
- Operands.push_back(
- AArch64Operand::CreateReg(X8Reg, RegKind::Scalar, SS, getLoc(), ctx));
- return MatchOperand_Success;
-}
+
+OperandMatchResultTy
+AArch64AsmParser::tryParseGPR64x8(OperandVector &Operands) {
+ SMLoc SS = getLoc();
+
+ unsigned XReg;
+ if (tryParseScalarRegister(XReg) != MatchOperand_Success)
+ return MatchOperand_NoMatch;
+
+ MCContext &ctx = getContext();
+ const MCRegisterInfo *RI = ctx.getRegisterInfo();
+ int X8Reg = RI->getMatchingSuperReg(
+ XReg, AArch64::x8sub_0,
+ &AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID]);
+ if (!X8Reg) {
+ Error(SS, "expected an even-numbered x-register in the range [x0,x22]");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(
+ AArch64Operand::CreateReg(X8Reg, RegKind::Scalar, SS, getLoc(), ctx));
+ return MatchOperand_Success;
+}
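
The dsb handling restored in the AsmParser above accepts immediates 0-15 (or a named option) for the plain form and only 16, 20, 24 or 28 for the v8.7-A nXS form. A small sketch of just that immediate classification, outside the MC parser machinery (the enum and function are illustrative, not LLVM names):

#include <cstdint>
#include <iostream>

enum class BarrierForm { Invalid, Plain, nXS };

// Mirrors the range checks in tryParseBarrierOperand / tryParseBarriernXSOperand.
BarrierForm classifyDsbImmediate(int64_t Value) {
  if (Value >= 0 && Value <= 15)
    return BarrierForm::Plain;   // ordinary dsb encodings
  if (Value == 16 || Value == 20 || Value == 24 || Value == 28)
    return BarrierForm::nXS;     // v8.7-A dsb ... nXS encodings
  return BarrierForm::Invalid;   // "barrier operand out of range"
}

int main() {
  for (int64_t V : {15, 16, 20, 28, 29})
    std::cout << V << " -> " << static_cast<int>(classifyDsbImmediate(V)) << "\n";
}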
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make
index 512f510d85..c9421c4c06 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make
+++ b/contrib/libs/llvm12/lib/Target/AArch64/AsmParser/ya.make
@@ -12,20 +12,20 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/MC
- contrib/libs/llvm12/lib/MC/MCParser
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc
- contrib/libs/llvm12/lib/Target/AArch64/TargetInfo
- contrib/libs/llvm12/lib/Target/AArch64/Utils
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/MC
+ contrib/libs/llvm12/lib/MC/MCParser
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc
+ contrib/libs/llvm12/lib/Target/AArch64/TargetInfo
+ contrib/libs/llvm12/lib/Target/AArch64/Utils
)
ADDINCL(
- ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64
- contrib/libs/llvm12/lib/Target/AArch64
- contrib/libs/llvm12/lib/Target/AArch64/AsmParser
+ ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64
+ contrib/libs/llvm12/lib/Target/AArch64
+ contrib/libs/llvm12/lib/Target/AArch64/AsmParser
)
NO_COMPILER_WARNINGS()
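
Stepping back to the AsmParser changes above: each restored .seh_* directive is parsed into a matching EmitARM64WinCFI* call on the target streamer. A toy dispatch sketch of that shape, using a hypothetical stand-in streamer rather than the real MC classes:

#include <functional>
#include <iostream>
#include <map>
#include <string>

// Hypothetical stand-in for the target streamer; in the diff each directive
// parser forwards to a corresponding EmitARM64WinCFI* method.
struct WinCFIStreamer {
  void allocStack(long Size) { std::cout << ".seh_stackalloc " << Size << "\n"; }
  void prologEnd()           { std::cout << ".seh_endprologue\n"; }
  void nop()                 { std::cout << ".seh_nop\n"; }
};

int main() {
  WinCFIStreamer S;
  // Directive name -> action, condensing the if/else chain in ParseDirective.
  std::map<std::string, std::function<void()>> Handlers = {
      {".seh_stackalloc",  [&S] { S.allocStack(16); }},
      {".seh_endprologue", [&S] { S.prologEnd(); }},
      {".seh_nop",         [&S] { S.nop(); }},
  };
  for (const char *Directive : {".seh_stackalloc", ".seh_nop"})
    Handlers[Directive]();
}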
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index dca76f8457..72f9968681 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -62,10 +62,10 @@ static DecodeStatus DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
@@ -271,16 +271,16 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
uint32_t Insn =
(Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
- const uint8_t *Tables[] = {DecoderTable32, DecoderTableFallback32};
-
- for (auto Table : Tables) {
- DecodeStatus Result =
- decodeInstruction(Table, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail)
- return Result;
- }
-
- return MCDisassembler::Fail;
+ const uint8_t *Tables[] = {DecoderTable32, DecoderTableFallback32};
+
+ for (auto Table : Tables) {
+ DecodeStatus Result =
+ decodeInstruction(Table, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail)
+ return Result;
+ }
+
+ return MCDisassembler::Fail;
}
static MCSymbolizer *
@@ -461,35 +461,35 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
-static const unsigned GPR64x8DecoderTable[] = {
- AArch64::X0_X1_X2_X3_X4_X5_X6_X7,
- AArch64::X2_X3_X4_X5_X6_X7_X8_X9,
- AArch64::X4_X5_X6_X7_X8_X9_X10_X11,
- AArch64::X6_X7_X8_X9_X10_X11_X12_X13,
- AArch64::X8_X9_X10_X11_X12_X13_X14_X15,
- AArch64::X10_X11_X12_X13_X14_X15_X16_X17,
- AArch64::X12_X13_X14_X15_X16_X17_X18_X19,
- AArch64::X14_X15_X16_X17_X18_X19_X20_X21,
- AArch64::X16_X17_X18_X19_X20_X21_X22_X23,
- AArch64::X18_X19_X20_X21_X22_X23_X24_X25,
- AArch64::X20_X21_X22_X23_X24_X25_X26_X27,
- AArch64::X22_X23_X24_X25_X26_X27_X28_FP,
-};
-
-static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
- if (RegNo > 22)
- return Fail;
- if (RegNo & 1)
- return Fail;
-
- unsigned Register = GPR64x8DecoderTable[RegNo >> 1];
- Inst.addOperand(MCOperand::createReg(Register));
- return Success;
-}
-
+static const unsigned GPR64x8DecoderTable[] = {
+ AArch64::X0_X1_X2_X3_X4_X5_X6_X7,
+ AArch64::X2_X3_X4_X5_X6_X7_X8_X9,
+ AArch64::X4_X5_X6_X7_X8_X9_X10_X11,
+ AArch64::X6_X7_X8_X9_X10_X11_X12_X13,
+ AArch64::X8_X9_X10_X11_X12_X13_X14_X15,
+ AArch64::X10_X11_X12_X13_X14_X15_X16_X17,
+ AArch64::X12_X13_X14_X15_X16_X17_X18_X19,
+ AArch64::X14_X15_X16_X17_X18_X19_X20_X21,
+ AArch64::X16_X17_X18_X19_X20_X21_X22_X23,
+ AArch64::X18_X19_X20_X21_X22_X23_X24_X25,
+ AArch64::X20_X21_X22_X23_X24_X25_X26_X27,
+ AArch64::X22_X23_X24_X25_X26_X27_X28_FP,
+};
+
+static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 22)
+ return Fail;
+ if (RegNo & 1)
+ return Fail;
+
+ unsigned Register = GPR64x8DecoderTable[RegNo >> 1];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return Success;
+}
+
static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
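For reference, the decode rule in the hunk above accepts only even register encodings up to 22 and uses encoding/2 to pick one of twelve overlapping X-register tuples. A minimal standalone sketch of that index math; the names and strings below are illustrative stand-ins, not LLVM's register enums.

// Standalone sketch of the GPR64x8 tuple decode rule shown above.
// Illustrative only: strings stand in for the AArch64::X*_..._* register enums.
#include <cassert>
#include <optional>
#include <string>

static const char *const kTupleNames[12] = {
    "X0..X7",   "X2..X9",   "X4..X11",  "X6..X13",
    "X8..X15",  "X10..X17", "X12..X19", "X14..X21",
    "X16..X23", "X18..X25", "X20..X27", "X22..FP"};

// Mirrors DecodeGPR64x8ClassRegisterClass: only even encodings 0..22 are
// valid, and encoding/2 indexes the tuple table.
std::optional<std::string> decodeGPR64x8(unsigned RegNo) {
  if (RegNo > 22 || (RegNo & 1))
    return std::nullopt; // the real decoder returns Fail here
  return std::string(kTupleNames[RegNo >> 1]);
}

int main() {
  assert(decodeGPR64x8(0) == "X0..X7");
  assert(decodeGPR64x8(22) == "X22..FP");
  assert(!decodeGPR64x8(3));  // odd encodings are rejected
  assert(!decodeGPR64x8(24)); // out of range
}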
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make
index 096b55cd68..e4da353a77 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make
+++ b/contrib/libs/llvm12/lib/Target/AArch64/Disassembler/ya.make
@@ -12,20 +12,20 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/MC
- contrib/libs/llvm12/lib/MC/MCDisassembler
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc
- contrib/libs/llvm12/lib/Target/AArch64/TargetInfo
- contrib/libs/llvm12/lib/Target/AArch64/Utils
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/MC
+ contrib/libs/llvm12/lib/MC/MCDisassembler
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc
+ contrib/libs/llvm12/lib/Target/AArch64/TargetInfo
+ contrib/libs/llvm12/lib/Target/AArch64/Utils
)
ADDINCL(
- ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64
- contrib/libs/llvm12/lib/Target/AArch64
- contrib/libs/llvm12/lib/Target/AArch64/Disassembler
+ ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64
+ contrib/libs/llvm12/lib/Target/AArch64
+ contrib/libs/llvm12/lib/Target/AArch64/Disassembler
)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 0f8b1d6584..7b05f70a73 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -52,10 +52,10 @@ AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
: CallLowering(&TLI) {}
namespace {
-struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
+struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn)
- : IncomingValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
+ : IncomingValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -101,7 +101,7 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
/// How the physical register gets marked varies between formal
/// parameters (it's a basic-block live-in), and a call instruction
/// (it's an implicit-def of the BL).
- virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
+ virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
uint64_t StackUsed;
};
@@ -111,7 +111,7 @@ struct FormalArgHandler : public IncomingArgHandler {
CCAssignFn *AssignFn)
: IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
- void markPhysRegUsed(MCRegister PhysReg) override {
+ void markPhysRegUsed(MCRegister PhysReg) override {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
@@ -122,19 +122,19 @@ struct CallReturnHandler : public IncomingArgHandler {
MachineInstrBuilder MIB, CCAssignFn *AssignFn)
: IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
- void markPhysRegUsed(MCRegister PhysReg) override {
+ void markPhysRegUsed(MCRegister PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
}
MachineInstrBuilder MIB;
};
-struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
+struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn,
CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
int FPDiff = 0)
- : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
+ : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
StackSize(0), SPReg(0) {}
@@ -187,8 +187,8 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
if (!Arg.IsFixed)
MaxSize = 0;
- assert(Arg.Regs.size() == 1);
-
+ assert(Arg.Regs.size() == 1);
+
Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
? extendRegister(Arg.Regs[0], VA, MaxSize)
: Arg.Regs[0];
@@ -274,7 +274,7 @@ void AArch64CallLowering::splitToValueTypes(
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val,
ArrayRef<Register> VRegs,
- FunctionLoweringInfo &FLI,
+ FunctionLoweringInfo &FLI,
Register SwiftErrorVReg) const {
auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
@@ -420,7 +420,7 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
// Conservatively forward X8, since it might be used for an aggregate
// return.
if (!CCInfo.isAllocated(AArch64::X8)) {
- Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
+ Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
}
@@ -441,7 +441,7 @@ bool AArch64CallLowering::fallBackToDAGISel(const Function &F) const {
bool AArch64CallLowering::lowerFormalArguments(
MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
+ ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
MachineFunction &MF = MIRBuilder.getMF();
MachineBasicBlock &MBB = MIRBuilder.getMBB();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -623,25 +623,25 @@ bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
MachineRegisterInfo &MRI = MF.getRegInfo();
- if (Info.IsVarArg) {
- // Be conservative and disallow variadic memory operands to match SDAG's
- // behaviour.
- // FIXME: If the caller's calling convention is C, then we can
- // potentially use its argument area. However, for cases like fastcc,
- // we can't do anything.
- for (unsigned i = 0; i < OutLocs.size(); ++i) {
- auto &ArgLoc = OutLocs[i];
- if (ArgLoc.isRegLoc())
- continue;
+ if (Info.IsVarArg) {
+ // Be conservative and disallow variadic memory operands to match SDAG's
+ // behaviour.
+ // FIXME: If the caller's calling convention is C, then we can
+ // potentially use its argument area. However, for cases like fastcc,
+ // we can't do anything.
+ for (unsigned i = 0; i < OutLocs.size(); ++i) {
+ auto &ArgLoc = OutLocs[i];
+ if (ArgLoc.isRegLoc())
+ continue;
LLVM_DEBUG(
dbgs()
- << "... Cannot tail call vararg function with stack arguments\n");
+ << "... Cannot tail call vararg function with stack arguments\n");
return false;
}
}
- return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
+ return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
}
bool AArch64CallLowering::isEligibleForTailCallOptimization(
@@ -756,7 +756,7 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
// When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
// x16 or x17.
- if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
+ if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
return AArch64::TCRETURNriBTI;
return AArch64::TCRETURNri;
@@ -776,7 +776,7 @@ bool AArch64CallLowering::lowerTailCall(
// TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
// register class. Until we can do that, we should fall back here.
- if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
+ if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
LLVM_DEBUG(
dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
return false;
@@ -894,9 +894,9 @@ bool AArch64CallLowering::lowerTailCall(
// If Callee is a reg, since it is used by a target specific instruction,
// it must have a register class matching the constraint of that instruction.
if (Info.Callee.isReg())
- constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
- *MF.getSubtarget().getRegBankInfo(), *MIB,
- MIB->getDesc(), Info.Callee, 0);
+ constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
+ *MF.getSubtarget().getRegBankInfo(), *MIB,
+ MIB->getDesc(), Info.Callee, 0);
MF.getFrameInfo().setHasTailCall();
Info.LoweredTailCall = true;
@@ -978,9 +978,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// instruction, it must have a register class matching the
// constraint of that instruction.
if (Info.Callee.isReg())
- constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
- *MF.getSubtarget().getRegBankInfo(), *MIB,
- MIB->getDesc(), Info.Callee, 0);
+ constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
+ *MF.getSubtarget().getRegBankInfo(), *MIB,
+ MIB->getDesc(), Info.Callee, 0);
// Finally we can copy the returned value back into its virtual-register. In
// symmetry with the arguments, the physical register must be an
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h
index 1f45c9ebc0..8054cf6b99 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64CallLowering.h
@@ -34,14 +34,14 @@ public:
AArch64CallLowering(const AArch64TargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI,
+ ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI,
Register SwiftErrorVReg) const override;
bool fallBackToDAGISel(const Function &F) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<ArrayRef<Register>> VRegs,
- FunctionLoweringInfo &FLI) const override;
+ ArrayRef<ArrayRef<Register>> VRegs,
+ FunctionLoweringInfo &FLI) const override;
bool lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const override;
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
index bed1136c7a..9536f0a596 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
@@ -1,29 +1,29 @@
-//===- AArch64GlobalISelUtils.h ----------------------------------*- C++ -*-==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file APIs for AArch64-specific helper functions used in the GlobalISel
-/// pipeline.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
-#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
-
-#include <cstdint>
-
-namespace llvm {
-namespace AArch64GISelUtils {
-
-/// \returns true if \p C is a legal immediate operand for an arithmetic
-/// instruction.
-constexpr bool isLegalArithImmed(const uint64_t C) {
- return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
-}
-
-} // namespace AArch64GISelUtils
-} // namespace llvm
-
-#endif
+//===- AArch64GlobalISelUtils.h ----------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file APIs for AArch64-specific helper functions used in the GlobalISel
+/// pipeline.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
+#define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
+
+#include <cstdint>
+
+namespace llvm {
+namespace AArch64GISelUtils {
+
+/// \returns true if \p C is a legal immediate operand for an arithmetic
+/// instruction.
+constexpr bool isLegalArithImmed(const uint64_t C) {
+ return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
+}
+
+} // namespace AArch64GISelUtils
+} // namespace llvm
+
+#endif
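The isLegalArithImmed helper above captures the AArch64 rule that an arithmetic immediate is an unsigned 12-bit value, optionally shifted left by 12 bits. A self-contained restatement with a few compile-time checks; this is a sketch mirroring the constexpr above, not the header itself.

// Self-contained restatement of the arithmetic-immediate rule above:
// a 12-bit unsigned value, optionally shifted left by 12.
#include <cstdint>

constexpr bool isLegalArithImmedSketch(uint64_t C) {
  return (C >> 12 == 0) ||                      // fits in the low 12 bits
         ((C & 0xFFFULL) == 0 && C >> 24 == 0); // 12-bit value shifted by 12
}

static_assert(isLegalArithImmedSketch(0), "zero is always encodable");
static_assert(isLegalArithImmedSketch(0xFFF), "largest unshifted immediate");
static_assert(isLegalArithImmedSketch(0xFFF000), "largest shifted immediate");
static_assert(!isLegalArithImmedSketch(0x1234), "13 significant bits, unshifted");
static_assert(!isLegalArithImmedSketch(0x1000001), "low bits set and too wide");

int main() {} // checks are compile-time only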
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index fc5ef02e84..72f92065f3 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -18,7 +18,7 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
@@ -34,18 +34,18 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
-#include "llvm/Pass.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "aarch64-isel"
using namespace llvm;
-using namespace MIPatternMatch;
+using namespace MIPatternMatch;
namespace {
@@ -103,23 +103,23 @@ private:
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
- ///@{
- /// Helper functions for selectCompareBranch.
- bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
- MachineIRBuilder &MIB) const;
- bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
- MachineIRBuilder &MIB) const;
- bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
- MachineIRBuilder &MIB) const;
- bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
+ ///@{
+ /// Helper functions for selectCompareBranch.
+ bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
+ MachineIRBuilder &MIB) const;
+ bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
+ MachineIRBuilder &MIB) const;
+ bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
+ MachineIRBuilder &MIB) const;
+ bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
- ///@}
-
+ ///@}
+
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
- bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
// Helper to generate an equivalent of scalar_to_vector into a new register,
@@ -160,7 +160,7 @@ private:
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
@@ -173,72 +173,72 @@ private:
MachineIRBuilder &MIRBuilder) const;
// Emit an integer compare between LHS and RHS, which checks for Predicate.
- MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
- MachineOperand &Predicate,
- MachineIRBuilder &MIRBuilder) const;
-
- /// Emit a floating point comparison between \p LHS and \p RHS.
- /// \p Pred if given is the intended predicate to use.
- MachineInstr *emitFPCompare(Register LHS, Register RHS,
- MachineIRBuilder &MIRBuilder,
- Optional<CmpInst::Predicate> = None) const;
-
- MachineInstr *emitInstr(unsigned Opcode,
- std::initializer_list<llvm::DstOp> DstOps,
- std::initializer_list<llvm::SrcOp> SrcOps,
- MachineIRBuilder &MIRBuilder,
- const ComplexRendererFns &RenderFns = None) const;
- /// Helper function to emit an add or sub instruction.
- ///
- /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
- /// in a specific order.
- ///
- /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
- ///
- /// \code
- /// const std::array<std::array<unsigned, 2>, 4> Table {
- /// {{AArch64::ADDXri, AArch64::ADDWri},
- /// {AArch64::ADDXrs, AArch64::ADDWrs},
- /// {AArch64::ADDXrr, AArch64::ADDWrr},
- /// {AArch64::SUBXri, AArch64::SUBWri},
- /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
- /// \endcode
- ///
- /// Each row in the table corresponds to a different addressing mode. Each
- /// column corresponds to a different register size.
- ///
- /// \attention Rows must be structured as follows:
- /// - Row 0: The ri opcode variants
- /// - Row 1: The rs opcode variants
- /// - Row 2: The rr opcode variants
- /// - Row 3: The ri opcode variants for negative immediates
- /// - Row 4: The rx opcode variants
- ///
- /// \attention Columns must be structured as follows:
- /// - Column 0: The 64-bit opcode variants
- /// - Column 1: The 32-bit opcode variants
- ///
- /// \p Dst is the destination register of the binop to emit.
- /// \p LHS is the left-hand operand of the binop to emit.
- /// \p RHS is the right-hand operand of the binop to emit.
- MachineInstr *emitAddSub(
- const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
- Register Dst, MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
- MachineOperand &RHS,
+ MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
+ MachineOperand &Predicate,
+ MachineIRBuilder &MIRBuilder) const;
+
+ /// Emit a floating point comparison between \p LHS and \p RHS.
+ /// \p Pred if given is the intended predicate to use.
+ MachineInstr *emitFPCompare(Register LHS, Register RHS,
+ MachineIRBuilder &MIRBuilder,
+ Optional<CmpInst::Predicate> = None) const;
+
+ MachineInstr *emitInstr(unsigned Opcode,
+ std::initializer_list<llvm::DstOp> DstOps,
+ std::initializer_list<llvm::SrcOp> SrcOps,
+ MachineIRBuilder &MIRBuilder,
+ const ComplexRendererFns &RenderFns = None) const;
+ /// Helper function to emit an add or sub instruction.
+ ///
+ /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
+ /// in a specific order.
+ ///
+ /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
+ ///
+ /// \code
+ /// const std::array<std::array<unsigned, 2>, 4> Table {
+ /// {{AArch64::ADDXri, AArch64::ADDWri},
+ /// {AArch64::ADDXrs, AArch64::ADDWrs},
+ /// {AArch64::ADDXrr, AArch64::ADDWrr},
+ /// {AArch64::SUBXri, AArch64::SUBWri},
+ /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
+ /// \endcode
+ ///
+ /// Each row in the table corresponds to a different addressing mode. Each
+ /// column corresponds to a different register size.
+ ///
+ /// \attention Rows must be structured as follows:
+ /// - Row 0: The ri opcode variants
+ /// - Row 1: The rs opcode variants
+ /// - Row 2: The rr opcode variants
+ /// - Row 3: The ri opcode variants for negative immediates
+ /// - Row 4: The rx opcode variants
+ ///
+ /// \attention Columns must be structured as follows:
+ /// - Column 0: The 64-bit opcode variants
+ /// - Column 1: The 32-bit opcode variants
+ ///
+ /// \p Dst is the destination register of the binop to emit.
+ /// \p LHS is the left-hand operand of the binop to emit.
+ /// \p RHS is the right-hand operand of the binop to emit.
+ MachineInstr *emitAddSub(
+ const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
+ Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
+ MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
+ MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
- AArch64CC::CondCode CC,
- MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
+ AArch64CC::CondCode CC,
+ MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
@@ -250,25 +250,25 @@ private:
MachineInstr *emitFMovForFConstant(MachineInstr &MI,
MachineRegisterInfo &MRI) const;
- /// Emit a CSet for an integer compare.
- ///
- /// \p DefReg is expected to be a 32-bit scalar register.
+ /// Emit a CSet for an integer compare.
+ ///
+ /// \p DefReg is expected to be a 32-bit scalar register.
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
MachineIRBuilder &MIRBuilder) const;
- /// Emit a CSet for a FP compare.
- ///
- /// \p Dst is expected to be a 32-bit scalar register.
- MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
- MachineIRBuilder &MIRBuilder) const;
-
- /// Emit the overflow op for \p Opcode.
- ///
- /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
- /// G_USUBO, etc.
- std::pair<MachineInstr *, AArch64CC::CondCode>
- emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
- MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
-
+ /// Emit a CSet for a FP compare.
+ ///
+ /// \p Dst is expected to be a 32-bit scalar register.
+ MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
+ MachineIRBuilder &MIRBuilder) const;
+
+ /// Emit the overflow op for \p Opcode.
+ ///
+ /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
+ /// G_USUBO, etc.
+ std::pair<MachineInstr *, AArch64CC::CondCode>
+ emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
+
/// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
/// \p IsNegative is true if the test should be "not zero".
/// This will also optimize the test bit instruction when possible.
@@ -276,11 +276,11 @@ private:
MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const;
- /// Emit a CB(N)Z instruction which branches to \p DestMBB.
- MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
- MachineBasicBlock *DestMBB,
- MachineIRBuilder &MIB) const;
-
+ /// Emit a CB(N)Z instruction which branches to \p DestMBB.
+ MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
+ MachineBasicBlock *DestMBB,
+ MachineIRBuilder &MIB) const;
+
// Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
// We use these manually instead of using the importer since it doesn't
// support SDNodeXForm.
@@ -577,7 +577,7 @@ static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
if (!ValAndVReg)
return None;
- Immed = ValAndVReg->Value.getSExtValue();
+ Immed = ValAndVReg->Value.getSExtValue();
} else
return None;
return Immed;
@@ -865,7 +865,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
#ifndef NDEBUG
ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
assert(ValidCopy && "Invalid copy.");
- (void)KnownValid;
+ (void)KnownValid;
#endif
return ValidCopy;
};
@@ -1012,173 +1012,173 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
return GenericOpc;
}
-MachineInstr *
-AArch64InstructionSelector::emitSelect(Register Dst, Register True,
- Register False, AArch64CC::CondCode CC,
- MachineIRBuilder &MIB) const {
- MachineRegisterInfo &MRI = *MIB.getMRI();
- assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
- RBI.getRegBank(True, MRI, TRI)->getID() &&
- "Expected both select operands to have the same regbank?");
- LLT Ty = MRI.getType(True);
- if (Ty.isVector())
- return nullptr;
- const unsigned Size = Ty.getSizeInBits();
- assert((Size == 32 || Size == 64) &&
- "Expected 32 bit or 64 bit select only?");
- const bool Is32Bit = Size == 32;
- if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
- unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
- auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
- constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
- return &*FCSel;
- }
-
- // By default, we'll try and emit a CSEL.
- unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
- bool Optimized = false;
- auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
- &Optimized](Register &Reg, Register &OtherReg,
- bool Invert) {
- if (Optimized)
- return false;
-
- // Attempt to fold:
- //
- // %sub = G_SUB 0, %x
- // %select = G_SELECT cc, %reg, %sub
- //
- // Into:
- // %select = CSNEG %reg, %x, cc
- Register MatchReg;
- if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
- Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
- Reg = MatchReg;
- if (Invert) {
- CC = AArch64CC::getInvertedCondCode(CC);
- std::swap(Reg, OtherReg);
- }
- return true;
- }
-
- // Attempt to fold:
- //
- // %xor = G_XOR %x, -1
- // %select = G_SELECT cc, %reg, %xor
- //
- // Into:
- // %select = CSINV %reg, %x, cc
- if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
- Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
- Reg = MatchReg;
- if (Invert) {
- CC = AArch64CC::getInvertedCondCode(CC);
- std::swap(Reg, OtherReg);
- }
- return true;
- }
-
- // Attempt to fold:
- //
- // %add = G_ADD %x, 1
- // %select = G_SELECT cc, %reg, %add
- //
- // Into:
- // %select = CSINC %reg, %x, cc
- if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
- Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
- Reg = MatchReg;
- if (Invert) {
- CC = AArch64CC::getInvertedCondCode(CC);
- std::swap(Reg, OtherReg);
- }
- return true;
- }
-
+MachineInstr *
+AArch64InstructionSelector::emitSelect(Register Dst, Register True,
+ Register False, AArch64CC::CondCode CC,
+ MachineIRBuilder &MIB) const {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
+ RBI.getRegBank(True, MRI, TRI)->getID() &&
+ "Expected both select operands to have the same regbank?");
+ LLT Ty = MRI.getType(True);
+ if (Ty.isVector())
+ return nullptr;
+ const unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) &&
+ "Expected 32 bit or 64 bit select only?");
+ const bool Is32Bit = Size == 32;
+ if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
+ unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
+ auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
+ constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
+ return &*FCSel;
+ }
+
+ // By default, we'll try and emit a CSEL.
+ unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
+ bool Optimized = false;
+ auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
+ &Optimized](Register &Reg, Register &OtherReg,
+ bool Invert) {
+ if (Optimized)
+ return false;
+
+ // Attempt to fold:
+ //
+ // %sub = G_SUB 0, %x
+ // %select = G_SELECT cc, %reg, %sub
+ //
+ // Into:
+ // %select = CSNEG %reg, %x, cc
+ Register MatchReg;
+ if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
+ Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
+ Reg = MatchReg;
+ if (Invert) {
+ CC = AArch64CC::getInvertedCondCode(CC);
+ std::swap(Reg, OtherReg);
+ }
+ return true;
+ }
+
+ // Attempt to fold:
+ //
+ // %xor = G_XOR %x, -1
+ // %select = G_SELECT cc, %reg, %xor
+ //
+ // Into:
+ // %select = CSINV %reg, %x, cc
+ if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ Reg = MatchReg;
+ if (Invert) {
+ CC = AArch64CC::getInvertedCondCode(CC);
+ std::swap(Reg, OtherReg);
+ }
+ return true;
+ }
+
+ // Attempt to fold:
+ //
+ // %add = G_ADD %x, 1
+ // %select = G_SELECT cc, %reg, %add
+ //
+ // Into:
+ // %select = CSINC %reg, %x, cc
+ if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ Reg = MatchReg;
+ if (Invert) {
+ CC = AArch64CC::getInvertedCondCode(CC);
+ std::swap(Reg, OtherReg);
+ }
+ return true;
+ }
+
return false;
- };
-
- // Helper lambda which tries to use CSINC/CSINV for the instruction when its
- // true/false values are constants.
- // FIXME: All of these patterns already exist in tablegen. We should be
- // able to import these.
- auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
- &Optimized]() {
- if (Optimized)
- return false;
- auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
- auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
- if (!TrueCst && !FalseCst)
- return false;
-
- Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
- if (TrueCst && FalseCst) {
- int64_t T = TrueCst->Value.getSExtValue();
- int64_t F = FalseCst->Value.getSExtValue();
-
- if (T == 0 && F == 1) {
- // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
- Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
- True = ZReg;
- False = ZReg;
- return true;
- }
-
- if (T == 0 && F == -1) {
- // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
- Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
- True = ZReg;
- False = ZReg;
- return true;
- }
- }
-
- if (TrueCst) {
- int64_t T = TrueCst->Value.getSExtValue();
- if (T == 1) {
- // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
- Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
- True = False;
- False = ZReg;
- CC = AArch64CC::getInvertedCondCode(CC);
- return true;
- }
-
- if (T == -1) {
- // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
- Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
- True = False;
- False = ZReg;
- CC = AArch64CC::getInvertedCondCode(CC);
- return true;
- }
- }
-
- if (FalseCst) {
- int64_t F = FalseCst->Value.getSExtValue();
- if (F == 1) {
- // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
- Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
- False = ZReg;
- return true;
- }
-
- if (F == -1) {
- // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
- Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
- False = ZReg;
- return true;
- }
- }
- return false;
- };
-
- Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
- Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
- Optimized |= TryOptSelectCst();
- auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
- constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
- return &*SelectInst;
+ };
+
+ // Helper lambda which tries to use CSINC/CSINV for the instruction when its
+ // true/false values are constants.
+ // FIXME: All of these patterns already exist in tablegen. We should be
+ // able to import these.
+ auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
+ &Optimized]() {
+ if (Optimized)
+ return false;
+ auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
+ auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
+ if (!TrueCst && !FalseCst)
+ return false;
+
+ Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
+ if (TrueCst && FalseCst) {
+ int64_t T = TrueCst->Value.getSExtValue();
+ int64_t F = FalseCst->Value.getSExtValue();
+
+ if (T == 0 && F == 1) {
+ // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ True = ZReg;
+ False = ZReg;
+ return true;
+ }
+
+ if (T == 0 && F == -1) {
+ // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ True = ZReg;
+ False = ZReg;
+ return true;
+ }
+ }
+
+ if (TrueCst) {
+ int64_t T = TrueCst->Value.getSExtValue();
+ if (T == 1) {
+ // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ True = False;
+ False = ZReg;
+ CC = AArch64CC::getInvertedCondCode(CC);
+ return true;
+ }
+
+ if (T == -1) {
+ // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ True = False;
+ False = ZReg;
+ CC = AArch64CC::getInvertedCondCode(CC);
+ return true;
+ }
+ }
+
+ if (FalseCst) {
+ int64_t F = FalseCst->Value.getSExtValue();
+ if (F == 1) {
+ // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
+ False = ZReg;
+ return true;
+ }
+
+ if (F == -1) {
+ // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
+ Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+ False = ZReg;
+ return true;
+ }
+ }
+ return false;
+ };
+
+ Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
+ Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
+ Optimized |= TryOptSelectCst();
+ auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
+ constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
+ return &*SelectInst;
}
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
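emitSelect above folds a G_SUB 0,x / G_XOR x,-1 / G_ADD x,1 feeding the false operand into CSNEG, CSINV or CSINC. A scalar model of why those folds are sound; csel/csinc/csinv/csneg below only mimic the AArch64 instruction semantics and are not LLVM helpers.

// Scalar model of the emitSelect folds above. The false operand is used
// (incremented, inverted, or negated) when the condition is false.
#include <cassert>
#include <cstdint>

int64_t csel (bool cc, int64_t t, int64_t f) { return cc ? t : f; }
int64_t csinc(bool cc, int64_t t, int64_t f) { return cc ? t : f + 1; }
int64_t csinv(bool cc, int64_t t, int64_t f) { return cc ? t : ~f; }
int64_t csneg(bool cc, int64_t t, int64_t f) { return cc ? t : -f; }

int main() {
  for (bool cc : {false, true}) {
    for (int64_t reg : {-7, 0, 42}) {
      for (int64_t x : {-3, 1, 9}) {
        // G_SELECT cc, %reg, (G_SUB 0, %x)  ==  CSNEG %reg, %x, cc
        assert(csel(cc, reg, 0 - x) == csneg(cc, reg, x));
        // G_SELECT cc, %reg, (G_XOR %x, -1) ==  CSINV %reg, %x, cc
        assert(csel(cc, reg, x ^ -1) == csinv(cc, reg, x));
        // G_SELECT cc, %reg, (G_ADD %x, 1)  ==  CSINC %reg, %x, cc
        assert(csel(cc, reg, x + 1) == csinc(cc, reg, x));
      }
    }
  }
}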
@@ -1308,7 +1308,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
}
if (VRegAndVal)
- C = VRegAndVal->Value.getSExtValue();
+ C = VRegAndVal->Value.getSExtValue();
break;
}
case TargetOpcode::G_ASHR:
@@ -1318,7 +1318,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
auto VRegAndVal =
getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (VRegAndVal)
- C = VRegAndVal->Value.getSExtValue();
+ C = VRegAndVal->Value.getSExtValue();
break;
}
}
@@ -1420,9 +1420,9 @@ MachineInstr *AArch64InstructionSelector::emitTestBit(
}
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
- MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
- MachineIRBuilder &MIB) const {
- assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
+ MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
+ MachineIRBuilder &MIB) const {
+ assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
// Given something like this:
//
// %x = ...Something...
@@ -1444,92 +1444,92 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
// Check if the AND has a constant on its RHS which we can use as a mask.
// If it's a power of 2, then it's the same as checking a specific bit.
// (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
- auto MaybeBit = getConstantVRegValWithLookThrough(
- AndInst.getOperand(2).getReg(), *MIB.getMRI());
- if (!MaybeBit)
+ auto MaybeBit = getConstantVRegValWithLookThrough(
+ AndInst.getOperand(2).getReg(), *MIB.getMRI());
+ if (!MaybeBit)
return false;
- int32_t Bit = MaybeBit->Value.exactLogBase2();
- if (Bit < 0)
- return false;
-
- Register TestReg = AndInst.getOperand(1).getReg();
+ int32_t Bit = MaybeBit->Value.exactLogBase2();
+ if (Bit < 0)
+ return false;
+ Register TestReg = AndInst.getOperand(1).getReg();
+
// Emit a TB(N)Z.
emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
return true;
}
-MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
- bool IsNegative,
- MachineBasicBlock *DestMBB,
- MachineIRBuilder &MIB) const {
- assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
- MachineRegisterInfo &MRI = *MIB.getMRI();
- assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
- AArch64::GPRRegBankID &&
- "Expected GPRs only?");
- auto Ty = MRI.getType(CompareReg);
- unsigned Width = Ty.getSizeInBits();
- assert(!Ty.isVector() && "Expected scalar only?");
- assert(Width <= 64 && "Expected width to be at most 64?");
- static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
- {AArch64::CBNZW, AArch64::CBNZX}};
- unsigned Opc = OpcTable[IsNegative][Width == 64];
- auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
- constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
- return &*BranchMI;
-}
-
-bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
- MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
- assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
- assert(I.getOpcode() == TargetOpcode::G_BRCOND);
- // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
- // totally clean. Some of them require two branches to implement.
- auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
- emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
- Pred);
- AArch64CC::CondCode CC1, CC2;
- changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
+MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
+ bool IsNegative,
+ MachineBasicBlock *DestMBB,
+ MachineIRBuilder &MIB) const {
+ assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
+ AArch64::GPRRegBankID &&
+ "Expected GPRs only?");
+ auto Ty = MRI.getType(CompareReg);
+ unsigned Width = Ty.getSizeInBits();
+ assert(!Ty.isVector() && "Expected scalar only?");
+ assert(Width <= 64 && "Expected width to be at most 64?");
+ static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
+ {AArch64::CBNZW, AArch64::CBNZX}};
+ unsigned Opc = OpcTable[IsNegative][Width == 64];
+ auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
+ constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
+ return &*BranchMI;
+}
+
+bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
+ MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
+ assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
+ assert(I.getOpcode() == TargetOpcode::G_BRCOND);
+ // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
+ // totally clean. Some of them require two branches to implement.
+ auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
+ emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
+ Pred);
+ AArch64CC::CondCode CC1, CC2;
+ changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
- if (CC2 != AArch64CC::AL)
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
- I.eraseFromParent();
- return true;
-}
-
-bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
- MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
- assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
- assert(I.getOpcode() == TargetOpcode::G_BRCOND);
- // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
- //
- // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
- // instructions will not be produced, as they are conditional branch
- // instructions that do not set flags.
- if (!ProduceNonFlagSettingCondBr)
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
+ if (CC2 != AArch64CC::AL)
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
+ MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
+ assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
+ assert(I.getOpcode() == TargetOpcode::G_BRCOND);
+ // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
+ //
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+ // instructions will not be produced, as they are conditional branch
+ // instructions that do not set flags.
+ if (!ProduceNonFlagSettingCondBr)
return false;
- MachineRegisterInfo &MRI = *MIB.getMRI();
- MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- auto Pred =
- static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
- Register LHS = ICmp.getOperand(2).getReg();
- Register RHS = ICmp.getOperand(3).getReg();
-
- // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ auto Pred =
+ static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
+ Register LHS = ICmp.getOperand(2).getReg();
+ Register RHS = ICmp.getOperand(3).getReg();
+
+ // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
- MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
+ MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
// When we can emit a TB(N)Z, prefer that.
//
// Handle non-commutative condition codes first.
// Note that we don't want to do this when we have a G_AND because it can
// become a tst. The tst will make the test bit in the TB(N)Z redundant.
- if (VRegAndVal && !AndInst) {
- int64_t C = VRegAndVal->Value.getSExtValue();
+ if (VRegAndVal && !AndInst) {
+ int64_t C = VRegAndVal->Value.getSExtValue();
// When we have a greater-than comparison, we can just test if the msb is
// zero.
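The branch folds above rest on two small bit identities: an AND with a power-of-two mask is a single-bit test, and comparing a signed value against 0 or -1 is a test of its sign bit. A standalone check of both; the helpers below are illustrative stand-ins for APInt::exactLogBase2 and emitTestBit.

// Bit identities behind the TB(N)Z folds above (illustrative only).
#include <cassert>
#include <cstdint>

// Returns the bit index if Mask has exactly one set bit, else -1.
int exactLogBase2(uint64_t Mask) {
  if (Mask == 0 || (Mask & (Mask - 1)) != 0)
    return -1;
  int Bit = 0;
  while ((Mask >>= 1) != 0)
    ++Bit;
  return Bit;
}

bool testBit(uint64_t V, int Bit) { return (V >> Bit) & 1; }

int main() {
  // (x & 8) != 0 is exactly "bit 3 of x is set", so AND + compare-with-zero
  // can become a single TBZ/TBNZ on bit 3.
  assert(exactLogBase2(8) == 3);
  for (uint64_t x : {0ull, 5ull, 8ull, 13ull, 0xF0ull})
    assert(((x & 8) != 0) == testBit(x, 3));

  // For signed x: x > -1 iff the sign bit is clear, x < 0 iff it is set,
  // which is why those compares can become a TBZ/TBNZ on the msb.
  for (int64_t x : {INT64_MIN, -2ll, -1ll, 0ll, 1ll, INT64_MAX})
    assert((x > -1) == !testBit(static_cast<uint64_t>(x), 63));
}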
@@ -1550,97 +1550,97 @@ bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
}
}
- // Attempt to handle commutative condition codes. Right now, that's only
- // eq/ne.
- if (ICmpInst::isEquality(Pred)) {
- if (!VRegAndVal) {
- std::swap(RHS, LHS);
- VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
- AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
- }
-
- if (VRegAndVal && VRegAndVal->Value == 0) {
- // If there's a G_AND feeding into this branch, try to fold it away by
- // emitting a TB(N)Z instead.
- //
- // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
- // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
- // would be redundant.
- if (AndInst &&
- tryOptAndIntoCompareBranch(
- *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
- I.eraseFromParent();
- return true;
- }
-
- // Otherwise, try to emit a CB(N)Z instead.
- auto LHSTy = MRI.getType(LHS);
- if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
- emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
- I.eraseFromParent();
- return true;
- }
- }
- }
-
- return false;
-}
-
-bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
- MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
- assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
- assert(I.getOpcode() == TargetOpcode::G_BRCOND);
- if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
+ // Attempt to handle commutative condition codes. Right now, that's only
+ // eq/ne.
+ if (ICmpInst::isEquality(Pred)) {
+ if (!VRegAndVal) {
+ std::swap(RHS, LHS);
+ VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
+ }
+
+ if (VRegAndVal && VRegAndVal->Value == 0) {
+ // If there's a G_AND feeding into this branch, try to fold it away by
+ // emitting a TB(N)Z instead.
+ //
+ // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
+ // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
+ // would be redundant.
+ if (AndInst &&
+ tryOptAndIntoCompareBranch(
+ *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
+ I.eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, try to emit a CB(N)Z instead.
+ auto LHSTy = MRI.getType(LHS);
+ if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
+ emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
+ I.eraseFromParent();
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
+ MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
+ assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
+ assert(I.getOpcode() == TargetOpcode::G_BRCOND);
+ if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
return true;
-
- // Couldn't optimize. Emit a compare + a Bcc.
- MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
- auto PredOp = ICmp.getOperand(1);
- emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
- static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
- MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
- I.eraseFromParent();
- return true;
-}
-
-bool AArch64InstructionSelector::selectCompareBranch(
- MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
- Register CondReg = I.getOperand(0).getReg();
- MachineInstr *CCMI = MRI.getVRegDef(CondReg);
- if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
- CondReg = CCMI->getOperand(1).getReg();
- CCMI = MRI.getVRegDef(CondReg);
- }
-
- // Try to select the G_BRCOND using whatever is feeding the condition if
- // possible.
- MachineIRBuilder MIB(I);
- unsigned CCMIOpc = CCMI->getOpcode();
- if (CCMIOpc == TargetOpcode::G_FCMP)
- return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
- if (CCMIOpc == TargetOpcode::G_ICMP)
- return selectCompareBranchFedByICmp(I, *CCMI, MIB);
-
- // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
- // instructions will not be produced, as they are conditional branch
- // instructions that do not set flags.
- if (ProduceNonFlagSettingCondBr) {
- emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
- I.getOperand(1).getMBB(), MIB);
+
+ // Couldn't optimize. Emit a compare + a Bcc.
+ MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ auto PredOp = ICmp.getOperand(1);
+ emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
+ const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
+ static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectCompareBranch(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+ Register CondReg = I.getOperand(0).getReg();
+ MachineInstr *CCMI = MRI.getVRegDef(CondReg);
+ if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
+ CondReg = CCMI->getOperand(1).getReg();
+ CCMI = MRI.getVRegDef(CondReg);
+ }
+
+ // Try to select the G_BRCOND using whatever is feeding the condition if
+ // possible.
+ MachineIRBuilder MIB(I);
+ unsigned CCMIOpc = CCMI->getOpcode();
+ if (CCMIOpc == TargetOpcode::G_FCMP)
+ return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
+ if (CCMIOpc == TargetOpcode::G_ICMP)
+ return selectCompareBranchFedByICmp(I, *CCMI, MIB);
+
+ // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
+ // instructions will not be produced, as they are conditional branch
+ // instructions that do not set flags.
+ if (ProduceNonFlagSettingCondBr) {
+ emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
+ I.getOperand(1).getMBB(), MIB);
I.eraseFromParent();
return true;
}
- // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
- auto TstMI =
- MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
- constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
- auto Bcc = MIB.buildInstr(AArch64::Bcc)
- .addImm(AArch64CC::EQ)
- .addMBB(I.getOperand(1).getMBB());
+ // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
+ auto TstMI =
+ MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ auto Bcc = MIB.buildInstr(AArch64::Bcc)
+ .addImm(AArch64CC::EQ)
+ .addMBB(I.getOperand(1).getMBB());
I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
+ return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
}
/// Returns the element immediate value of a vector shift operand if found.
@@ -1661,8 +1661,8 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,
return None;
if (Idx == 1)
- ImmVal = VRegAndVal->Value.getSExtValue();
- if (ImmVal != VRegAndVal->Value.getSExtValue())
+ ImmVal = VRegAndVal->Value.getSExtValue();
+ if (ImmVal != VRegAndVal->Value.getSExtValue())
return None;
}
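getVectorShiftImm above only treats a vector shift amount as an immediate when every lane carries the same constant. A standalone model of that splat check, using a plain std::vector instead of a G_BUILD_VECTOR; purely illustrative.

// "All lanes carry the same immediate" check, modeled on getVectorShiftImm.
#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

std::optional<int64_t> getSplatImm(const std::vector<int64_t> &Lanes) {
  if (Lanes.empty())
    return std::nullopt;
  int64_t ImmVal = Lanes.front();
  for (int64_t L : Lanes)
    if (L != ImmVal)
      return std::nullopt; // lanes disagree: fall back to the register form
  return ImmVal;
}

int main() {
  assert(getSplatImm({3, 3, 3, 3}) == 3); // splat -> immediate-form shift
  assert(!getSplatImm({3, 3, 2, 3}));     // mixed -> register-form shift
}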
@@ -1725,14 +1725,14 @@ bool AArch64InstructionSelector::selectVectorSHL(
Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
} else if (Ty == LLT::vector(2, 32)) {
Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
- } else if (Ty == LLT::vector(4, 16)) {
- Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
- } else if (Ty == LLT::vector(8, 16)) {
- Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
- } else if (Ty == LLT::vector(16, 8)) {
- Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
- } else if (Ty == LLT::vector(8, 8)) {
- Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
+ } else if (Ty == LLT::vector(4, 16)) {
+ Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
+ } else if (Ty == LLT::vector(8, 16)) {
+ Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
+ } else if (Ty == LLT::vector(16, 8)) {
+ Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
+ } else if (Ty == LLT::vector(8, 8)) {
+ Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
return false;
@@ -1749,10 +1749,10 @@ bool AArch64InstructionSelector::selectVectorSHL(
return true;
}
-bool AArch64InstructionSelector::selectVectorAshrLshr(
+bool AArch64InstructionSelector::selectVectorAshrLshr(
MachineInstr &I, MachineRegisterInfo &MRI) const {
- assert(I.getOpcode() == TargetOpcode::G_ASHR ||
- I.getOpcode() == TargetOpcode::G_LSHR);
+ assert(I.getOpcode() == TargetOpcode::G_ASHR ||
+ I.getOpcode() == TargetOpcode::G_LSHR);
Register DstReg = I.getOperand(0).getReg();
const LLT Ty = MRI.getType(DstReg);
Register Src1Reg = I.getOperand(1).getReg();
@@ -1761,40 +1761,40 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
if (!Ty.isVector())
return false;
- bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
-
- // We expect the immediate case to be lowered in the PostLegalCombiner to
- // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
-
+ bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
+
+ // We expect the immediate case to be lowered in the PostLegalCombiner to
+ // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
+
// There is not a shift right register instruction, but the shift left
// register instruction takes a signed value, where negative numbers specify a
// right shift.
unsigned Opc = 0;
unsigned NegOpc = 0;
- const TargetRegisterClass *RC =
- getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
+ const TargetRegisterClass *RC =
+ getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
if (Ty == LLT::vector(2, 64)) {
- Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
+ Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
NegOpc = AArch64::NEGv2i64;
} else if (Ty == LLT::vector(4, 32)) {
- Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
+ Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
NegOpc = AArch64::NEGv4i32;
} else if (Ty == LLT::vector(2, 32)) {
- Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
+ Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
NegOpc = AArch64::NEGv2i32;
- } else if (Ty == LLT::vector(4, 16)) {
- Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
- NegOpc = AArch64::NEGv4i16;
- } else if (Ty == LLT::vector(8, 16)) {
- Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
- NegOpc = AArch64::NEGv8i16;
- } else if (Ty == LLT::vector(16, 8)) {
- Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
- NegOpc = AArch64::NEGv16i8;
- } else if (Ty == LLT::vector(8, 8)) {
- Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
- NegOpc = AArch64::NEGv8i8;
+ } else if (Ty == LLT::vector(4, 16)) {
+ Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
+ NegOpc = AArch64::NEGv4i16;
+ } else if (Ty == LLT::vector(8, 16)) {
+ Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
+ NegOpc = AArch64::NEGv8i16;
+ } else if (Ty == LLT::vector(16, 8)) {
+ Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
+ NegOpc = AArch64::NEGv16i8;
+ } else if (Ty == LLT::vector(8, 8)) {
+ Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
+ NegOpc = AArch64::NEGv8i8;
} else {
LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
return false;
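As the comment in this hunk notes, AArch64 has no vector shift-right-by-register instruction, so the selector negates the shift amount and uses SSHL/USHL, whose negative amounts shift right. A per-lane scalar model of that trick (not LLVM code; assumes the usual arithmetic right shift for signed values).

// Per-lane model: a right shift by n is selected as NEG n; SSHL/USHL x, -n.
#include <cassert>
#include <cstdint>

int32_t sshl(int32_t x, int s) { return s >= 0 ? x << s : x >> -s; }   // signed lane
uint32_t ushl(uint32_t x, int s) { return s >= 0 ? x << s : x >> -s; } // unsigned lane

int main() {
  const int n = 5;
  assert(sshl(3, 4) == 48);                     // ordinary left shift
  assert(sshl(-1024, -n) == -32);               // G_ASHR x, 5 == SSHL x, -5
  assert(ushl(0xF0000000u, -n) == 0x07800000u); // G_LSHR x, 5 == USHL x, -5
}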
@@ -1931,40 +1931,40 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
MRI.setType(DstReg, LLT::scalar(64));
return true;
}
- case AArch64::G_DUP: {
- // Convert the type from p0 to s64 to help selection.
- LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- if (!DstTy.getElementType().isPointer())
- return false;
- MachineIRBuilder MIB(I);
- auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
- MRI.setType(I.getOperand(0).getReg(),
- DstTy.changeElementType(LLT::scalar(64)));
- MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
- I.getOperand(1).setReg(NewSrc.getReg(0));
- return true;
- }
- case TargetOpcode::G_UITOFP:
- case TargetOpcode::G_SITOFP: {
- // If both source and destination regbanks are FPR, then convert the opcode
- // to G_SITOF so that the importer can select it to an fpr variant.
- // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
- // copy.
- Register SrcReg = I.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
- return false;
-
- if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
- if (I.getOpcode() == TargetOpcode::G_SITOFP)
- I.setDesc(TII.get(AArch64::G_SITOF));
- else
- I.setDesc(TII.get(AArch64::G_UITOF));
- return true;
- }
- return false;
- }
+ case AArch64::G_DUP: {
+ // Convert the type from p0 to s64 to help selection.
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ if (!DstTy.getElementType().isPointer())
+ return false;
+ MachineIRBuilder MIB(I);
+ auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
+ MRI.setType(I.getOperand(0).getReg(),
+ DstTy.changeElementType(LLT::scalar(64)));
+ MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+ I.getOperand(1).setReg(NewSrc.getReg(0));
+ return true;
+ }
+ case TargetOpcode::G_UITOFP:
+ case TargetOpcode::G_SITOFP: {
+ // If both source and destination regbanks are FPR, then convert the opcode
+ // to G_SITOF so that the importer can select it to an fpr variant.
+ // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
+ // copy.
+ Register SrcReg = I.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
+ return false;
+
+ if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
+ if (I.getOpcode() == TargetOpcode::G_SITOFP)
+ I.setDesc(TII.get(AArch64::G_SITOF));
+ else
+ I.setDesc(TII.get(AArch64::G_UITOF));
+ return true;
+ }
+ return false;
+ }
default:
return false;
}
@@ -2005,14 +2005,14 @@ bool AArch64InstructionSelector::convertPtrAddToAdd(
LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
return false;
}
-
- // Also take the opportunity here to try to do some optimization.
- // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
- Register NegatedReg;
- if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
- return true;
- I.getOperand(2).setReg(NegatedReg);
- I.setDesc(TII.get(TargetOpcode::G_SUB));
+
+ // Also take the opportunity here to try to do some optimization.
+ // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
+ Register NegatedReg;
+ if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
+ return true;
+ I.getOperand(2).setReg(NegatedReg);
+ I.setDesc(TII.get(TargetOpcode::G_SUB));
return true;
}
@@ -2102,17 +2102,17 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
- case TargetOpcode::G_BR: {
- // If the branch jumps to the fallthrough block, don't bother emitting it.
- // Only do this for -O0 for a good code size improvement, because when
- // optimizations are enabled we want to leave this choice to
- // MachineBlockPlacement.
- bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
- if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
- return false;
- I.eraseFromParent();
- return true;
- }
+ case TargetOpcode::G_BR: {
+ // If the branch jumps to the fallthrough block, don't bother emitting it.
+ // Only do this for -O0 for a good code size improvement, because when
+ // optimizations are enabled we want to leave this choice to
+ // MachineBlockPlacement.
+ bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
+ if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
case TargetOpcode::G_SHL:
return earlySelectSHL(I, MRI);
case TargetOpcode::G_CONSTANT: {
@@ -2232,8 +2232,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
MachineIRBuilder MIB(I);
switch (Opcode) {
- case TargetOpcode::G_BRCOND:
- return selectCompareBranch(I, MF, MRI);
+ case TargetOpcode::G_BRCOND:
+ return selectCompareBranch(I, MF, MRI);
case TargetOpcode::G_BRINDIRECT: {
I.setDesc(TII.get(AArch64::BR));
@@ -2313,7 +2313,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
- const LLT s128 = LLT::scalar(128);
+ const LLT s128 = LLT::scalar(128);
const LLT p0 = LLT::pointer(0, 64);
const Register DefReg = I.getOperand(0).getReg();
@@ -2323,10 +2323,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// FIXME: Redundant check, but even less readable when factored out.
if (isFP) {
- if (Ty != s32 && Ty != s64 && Ty != s128) {
+ if (Ty != s32 && Ty != s64 && Ty != s128) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
<< " constant, expected: " << s32 << " or " << s64
- << " or " << s128 << '\n');
+ << " or " << s128 << '\n');
return false;
}
@@ -2339,9 +2339,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// The case when we have 0.0 is covered by tablegen. Reject it here so we
// can be sure tablegen works correctly and isn't rescued by this code.
- // 0.0 is not covered by tablegen for FP128. So we will handle this
- // scenario in the code here.
- if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
+ // 0.0 is not covered by tablegen for FP128. So we will handle this
+ // scenario in the code here.
+ if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
return false;
} else {
// s32 and s64 are covered by tablegen.
@@ -2368,17 +2368,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// Either emit a FMOV, or emit a copy to emit a normal mov.
const TargetRegisterClass &GPRRC =
DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
- const TargetRegisterClass &FPRRC =
- DefSize == 32 ? AArch64::FPR32RegClass
- : (DefSize == 64 ? AArch64::FPR64RegClass
- : AArch64::FPR128RegClass);
+ const TargetRegisterClass &FPRRC =
+ DefSize == 32 ? AArch64::FPR32RegClass
+ : (DefSize == 64 ? AArch64::FPR64RegClass
+ : AArch64::FPR128RegClass);
// Can we use a FMOV instruction to represent the immediate?
if (emitFMovForFConstant(I, MRI))
return true;
// For 64b values, emit a constant pool load instead.
- if (DefSize == 64 || DefSize == 128) {
+ if (DefSize == 64 || DefSize == 128) {
auto *FPImm = I.getOperand(1).getFPImm();
MachineIRBuilder MIB(I);
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
@@ -2571,21 +2571,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
auto &MemOp = **I.memoperands_begin();
- uint64_t MemSizeInBytes = MemOp.getSize();
+ uint64_t MemSizeInBytes = MemOp.getSize();
if (MemOp.isAtomic()) {
// For now we just support s8 acquire loads to be able to compile stack
// protector code.
if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
- MemSizeInBytes == 1) {
+ MemSizeInBytes == 1) {
I.setDesc(TII.get(AArch64::LDARB));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
return false;
}
- unsigned MemSizeInBits = MemSizeInBytes * 8;
+ unsigned MemSizeInBits = MemSizeInBytes * 8;
-#ifndef NDEBUG
+#ifndef NDEBUG
const Register PtrReg = I.getOperand(1).getReg();
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
// Sanity-check the pointer register.
@@ -2598,78 +2598,78 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
const Register ValReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
- // Helper lambda for partially selecting I. Either returns the original
- // instruction with an updated opcode, or a new instruction.
- auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
- bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
- const unsigned NewOpc =
- selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
- if (NewOpc == I.getOpcode())
- return nullptr;
- // Check if we can fold anything into the addressing mode.
- auto AddrModeFns =
- selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
- if (!AddrModeFns) {
- // Can't fold anything. Use the original instruction.
- I.setDesc(TII.get(NewOpc));
- I.addOperand(MachineOperand::CreateImm(0));
- return &I;
+ // Helper lambda for partially selecting I. Either returns the original
+ // instruction with an updated opcode, or a new instruction.
+ auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
+ bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
+ const unsigned NewOpc =
+ selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
+ if (NewOpc == I.getOpcode())
+ return nullptr;
+ // Check if we can fold anything into the addressing mode.
+ auto AddrModeFns =
+ selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
+ if (!AddrModeFns) {
+ // Can't fold anything. Use the original instruction.
+ I.setDesc(TII.get(NewOpc));
+ I.addOperand(MachineOperand::CreateImm(0));
+ return &I;
}
- // Folded something. Create a new instruction and return it.
- auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
- IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
- NewInst.cloneMemRefs(I);
- for (auto &Fn : *AddrModeFns)
- Fn(NewInst);
- I.eraseFromParent();
- return &*NewInst;
- };
+ // Folded something. Create a new instruction and return it.
+ auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
+ IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
+ NewInst.cloneMemRefs(I);
+ for (auto &Fn : *AddrModeFns)
+ Fn(NewInst);
+ I.eraseFromParent();
+ return &*NewInst;
+ };
- MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
- if (!LoadStore)
- return false;
+ MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
+ if (!LoadStore)
+ return false;
// If we're storing a 0, use WZR/XZR.
- if (Opcode == TargetOpcode::G_STORE) {
- auto CVal = getConstantVRegValWithLookThrough(
- LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
- /*HandleFConstants = */ false);
- if (CVal && CVal->Value == 0) {
- switch (LoadStore->getOpcode()) {
- case AArch64::STRWui:
- case AArch64::STRHHui:
- case AArch64::STRBBui:
- LoadStore->getOperand(0).setReg(AArch64::WZR);
- break;
- case AArch64::STRXui:
- LoadStore->getOperand(0).setReg(AArch64::XZR);
- break;
- }
+ if (Opcode == TargetOpcode::G_STORE) {
+ auto CVal = getConstantVRegValWithLookThrough(
+ LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
+ /*HandleFConstants = */ false);
+ if (CVal && CVal->Value == 0) {
+ switch (LoadStore->getOpcode()) {
+ case AArch64::STRWui:
+ case AArch64::STRHHui:
+ case AArch64::STRBBui:
+ LoadStore->getOperand(0).setReg(AArch64::WZR);
+ break;
+ case AArch64::STRXui:
+ LoadStore->getOperand(0).setReg(AArch64::XZR);
+ break;
+ }
}
}
if (IsZExtLoad) {
- // The zextload from a smaller type to i32 should be handled by the
- // importer.
- if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
+ // The zextload from a smaller type to i32 should be handled by the
+ // importer.
+ if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
return false;
// If we have a ZEXTLOAD then change the load's type to be a narrower reg
- // and zero_extend with SUBREG_TO_REG.
+ // and zero_extend with SUBREG_TO_REG.
Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- Register DstReg = LoadStore->getOperand(0).getReg();
- LoadStore->getOperand(0).setReg(LdReg);
+ Register DstReg = LoadStore->getOperand(0).getReg();
+ LoadStore->getOperand(0).setReg(LdReg);
- MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
+ MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
.addImm(0)
.addUse(LdReg)
.addImm(AArch64::sub_32);
- constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
MRI);
}
- return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
+ return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
}
case TargetOpcode::G_SMULH:
@@ -2700,21 +2700,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// operands to use appropriate classes.
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
if (MRI.getType(I.getOperand(0).getReg()).isVector())
- return selectVectorAshrLshr(I, MRI);
+ return selectVectorAshrLshr(I, MRI);
LLVM_FALLTHROUGH;
case TargetOpcode::G_SHL:
if (Opcode == TargetOpcode::G_SHL &&
MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorSHL(I, MRI);
LLVM_FALLTHROUGH;
- case TargetOpcode::G_FADD:
- case TargetOpcode::G_FSUB:
- case TargetOpcode::G_FMUL:
- case TargetOpcode::G_FDIV:
- case TargetOpcode::G_OR: {
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_OR: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
@@ -2743,24 +2743,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.eraseFromParent();
return true;
}
- case TargetOpcode::G_SADDO:
- case TargetOpcode::G_UADDO:
- case TargetOpcode::G_SSUBO:
- case TargetOpcode::G_USUBO: {
- // Emit the operation and get the correct condition code.
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_USUBO: {
+ // Emit the operation and get the correct condition code.
MachineIRBuilder MIRBuilder(I);
- auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
- I.getOperand(2), I.getOperand(3), MIRBuilder);
+ auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
+ I.getOperand(2), I.getOperand(3), MIRBuilder);
// Now, put the overflow result in the register given by the first operand
- // to the overflow op. CSINC increments the result when the predicate is
- // false, so to get the increment when it's true, we need to use the
- // inverse. In this case, we want to increment when carry is set.
- Register ZReg = AArch64::WZR;
+ // to the overflow op. CSINC increments the result when the predicate is
+ // false, so to get the increment when it's true, we need to use the
+ // inverse. In this case, we want to increment when carry is set.
+ Register ZReg = AArch64::WZR;
auto CsetMI = MIRBuilder
.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
- {ZReg, ZReg})
- .addImm(getInvertedCondCode(OpAndCC.second));
+ {ZReg, ZReg})
+ .addImm(getInvertedCondCode(OpAndCC.second));
constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
I.eraseFromParent();
return true;
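A hedged standalone illustration of the CSINC comment above (csinc/cset here are my own scalar models of the instructions, not LLVM or target APIs):

#include <cstdint>
#include <cstdio>

// CSINC Rd, Rn, Rm, cond  =>  Rd = cond ? Rn : Rm + 1 (increments when cond is false).
static uint32_t csinc(uint32_t rn, uint32_t rm, bool cond) {
  return cond ? rn : rm + 1;
}

// CSET Rd, cond is CSINC Rd, WZR, WZR, invert(cond): yields 1 exactly when cond holds.
static uint32_t cset(bool cond) {
  return csinc(/*WZR=*/0, /*WZR=*/0, /*inverted=*/!cond);
}

int main() {
  // G_UADDO-style check: the carry flag is set when the 32-bit sum wraps.
  uint32_t a = 0xFFFFFFF0u, b = 0x20u;
  bool carry = (uint64_t)a + b > 0xFFFFFFFFu;
  printf("overflow = %u\n", cset(carry)); // prints overflow = 1
}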
@@ -2768,7 +2768,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_PTRMASK: {
Register MaskReg = I.getOperand(2).getReg();
- Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
+ Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
// TODO: Implement arbitrary cases
if (!MaskVal || !isShiftedMask_64(*MaskVal))
return false;
@@ -3059,15 +3059,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (tryOptSelect(I))
return true;
- // Make sure to use an unused vreg instead of wzr, so that the peephole
- // optimizations will be able to optimize these.
- MachineIRBuilder MIB(I);
- Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
- constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
- if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
- return false;
+ // Make sure to use an unused vreg instead of wzr, so that the peephole
+ // optimizations will be able to optimize these.
+ MachineIRBuilder MIB(I);
+ Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
+ return false;
I.eraseFromParent();
return true;
}
@@ -3082,21 +3082,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
MachineIRBuilder MIRBuilder(I);
- auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
- emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
- MIRBuilder);
+ auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
+ emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
+ MIRBuilder);
emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_FCMP: {
- MachineIRBuilder MIRBuilder(I);
- CmpInst::Predicate Pred =
- static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
- if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
- MIRBuilder, Pred) ||
- !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
+ MachineIRBuilder MIRBuilder(I);
+ CmpInst::Predicate Pred =
+ static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
+ if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
+ MIRBuilder, Pred) ||
+ !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
return false;
I.eraseFromParent();
return true;
@@ -3136,24 +3136,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
}
- case AArch64::G_DUP: {
-    // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
- // imported patterns. Do it manually here. Avoiding generating s16 gpr is
- // difficult because at RBS we may end up pessimizing the fpr case if we
- // decided to add an anyextend to fix this. Manual selection is the most
- // robust solution for now.
- Register SrcReg = I.getOperand(1).getReg();
- if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
- return false; // We expect the fpr regbank case to be imported.
- LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy.getSizeInBits() == 16)
- I.setDesc(TII.get(AArch64::DUPv8i16gpr));
- else if (SrcTy.getSizeInBits() == 8)
- I.setDesc(TII.get(AArch64::DUPv16i8gpr));
- else
- return false;
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
+ case AArch64::G_DUP: {
+    // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
+ // imported patterns. Do it manually here. Avoiding generating s16 gpr is
+ // difficult because at RBS we may end up pessimizing the fpr case if we
+ // decided to add an anyextend to fix this. Manual selection is the most
+ // robust solution for now.
+ Register SrcReg = I.getOperand(1).getReg();
+ if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
+ return false; // We expect the fpr regbank case to be imported.
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.getSizeInBits() == 16)
+ I.setDesc(TII.get(AArch64::DUPv8i16gpr));
+ else if (SrcTy.getSizeInBits() == 8)
+ I.setDesc(TII.get(AArch64::DUPv16i8gpr));
+ else
+ return false;
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
case TargetOpcode::G_INTRINSIC_TRUNC:
return selectIntrinsicTrunc(I, MRI);
case TargetOpcode::G_INTRINSIC_ROUND:
@@ -3174,52 +3174,52 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return selectConcatVectors(I, MRI);
case TargetOpcode::G_JUMP_TABLE:
return selectJumpTable(I, MRI);
- case TargetOpcode::G_VECREDUCE_FADD:
- case TargetOpcode::G_VECREDUCE_ADD:
- return selectReduction(I, MRI);
- }
-
- return false;
-}
-
-bool AArch64InstructionSelector::selectReduction(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
- Register VecReg = I.getOperand(1).getReg();
- LLT VecTy = MRI.getType(VecReg);
- if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
- unsigned Opc = 0;
- if (VecTy == LLT::vector(16, 8))
- Opc = AArch64::ADDVv16i8v;
- else if (VecTy == LLT::vector(8, 16))
- Opc = AArch64::ADDVv8i16v;
- else if (VecTy == LLT::vector(4, 32))
- Opc = AArch64::ADDVv4i32v;
- else if (VecTy == LLT::vector(2, 64))
- Opc = AArch64::ADDPv2i64p;
- else {
- LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
- return false;
- }
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_ADD:
+ return selectReduction(I, MRI);
}
- if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
- unsigned Opc = 0;
- if (VecTy == LLT::vector(2, 32))
- Opc = AArch64::FADDPv2i32p;
- else if (VecTy == LLT::vector(2, 64))
- Opc = AArch64::FADDPv2i64p;
- else {
- LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
- return false;
- }
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
return false;
}
+bool AArch64InstructionSelector::selectReduction(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ Register VecReg = I.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
+ unsigned Opc = 0;
+ if (VecTy == LLT::vector(16, 8))
+ Opc = AArch64::ADDVv16i8v;
+ else if (VecTy == LLT::vector(8, 16))
+ Opc = AArch64::ADDVv8i16v;
+ else if (VecTy == LLT::vector(4, 32))
+ Opc = AArch64::ADDVv4i32v;
+ else if (VecTy == LLT::vector(2, 64))
+ Opc = AArch64::ADDPv2i64p;
+ else {
+ LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
+ return false;
+ }
+ I.setDesc(TII.get(Opc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
+ if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
+ unsigned Opc = 0;
+ if (VecTy == LLT::vector(2, 32))
+ Opc = AArch64::FADDPv2i32p;
+ else if (VecTy == LLT::vector(2, 64))
+ Opc = AArch64::FADDPv2i64p;
+ else {
+ LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
+ return false;
+ }
+ I.setDesc(TII.get(Opc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+ return false;
+}
+
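For reference, the scalar semantics that the ADDV selection above implements for a <4 x s32> input is just a lane sum; a tiny standalone sketch (illustrative only, not how the instruction computes it):

#include <cstdint>
#include <cstdio>

// G_VECREDUCE_ADD <4 x s32>: add all lanes into one scalar (selected to ADDVv4i32v above).
static int32_t vecreduce_add_v4i32(const int32_t v[4]) {
  int32_t sum = 0;
  for (int i = 0; i < 4; ++i)
    sum += v[i];
  return sum;
}

int main() {
  int32_t v[4] = {1, 2, 3, 4};
  printf("%d\n", vecreduce_add_v4i32(v)); // prints 10
}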
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
@@ -3230,8 +3230,8 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
-
- MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
+
+ MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
{TargetReg, ScratchReg}, {JTAddr, Index})
.addJumpTableIndex(JTI);
@@ -3268,20 +3268,20 @@ bool AArch64InstructionSelector::selectTLSGlobalValue(
const GlobalValue &GV = *I.getOperand(1).getGlobal();
MachineIRBuilder MIB(I);
- auto LoadGOT =
- MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
- .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
+ auto LoadGOT =
+ MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
+ .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
- {LoadGOT.getReg(0)})
+ {LoadGOT.getReg(0)})
.addImm(0);
- MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
+ MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
- .addUse(AArch64::X0, RegState::Implicit)
+ .addUse(AArch64::X0, RegState::Implicit)
.addDef(AArch64::X0, RegState::Implicit)
.addRegMask(TRI.getTLSCallPreservedMask());
@@ -3767,7 +3767,7 @@ bool AArch64InstructionSelector::selectExtractElt(
(void)WideTy;
assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
"source register size too small!");
- assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
+ assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
// Need the lane index to determine the correct copy opcode.
MachineOperand &LaneIdxOp = I.getOperand(2);
@@ -3782,7 +3782,7 @@ bool AArch64InstructionSelector::selectExtractElt(
auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
if (!VRegAndVal)
return false;
- unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
+ unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
MachineIRBuilder MIRBuilder(I);
@@ -4005,10 +4005,10 @@ static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
unsigned Opc, SubregIdx;
if (RB.getID() == AArch64::GPRRegBankID) {
- if (EltSize == 16) {
- Opc = AArch64::INSvi16gpr;
- SubregIdx = AArch64::ssub;
- } else if (EltSize == 32) {
+ if (EltSize == 16) {
+ Opc = AArch64::INSvi16gpr;
+ SubregIdx = AArch64::ssub;
+ } else if (EltSize == 32) {
Opc = AArch64::INSvi32gpr;
SubregIdx = AArch64::ssub;
} else if (EltSize == 64) {
@@ -4037,93 +4037,93 @@ getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
return std::make_pair(Opc, SubregIdx);
}
-MachineInstr *AArch64InstructionSelector::emitInstr(
- unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
- std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
- const ComplexRendererFns &RenderFns) const {
- assert(Opcode && "Expected an opcode?");
- assert(!isPreISelGenericOpcode(Opcode) &&
- "Function should only be used to produce selected instructions!");
- auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
- if (RenderFns)
- for (auto &Fn : *RenderFns)
- Fn(MI);
- constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
- return &*MI;
-}
-
-MachineInstr *AArch64InstructionSelector::emitAddSub(
- const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
- Register Dst, MachineOperand &LHS, MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const {
- MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
- assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
- auto Ty = MRI.getType(LHS.getReg());
- assert(!Ty.isVector() && "Expected a scalar or pointer?");
- unsigned Size = Ty.getSizeInBits();
- assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
- bool Is32Bit = Size == 32;
-
- // INSTRri form with positive arithmetic immediate.
- if (auto Fns = selectArithImmed(RHS))
- return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
- MIRBuilder, Fns);
-
- // INSTRri form with negative arithmetic immediate.
- if (auto Fns = selectNegArithImmed(RHS))
- return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
- MIRBuilder, Fns);
-
- // INSTRrx form.
- if (auto Fns = selectArithExtendedRegister(RHS))
- return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
- MIRBuilder, Fns);
-
- // INSTRrs form.
- if (auto Fns = selectShiftedRegister(RHS))
- return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
- MIRBuilder, Fns);
- return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
- MIRBuilder);
-}
-
+MachineInstr *AArch64InstructionSelector::emitInstr(
+ unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
+ std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
+ const ComplexRendererFns &RenderFns) const {
+ assert(Opcode && "Expected an opcode?");
+ assert(!isPreISelGenericOpcode(Opcode) &&
+ "Function should only be used to produce selected instructions!");
+ auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
+ if (RenderFns)
+ for (auto &Fn : *RenderFns)
+ Fn(MI);
+ constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
+ return &*MI;
+}
+
+MachineInstr *AArch64InstructionSelector::emitAddSub(
+ const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
+ Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+ assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+ auto Ty = MRI.getType(LHS.getReg());
+ assert(!Ty.isVector() && "Expected a scalar or pointer?");
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
+ bool Is32Bit = Size == 32;
+
+ // INSTRri form with positive arithmetic immediate.
+ if (auto Fns = selectArithImmed(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
+ // INSTRri form with negative arithmetic immediate.
+ if (auto Fns = selectNegArithImmed(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
+ // INSTRrx form.
+ if (auto Fns = selectArithExtendedRegister(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+
+ // INSTRrs form.
+ if (auto Fns = selectShiftedRegister(RHS))
+ return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
+ MIRBuilder, Fns);
+ return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
+ MIRBuilder);
+}
+
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
- const std::array<std::array<unsigned, 2>, 5> OpcTable{
- {{AArch64::ADDXri, AArch64::ADDWri},
- {AArch64::ADDXrs, AArch64::ADDWrs},
- {AArch64::ADDXrr, AArch64::ADDWrr},
- {AArch64::SUBXri, AArch64::SUBWri},
- {AArch64::ADDXrx, AArch64::ADDWrx}}};
- return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
-}
-
-MachineInstr *
-AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
- MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const {
- const std::array<std::array<unsigned, 2>, 5> OpcTable{
- {{AArch64::ADDSXri, AArch64::ADDSWri},
- {AArch64::ADDSXrs, AArch64::ADDSWrs},
- {AArch64::ADDSXrr, AArch64::ADDSWrr},
- {AArch64::SUBSXri, AArch64::SUBSWri},
- {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
- return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
-}
-
-MachineInstr *
-AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
- MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const {
- const std::array<std::array<unsigned, 2>, 5> OpcTable{
- {{AArch64::SUBSXri, AArch64::SUBSWri},
- {AArch64::SUBSXrs, AArch64::SUBSWrs},
- {AArch64::SUBSXrr, AArch64::SUBSWrr},
- {AArch64::ADDSXri, AArch64::ADDSWri},
- {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
- return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
+ {{AArch64::ADDXri, AArch64::ADDWri},
+ {AArch64::ADDXrs, AArch64::ADDWrs},
+ {AArch64::ADDXrr, AArch64::ADDWrr},
+ {AArch64::SUBXri, AArch64::SUBWri},
+ {AArch64::ADDXrx, AArch64::ADDWrx}}};
+ return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
+ {{AArch64::ADDSXri, AArch64::ADDSWri},
+ {AArch64::ADDSXrs, AArch64::ADDSWrs},
+ {AArch64::ADDSXrr, AArch64::ADDSWrr},
+ {AArch64::SUBSXri, AArch64::SUBSWri},
+ {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
+ return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ const std::array<std::array<unsigned, 2>, 5> OpcTable{
+ {{AArch64::SUBSXri, AArch64::SUBSWri},
+ {AArch64::SUBSXrs, AArch64::SUBSWrs},
+ {AArch64::SUBSXrr, AArch64::SUBSWrr},
+ {AArch64::ADDSXri, AArch64::ADDSWri},
+ {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
+ return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
MachineInstr *
@@ -4131,129 +4131,129 @@ AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
- auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
- return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
+ auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
+ return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}
MachineInstr *
-AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
+AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
- assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+ assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
- LLT Ty = MRI.getType(LHS.getReg());
- unsigned RegSize = Ty.getSizeInBits();
+ LLT Ty = MRI.getType(LHS.getReg());
+ unsigned RegSize = Ty.getSizeInBits();
bool Is32Bit = (RegSize == 32);
- const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
- {AArch64::ANDSXrs, AArch64::ANDSWrs},
- {AArch64::ANDSXrr, AArch64::ANDSWrr}};
- // ANDS needs a logical immediate for its immediate form. Check if we can
- // fold one in.
- if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
- int64_t Imm = ValAndVReg->Value.getSExtValue();
-
- if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
- auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
- TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
- constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
- return &*TstMI;
- }
- }
-
- if (auto Fns = selectLogicalShiftedRegister(RHS))
- return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
- return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
+ const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
+ {AArch64::ANDSXrs, AArch64::ANDSWrs},
+ {AArch64::ANDSXrr, AArch64::ANDSWrr}};
+ // ANDS needs a logical immediate for its immediate form. Check if we can
+ // fold one in.
+ if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
+ int64_t Imm = ValAndVReg->Value.getSExtValue();
+
+ if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
+ auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
+ TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ return &*TstMI;
+ }
+ }
+
+ if (auto Fns = selectLogicalShiftedRegister(RHS))
+ return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
+ return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}
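For readers unfamiliar with TST: it is ANDS with the destination discarded, so only the flags survive. A rough standalone model of the 32-bit case (my own sketch, not LLVM code; only the N and Z flags are modelled):

#include <cstdint>
#include <cstdio>

struct Flags { bool n, z; };

// TST lhs, rhs  ~=  ANDS wzr, lhs, rhs: compute the AND, keep only the N/Z flags.
static Flags tst32(uint32_t lhs, uint32_t rhs) {
  uint32_t r = lhs & rhs;
  return {(r >> 31) != 0, r == 0};
}

int main() {
  Flags f = tst32(0x8000000Fu, 0x1u);
  printf("N=%d Z=%d\n", f.n, f.z); // prints N=0 Z=0: the low bit was set
}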
-MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
+MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const {
assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
assert(Predicate.isPredicate() && "Expected predicate?");
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
- LLT CmpTy = MRI.getType(LHS.getReg());
- assert(!CmpTy.isVector() && "Expected scalar or pointer");
- unsigned Size = CmpTy.getSizeInBits();
- (void)Size;
- assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
- // Fold the compare into a cmn or tst if possible.
- if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
- return FoldCmp;
- auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
- return emitSUBS(Dst, LHS, RHS, MIRBuilder);
-}
-
-MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
- Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
- MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
-#ifndef NDEBUG
- LLT Ty = MRI.getType(Dst);
- assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
- "Expected a 32-bit scalar register?");
-#endif
- const Register ZeroReg = AArch64::WZR;
- auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
- auto CSet =
- MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
- .addImm(getInvertedCondCode(CC));
- constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
- return &*CSet;
- };
-
- AArch64CC::CondCode CC1, CC2;
- changeFCMPPredToAArch64CC(Pred, CC1, CC2);
- if (CC2 == AArch64CC::AL)
- return EmitCSet(Dst, CC1);
-
- const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
- Register Def1Reg = MRI.createVirtualRegister(RC);
- Register Def2Reg = MRI.createVirtualRegister(RC);
- EmitCSet(Def1Reg, CC1);
- EmitCSet(Def2Reg, CC2);
- auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
- constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
- return &*OrMI;
-}
-
-MachineInstr *
-AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
- MachineIRBuilder &MIRBuilder,
- Optional<CmpInst::Predicate> Pred) const {
- MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
- LLT Ty = MRI.getType(LHS);
- if (Ty.isVector())
- return nullptr;
- unsigned OpSize = Ty.getSizeInBits();
- if (OpSize != 32 && OpSize != 64)
- return nullptr;
-
- // If this is a compare against +0.0, then we don't have
- // to explicitly materialize a constant.
- const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
- bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
-
- auto IsEqualityPred = [](CmpInst::Predicate P) {
- return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
- P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
- };
- if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
- // Try commutating the operands.
- const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
- if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
- ShouldUseImm = true;
- std::swap(LHS, RHS);
- }
- }
- unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
- {AArch64::FCMPSri, AArch64::FCMPDri}};
- unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
-
- // Partially build the compare. Decide if we need to add a use for the
- // third operand based off whether or not we're comparing against 0.0.
- auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
- if (!ShouldUseImm)
- CmpMI.addUse(RHS);
+ LLT CmpTy = MRI.getType(LHS.getReg());
+ assert(!CmpTy.isVector() && "Expected scalar or pointer");
+ unsigned Size = CmpTy.getSizeInBits();
+ (void)Size;
+ assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
+ // Fold the compare into a cmn or tst if possible.
+ if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
+ return FoldCmp;
+ auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
+ return emitSUBS(Dst, LHS, RHS, MIRBuilder);
+}
+
+MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
+ Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+#ifndef NDEBUG
+ LLT Ty = MRI.getType(Dst);
+ assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
+ "Expected a 32-bit scalar register?");
+#endif
+ const Register ZeroReg = AArch64::WZR;
+ auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
+ auto CSet =
+ MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
+ .addImm(getInvertedCondCode(CC));
+ constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
+ return &*CSet;
+ };
+
+ AArch64CC::CondCode CC1, CC2;
+ changeFCMPPredToAArch64CC(Pred, CC1, CC2);
+ if (CC2 == AArch64CC::AL)
+ return EmitCSet(Dst, CC1);
+
+ const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
+ Register Def1Reg = MRI.createVirtualRegister(RC);
+ Register Def2Reg = MRI.createVirtualRegister(RC);
+ EmitCSet(Def1Reg, CC1);
+ EmitCSet(Def2Reg, CC2);
+ auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
+ constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
+ return &*OrMI;
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
+ MachineIRBuilder &MIRBuilder,
+ Optional<CmpInst::Predicate> Pred) const {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ LLT Ty = MRI.getType(LHS);
+ if (Ty.isVector())
+ return nullptr;
+ unsigned OpSize = Ty.getSizeInBits();
+ if (OpSize != 32 && OpSize != 64)
+ return nullptr;
+
+ // If this is a compare against +0.0, then we don't have
+ // to explicitly materialize a constant.
+ const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
+ bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
+
+ auto IsEqualityPred = [](CmpInst::Predicate P) {
+ return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
+ P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
+ };
+ if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
+ // Try commutating the operands.
+ const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
+ if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
+ ShouldUseImm = true;
+ std::swap(LHS, RHS);
+ }
+ }
+ unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
+ {AArch64::FCMPSri, AArch64::FCMPDri}};
+ unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
+
+ // Partially build the compare. Decide if we need to add a use for the
+ // third operand based off whether or not we're comparing against 0.0.
+ auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
+ if (!ShouldUseImm)
+ CmpMI.addUse(RHS);
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
- return &*CmpMI;
+ return &*CmpMI;
}
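The 2x2 opcode table above is indexed as [uses the +0.0 immediate form][operand is 64-bit]. A hedged standalone restatement of that lookup (the FakeOpc names are placeholders of mine, not real LLVM identifiers):

#include <cstdio>

enum FakeOpc { FCMPSrr, FCMPDrr, FCMPSri, FCMPDri };

static FakeOpc pickFCmpOpc(bool cmpAgainstPosZero, unsigned opSizeInBits) {
  // Row: register vs. #0.0 immediate form. Column: 32-bit (S regs) vs. 64-bit (D regs).
  const FakeOpc tbl[2][2] = {{FCMPSrr, FCMPDrr}, {FCMPSri, FCMPDri}};
  return tbl[cmpAgainstPosZero][opSizeInBits == 64];
}

int main() {
  // Comparing a double against +0.0: no constant needs to be materialized.
  printf("%d\n", pickFCmpOpc(true, 64)); // prints 3 (FCMPDri)
}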
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
@@ -4363,25 +4363,25 @@ AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
return &*I;
}
-std::pair<MachineInstr *, AArch64CC::CondCode>
-AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
- MachineOperand &LHS,
- MachineOperand &RHS,
- MachineIRBuilder &MIRBuilder) const {
- switch (Opcode) {
- default:
- llvm_unreachable("Unexpected opcode!");
- case TargetOpcode::G_SADDO:
- return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
- case TargetOpcode::G_UADDO:
- return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
- case TargetOpcode::G_SSUBO:
- return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
- case TargetOpcode::G_USUBO:
- return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
- }
-}
-
+std::pair<MachineInstr *, AArch64CC::CondCode>
+AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
+ MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_SADDO:
+ return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+ case TargetOpcode::G_UADDO:
+ return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
+ case TargetOpcode::G_SSUBO:
+ return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+ case TargetOpcode::G_USUBO:
+ return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
+ }
+}
+
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
MachineIRBuilder MIB(I);
MachineRegisterInfo &MRI = *MIB.getMRI();
@@ -4441,17 +4441,17 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
AArch64CC::CondCode CondCode;
if (CondOpc == TargetOpcode::G_ICMP) {
- auto Pred =
- static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
+ auto Pred =
+ static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
CondCode = changeICMPPredToAArch64CC(Pred);
- emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
- CondDef->getOperand(1), MIB);
+ emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
+ CondDef->getOperand(1), MIB);
} else {
// Get the condition code for the select.
- auto Pred =
- static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
+ auto Pred =
+ static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
AArch64CC::CondCode CondCode2;
- changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
+ changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
// changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
// instructions to emit the comparison.
@@ -4460,16 +4460,16 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
if (CondCode2 != AArch64CC::AL)
return false;
- if (!emitFPCompare(CondDef->getOperand(2).getReg(),
- CondDef->getOperand(3).getReg(), MIB)) {
- LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
+ if (!emitFPCompare(CondDef->getOperand(2).getReg(),
+ CondDef->getOperand(3).getReg(), MIB)) {
+ LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
return false;
- }
+ }
}
// Emit the select.
- emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
- I.getOperand(3).getReg(), CondCode, MIB);
+ emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
+ I.getOperand(3).getReg(), CondCode, MIB);
I.eraseFromParent();
return true;
}
@@ -4552,15 +4552,15 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
// Produce this if the compare is signed:
//
// tst x, y
- if (!CmpInst::isUnsigned(P) && LHSDef &&
+ if (!CmpInst::isUnsigned(P) && LHSDef &&
LHSDef->getOpcode() == TargetOpcode::G_AND) {
// Make sure that the RHS is 0.
auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
if (!ValAndVReg || ValAndVReg->Value != 0)
return nullptr;
- return emitTST(LHSDef->getOperand(1),
- LHSDef->getOperand(2), MIRBuilder);
+ return emitTST(LHSDef->getOperand(1),
+ LHSDef->getOperand(2), MIRBuilder);
}
return nullptr;
@@ -4708,7 +4708,7 @@ bool AArch64InstructionSelector::selectInsertElt(
auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
if (!VRegAndVal)
return false;
- unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
+ unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
// Perform the lane insert.
Register SrcReg = I.getOperand(1).getReg();
@@ -4765,9 +4765,9 @@ bool AArch64InstructionSelector::selectInsertElt(
bool AArch64InstructionSelector::tryOptConstantBuildVec(
MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
- unsigned DstSize = DstTy.getSizeInBits();
- assert(DstSize <= 128 && "Unexpected build_vec type!");
- if (DstSize < 32)
+ unsigned DstSize = DstTy.getSizeInBits();
+ assert(DstSize <= 128 && "Unexpected build_vec type!");
+ if (DstSize < 32)
return false;
// Check if we're building a constant vector, in which case we want to
// generate a constant pool load instead of a vector insert sequence.
@@ -4788,24 +4788,24 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
}
Constant *CV = ConstantVector::get(Csts);
MachineIRBuilder MIB(I);
- if (CV->isNullValue()) {
- // Until the importer can support immAllZerosV in pattern leaf nodes,
- // select a zero move manually here.
- Register DstReg = I.getOperand(0).getReg();
- if (DstSize == 128) {
- auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
- I.eraseFromParent();
- return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- } else if (DstSize == 64) {
- auto Mov =
- MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
- .addImm(0);
- MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
- .addReg(Mov.getReg(0), 0, AArch64::dsub);
- I.eraseFromParent();
- return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
- }
- }
+ if (CV->isNullValue()) {
+ // Until the importer can support immAllZerosV in pattern leaf nodes,
+ // select a zero move manually here.
+ Register DstReg = I.getOperand(0).getReg();
+ if (DstSize == 128) {
+ auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ } else if (DstSize == 64) {
+ auto Mov =
+ MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
+ .addImm(0);
+ MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+ .addReg(Mov.getReg(0), 0, AArch64::dsub);
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
+ }
+ }
auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
if (!CPLoad) {
LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
@@ -4927,10 +4927,10 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
case Intrinsic::debugtrap:
MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
break;
- case Intrinsic::ubsantrap:
- MIRBuilder.buildInstr(AArch64::BRK, {}, {})
- .addImm(I.getOperand(1).getImm() | ('U' << 8));
- break;
+ case Intrinsic::ubsantrap:
+ MIRBuilder.buildInstr(AArch64::BRK, {}, {})
+ .addImm(I.getOperand(1).getImm() | ('U' << 8));
+ break;
}
I.eraseFromParent();
@@ -4996,22 +4996,22 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
- if (!MFReturnAddr) {
- // Insert the copy from LR/X30 into the entry block, before it can be
- // clobbered by anything.
- MFI.setReturnAddressIsTaken(true);
- MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
- AArch64::GPR64RegClass);
- }
-
- if (STI.hasPAuth()) {
- MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
- } else {
- MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
- MIRBuilder.buildInstr(AArch64::XPACLRI);
- MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ if (!MFReturnAddr) {
+ // Insert the copy from LR/X30 into the entry block, before it can be
+ // clobbered by anything.
+ MFI.setReturnAddressIsTaken(true);
+ MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
+ AArch64::GPR64RegClass);
}
-
+
+ if (STI.hasPAuth()) {
+ MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
+ } else {
+ MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
+ MIRBuilder.buildInstr(AArch64::XPACLRI);
+ MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ }
+
I.eraseFromParent();
return true;
}
@@ -5031,16 +5031,16 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
MIRBuilder.buildCopy({DstReg}, {FrameAddr});
else {
MFI.setReturnAddressIsTaken(true);
-
- if (STI.hasPAuth()) {
- Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
- MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
- } else {
- MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
- MIRBuilder.buildInstr(AArch64::XPACLRI);
- MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
- }
+
+ if (STI.hasPAuth()) {
+ Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
+ MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
+ } else {
+ MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
+ MIRBuilder.buildInstr(AArch64::XPACLRI);
+ MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
+ }
}
I.eraseFromParent();
@@ -5248,7 +5248,7 @@ AArch64InstructionSelector::selectExtendedSHL(
// The value must fit into 3 bits, and must be positive. Make sure that is
// true.
- int64_t ImmVal = ValAndVReg->Value.getSExtValue();
+ int64_t ImmVal = ValAndVReg->Value.getSExtValue();
// Since we're going to pull this into a shift, the constant value must be
// a power of 2. If we got a multiply, then we need to check this.
@@ -5388,60 +5388,60 @@ InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const {
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
- if (!Root.isReg())
- return None;
- MachineInstr *PtrAdd =
- getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
- if (!PtrAdd)
+ if (!Root.isReg())
return None;
-
-  // Check for immediates which cannot be encoded in the [base + imm]
- // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
- // end up with code like:
- //
- // mov x0, wide
-  //   add x1, base, x0
- // ldr x2, [x1, x0]
- //
- // In this situation, we can use the [base, xreg] addressing mode to save an
- // add/sub:
- //
- // mov x0, wide
- // ldr x2, [base, x0]
- auto ValAndVReg =
- getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
- if (ValAndVReg) {
- unsigned Scale = Log2_32(SizeInBytes);
- int64_t ImmOff = ValAndVReg->Value.getSExtValue();
-
-    // Skip immediates that can be selected in the load/store addressing
- // mode.
- if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
- ImmOff < (0x1000 << Scale))
- return None;
-
- // Helper lambda to decide whether or not it is preferable to emit an add.
- auto isPreferredADD = [](int64_t ImmOff) {
- // Constants in [0x0, 0xfff] can be encoded in an add.
- if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
- return true;
-
- // Can it be encoded in an add lsl #12?
- if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
- return false;
-
- // It can be encoded in an add lsl #12, but we may not want to. If it is
- // possible to select this as a single movz, then prefer that. A single
- // movz is faster than an add with a shift.
- return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
- (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
- };
-
- // If the immediate can be encoded in a single add/sub, then bail out.
- if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
- return None;
- }
-
+ MachineInstr *PtrAdd =
+ getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+ if (!PtrAdd)
+ return None;
+
+  // Check for immediates which cannot be encoded in the [base + imm]
+ // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
+ // end up with code like:
+ //
+ // mov x0, wide
+  //   add x1, base, x0
+ // ldr x2, [x1, x0]
+ //
+ // In this situation, we can use the [base, xreg] addressing mode to save an
+ // add/sub:
+ //
+ // mov x0, wide
+ // ldr x2, [base, x0]
+ auto ValAndVReg =
+ getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
+ if (ValAndVReg) {
+ unsigned Scale = Log2_32(SizeInBytes);
+ int64_t ImmOff = ValAndVReg->Value.getSExtValue();
+
+    // Skip immediates that can be selected in the load/store addressing
+ // mode.
+ if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
+ ImmOff < (0x1000 << Scale))
+ return None;
+
+ // Helper lambda to decide whether or not it is preferable to emit an add.
+ auto isPreferredADD = [](int64_t ImmOff) {
+ // Constants in [0x0, 0xfff] can be encoded in an add.
+ if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
+ return true;
+
+ // Can it be encoded in an add lsl #12?
+ if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
+ return false;
+
+ // It can be encoded in an add lsl #12, but we may not want to. If it is
+ // possible to select this as a single movz, then prefer that. A single
+ // movz is faster than an add with a shift.
+ return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
+ (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
+ };
+
+ // If the immediate can be encoded in a single add/sub, then bail out.
+ if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
+ return None;
+ }
+
// Try to fold shifts into the addressing mode.
auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
if (AddrModeFns)
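The isPreferredADD lambda above packs three immediate-encoding facts into bit masks; here is a standalone restatement (mask values copied from the code, surrounding names are mine) that can be compiled to sanity-check them:

#include <cstdint>
#include <cstdio>

static bool isPreferredADD(int64_t ImmOff) {
  // Fits the plain 12-bit add/sub immediate.
  if ((ImmOff & 0xfffffffffffff000LL) == 0)
    return true;
  // Cannot be expressed as "add ..., #imm, lsl #12" either.
  if ((ImmOff & 0xffffffffff000fffLL) != 0)
    return false;
  // Expressible with lsl #12, but reject it when a single movz
  // (imm16 at shift 0 or shift 16) already covers the value.
  return (ImmOff & 0xffffffffff00ffffLL) != 0 &&
         (ImmOff & 0xffffffffffff0fffLL) != 0;
}

int main() {
  printf("%d\n", isPreferredADD(0xabc));         // 1: plain add immediate
  printf("%d\n", isPreferredADD(0x123000));      // 1: add with lsl #12
  printf("%d\n", isPreferredADD(0x10000));       // 0: a single movz is cheaper
  printf("%d\n", isPreferredADD(0x7ff123456LL)); // 0: too wide, keep [base, xreg]
}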
@@ -5871,8 +5871,8 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
"Expected G_CONSTANT");
- Optional<int64_t> CstVal =
- getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
+ Optional<int64_t> CstVal =
+ getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
assert(CstVal && "Expected constant value");
MIB.addImm(CstVal.getValue());
}
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 5a6c904e3f..af24267bf2 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -14,7 +14,7 @@
#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
-#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -23,8 +23,8 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
-#include <initializer_list>
-#include "llvm/Support/MathExtras.h"
+#include <initializer_list>
+#include "llvm/Support/MathExtras.h"
#define DEBUG_TYPE "aarch64-legalinfo"
@@ -56,13 +56,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT v2s64 = LLT::vector(2, 64);
const LLT v2p0 = LLT::vector(2, p0);
- std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
- v16s8, v8s16, v4s32,
- v2s64, v2p0,
- /* End 128bit types */
- /* Begin 64bit types */
- v8s8, v4s16, v2s32};
-
+ std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
+ v16s8, v8s16, v4s32,
+ v2s64, v2p0,
+ /* End 128bit types */
+ /* Begin 64bit types */
+ v8s8, v4s16, v2s32};
+
const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
// FIXME: support subtargets which have neon/fp-armv8 disabled.
@@ -71,31 +71,31 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return;
}
- // Some instructions only support s16 if the subtarget has full 16-bit FP
- // support.
- const bool HasFP16 = ST.hasFullFP16();
- const LLT &MinFPScalar = HasFP16 ? s16 : s32;
-
+ // Some instructions only support s16 if the subtarget has full 16-bit FP
+ // support.
+ const bool HasFP16 = ST.hasFullFP16();
+ const LLT &MinFPScalar = HasFP16 ? s16 : s32;
+
getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
- .legalFor({p0, s1, s8, s16, s32, s64})
- .legalFor(PackedVectorAllTypeList)
- .clampScalar(0, s1, s64)
- .widenScalarToNextPow2(0, 8)
- .fewerElementsIf(
- [=](const LegalityQuery &Query) {
- return Query.Types[0].isVector() &&
- (Query.Types[0].getElementType() != s64 ||
- Query.Types[0].getNumElements() != 2);
- },
- [=](const LegalityQuery &Query) {
- LLT EltTy = Query.Types[0].getElementType();
- if (EltTy == s64)
- return std::make_pair(0, LLT::vector(2, 64));
- return std::make_pair(0, EltTy);
- });
-
- getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64})
- .legalFor(PackedVectorAllTypeList)
+ .legalFor({p0, s1, s8, s16, s32, s64})
+ .legalFor(PackedVectorAllTypeList)
+ .clampScalar(0, s1, s64)
+ .widenScalarToNextPow2(0, 8)
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].isVector() &&
+ (Query.Types[0].getElementType() != s64 ||
+ Query.Types[0].getNumElements() != 2);
+ },
+ [=](const LegalityQuery &Query) {
+ LLT EltTy = Query.Types[0].getElementType();
+ if (EltTy == s64)
+ return std::make_pair(0, LLT::vector(2, 64));
+ return std::make_pair(0, EltTy);
+ });
+
+ getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64})
+ .legalFor(PackedVectorAllTypeList)
.clampScalar(0, s16, s64)
.widenScalarToNextPow2(0);
@@ -105,38 +105,38 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
- .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
- .scalarizeIf(
- [=](const LegalityQuery &Query) {
- return Query.Opcode == G_MUL && Query.Types[0] == v2s64;
- },
- 0)
- .legalFor({v2s64})
+ .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
+ .scalarizeIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Opcode == G_MUL && Query.Types[0] == v2s64;
+ },
+ 0)
+ .legalFor({v2s64})
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0);
- getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
+ getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
.customIf([=](const LegalityQuery &Query) {
const auto &SrcTy = Query.Types[0];
const auto &AmtTy = Query.Types[1];
return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
AmtTy.getSizeInBits() == 32;
})
- .legalFor({
- {s32, s32},
- {s32, s64},
- {s64, s64},
- {v8s8, v8s8},
- {v16s8, v16s8},
- {v4s16, v4s16},
- {v8s16, v8s16},
- {v2s32, v2s32},
- {v4s32, v4s32},
- {v2s64, v2s64},
- })
+ .legalFor({
+ {s32, s32},
+ {s32, s64},
+ {s64, s64},
+ {v8s8, v8s8},
+ {v16s8, v16s8},
+ {v4s16, v4s16},
+ {v8s16, v8s16},
+ {v2s32, v2s32},
+ {v4s32, v4s32},
+ {v2s64, v2s64},
+ })
.clampScalar(1, s32, s64)
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
@@ -161,25 +161,25 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_SREM, G_UREM})
.lowerFor({s1, s8, s16, s32, s64});
- getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}});
+ getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}});
getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
- getActionDefinitionsBuilder(
- {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
+ getActionDefinitionsBuilder(
+ {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
.legalFor({{s32, s1}, {s64, s1}})
.minScalar(0, s32);
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
- .legalFor({s32, s64, v2s64, v4s32, v2s32})
- .clampNumElements(0, v2s32, v4s32)
- .clampNumElements(0, v2s64, v2s64);
+ .legalFor({s32, s64, v2s64, v4s32, v2s32})
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64);
getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
- G_FNEARBYINT, G_INTRINSIC_LRINT})
+ G_FNEARBYINT, G_INTRINSIC_LRINT})
// If we don't have full FP16 support, then scalarize the elements of
// vectors containing fp16 types.
.fewerElementsIf(
@@ -285,7 +285,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v4s32, p0, 128, 8},
{v2s64, p0, 128, 8}})
// These extends are also legal
- .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}})
+ .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}})
.clampScalar(0, s8, s64)
.lowerIfMemSizeNotPow2()
// Lower any any-extending loads left into G_ANYEXT and G_LOAD
@@ -307,7 +307,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{p0, p0, 64, 8},
{s128, p0, 128, 8},
{v16s8, p0, 128, 8},
- {v8s8, p0, 64, 8},
+ {v8s8, p0, 64, 8},
{v4s16, p0, 64, 8},
{v8s16, p0, 128, 8},
{v2s32, p0, 64, 8},
@@ -325,19 +325,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// Constants
getActionDefinitionsBuilder(G_CONSTANT)
- .legalFor({p0, s8, s16, s32, s64})
+ .legalFor({p0, s8, s16, s32, s64})
.clampScalar(0, s8, s64)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder(G_FCONSTANT)
- .legalIf([=](const LegalityQuery &Query) {
- const auto &Ty = Query.Types[0];
- if (HasFP16 && Ty == s16)
- return true;
- return Ty == s32 || Ty == s64 || Ty == s128;
- })
- .clampScalar(0, MinFPScalar, s128);
-
- getActionDefinitionsBuilder({G_ICMP, G_FCMP})
+ .legalIf([=](const LegalityQuery &Query) {
+ const auto &Ty = Query.Types[0];
+ if (HasFP16 && Ty == s16)
+ return true;
+ return Ty == s32 || Ty == s64 || Ty == s128;
+ })
+ .clampScalar(0, MinFPScalar, s128);
+
+ getActionDefinitionsBuilder({G_ICMP, G_FCMP})
.legalFor({{s32, s32},
{s32, s64},
{s32, p0},
@@ -365,8 +365,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
s64)
- .widenScalarOrEltToNextPow2(1)
- .clampNumElements(0, v2s32, v4s32);
+ .widenScalarOrEltToNextPow2(1)
+ .clampNumElements(0, v2s32, v4s32);
// Extensions
auto ExtLegalFunc = [=](const LegalityQuery &Query) {
@@ -374,7 +374,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
if (DstSize == 128 && !Query.Types[0].isVector())
return false; // Extending to a scalar s128 needs narrowing.
-
+
// Make sure that we have something that will fit in a register, and
// make sure it's a power of 2.
if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
@@ -399,28 +399,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalIf(ExtLegalFunc)
.clampScalar(0, s64, s64); // Just for s128, others are handled above.
- getActionDefinitionsBuilder(G_TRUNC)
- .minScalarOrEltIf(
- [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
- 0, s8)
- .customIf([=](const LegalityQuery &Query) {
- LLT DstTy = Query.Types[0];
- LLT SrcTy = Query.Types[1];
- return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
- })
- .alwaysLegal();
+ getActionDefinitionsBuilder(G_TRUNC)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
+ 0, s8)
+ .customIf([=](const LegalityQuery &Query) {
+ LLT DstTy = Query.Types[0];
+ LLT SrcTy = Query.Types[1];
+ return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
+ })
+ .alwaysLegal();
- getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower();
+ getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower();
// FP conversions
- getActionDefinitionsBuilder(G_FPTRUNC)
- .legalFor(
- {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
- .clampMaxNumElements(0, s32, 2);
- getActionDefinitionsBuilder(G_FPEXT)
- .legalFor(
- {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
- .clampMaxNumElements(0, s64, 2);
+ getActionDefinitionsBuilder(G_FPTRUNC)
+ .legalFor(
+ {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
+ .clampMaxNumElements(0, s32, 2);
+ getActionDefinitionsBuilder(G_FPEXT)
+ .legalFor(
+ {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
+ .clampMaxNumElements(0, s64, 2);
// Conversions
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
@@ -433,7 +433,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
.legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
.clampScalar(1, s32, s64)
- .minScalarSameAs(1, 0)
+ .minScalarSameAs(1, 0)
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0);
@@ -445,8 +445,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
- .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
- .lowerIf(isVector(0));
+ .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
+ .lowerIf(isVector(0));
// Pointer-handling
getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
@@ -576,8 +576,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
})
// Any vectors left are the wrong size. Scalarize them.
- .scalarize(0)
- .scalarize(1);
+ .scalarize(0)
+ .scalarize(1);
}
getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
@@ -589,40 +589,40 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalIf([=](const LegalityQuery &Query) {
const LLT &VecTy = Query.Types[1];
return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
- VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
- VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0;
- })
- .minScalarOrEltIf(
- [=](const LegalityQuery &Query) {
-          // We want to promote <M x s1> to <M x s64> if that wouldn't
- // cause the total vec size to be > 128b.
- return Query.Types[1].getNumElements() <= 2;
- },
- 0, s64)
- .minScalarOrEltIf(
- [=](const LegalityQuery &Query) {
- return Query.Types[1].getNumElements() <= 4;
- },
- 0, s32)
- .minScalarOrEltIf(
- [=](const LegalityQuery &Query) {
- return Query.Types[1].getNumElements() <= 8;
- },
- 0, s16)
- .minScalarOrEltIf(
- [=](const LegalityQuery &Query) {
- return Query.Types[1].getNumElements() <= 16;
- },
- 0, s8)
- .minScalarOrElt(0, s8); // Worst case, we need at least s8.
+ VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
+ VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0;
+ })
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+          // We want to promote <M x s1> to <M x s64> if that wouldn't
+ // cause the total vec size to be > 128b.
+ return Query.Types[1].getNumElements() <= 2;
+ },
+ 0, s64)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[1].getNumElements() <= 4;
+ },
+ 0, s32)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[1].getNumElements() <= 8;
+ },
+ 0, s16)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[1].getNumElements() <= 16;
+ },
+ 0, s8)
+ .minScalarOrElt(0, s8); // Worst case, we need at least s8.
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
- .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64}));
+ .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64}));
getActionDefinitionsBuilder(G_BUILD_VECTOR)
- .legalFor({{v8s8, s8},
- {v16s8, s8},
- {v4s16, s16},
+ .legalFor({{v8s8, s8},
+ {v16s8, s8},
+ {v4s16, s16},
{v8s16, s16},
{v2s32, s32},
{v4s32, s32},
@@ -638,9 +638,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
})
.minScalarSameAs(1, 0);
- getActionDefinitionsBuilder(G_CTLZ)
- .legalForCartesianProduct(
- {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
+ getActionDefinitionsBuilder(G_CTLZ)
+ .legalForCartesianProduct(
+ {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
.scalarize(1);
getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
@@ -651,7 +651,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// to be the same size as the dest.
if (DstTy != SrcTy)
return false;
- for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) {
+ for (auto &Ty : {v2s32, v4s32, v2s64, v2p0, v16s8, v8s16}) {
if (DstTy == Ty)
return true;
}
@@ -668,7 +668,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
- getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}});
+ getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}});
getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
return Query.Types[0] == p0 && Query.Types[1] == s64;
@@ -676,20 +676,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
- getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
-
- getActionDefinitionsBuilder(G_ABS).lowerIf(
- [=](const LegalityQuery &Query) { return Query.Types[0].isScalar(); });
-
- getActionDefinitionsBuilder(G_VECREDUCE_FADD)
- // We only have FADDP to do reduction-like operations. Lower the rest.
- .legalFor({{s32, v2s32}, {s64, v2s64}})
- .lower();
-
- getActionDefinitionsBuilder(G_VECREDUCE_ADD)
- .legalFor({{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s64, v2s64}})
- .lower();
-
+ getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
+
+ getActionDefinitionsBuilder(G_ABS).lowerIf(
+ [=](const LegalityQuery &Query) { return Query.Types[0].isScalar(); });
+
+ getActionDefinitionsBuilder(G_VECREDUCE_FADD)
+ // We only have FADDP to do reduction-like operations. Lower the rest.
+ .legalFor({{s32, v2s32}, {s64, v2s64}})
+ .lower();
+
+ getActionDefinitionsBuilder(G_VECREDUCE_ADD)
+ .legalFor({{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s64, v2s64}})
+ .lower();
+
computeTables();
verify(*ST.getInstrInfo());
}
@@ -714,63 +714,63 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
case TargetOpcode::G_GLOBAL_VALUE:
return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
- case TargetOpcode::G_TRUNC:
- return legalizeVectorTrunc(MI, Helper);
+ case TargetOpcode::G_TRUNC:
+ return legalizeVectorTrunc(MI, Helper);
}
llvm_unreachable("expected switch to return");
}
-static void extractParts(Register Reg, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
- SmallVectorImpl<Register> &VRegs) {
- for (int I = 0; I < NumParts; ++I)
- VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
- MIRBuilder.buildUnmerge(VRegs, Reg);
-}
-
-bool AArch64LegalizerInfo::legalizeVectorTrunc(
- MachineInstr &MI, LegalizerHelper &Helper) const {
- MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
- MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
-  // Similar to how operand splitting is done in SelectionDAG, we can handle
- // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
- // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
- // %lo16(<4 x s16>) = G_TRUNC %inlo
- // %hi16(<4 x s16>) = G_TRUNC %inhi
- // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
- // %res(<8 x s8>) = G_TRUNC %in16
-
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- assert(isPowerOf2_32(DstTy.getSizeInBits()) &&
- isPowerOf2_32(SrcTy.getSizeInBits()));
-
- // Split input type.
- LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2);
- // First, split the source into two smaller vectors.
- SmallVector<Register, 2> SplitSrcs;
- extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
-
- // Truncate the splits into intermediate narrower elements.
- LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
- for (unsigned I = 0; I < SplitSrcs.size(); ++I)
- SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
-
- auto Concat = MIRBuilder.buildConcatVectors(
- DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
-
- Helper.Observer.changingInstr(MI);
- MI.getOperand(1).setReg(Concat.getReg(0));
- Helper.Observer.changedInstr(MI);
- return true;
-}
-
-bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
- MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
- GISelChangeObserver &Observer) const {
+static void extractParts(Register Reg, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
+ SmallVectorImpl<Register> &VRegs) {
+ for (int I = 0; I < NumParts; ++I)
+ VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+}
+
+bool AArch64LegalizerInfo::legalizeVectorTrunc(
+ MachineInstr &MI, LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  // Similar to how operand splitting is done in SelectionDAG, we can handle
+ // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
+ // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
+ // %lo16(<4 x s16>) = G_TRUNC %inlo
+ // %hi16(<4 x s16>) = G_TRUNC %inhi
+ // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
+ // %res(<8 x s8>) = G_TRUNC %in16
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+ assert(isPowerOf2_32(DstTy.getSizeInBits()) &&
+ isPowerOf2_32(SrcTy.getSizeInBits()));
+
+ // Split input type.
+ LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2);
+ // First, split the source into two smaller vectors.
+ SmallVector<Register, 2> SplitSrcs;
+ extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
+
+ // Truncate the splits into intermediate narrower elements.
+ LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
+ for (unsigned I = 0; I < SplitSrcs.size(); ++I)
+ SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
+
+ auto Concat = MIRBuilder.buildConcatVectors(
+ DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
+
+ Helper.Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Concat.getReg(0));
+ Helper.Observer.changedInstr(MI);
+ return true;
+}
+
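
Quick illustration of the legalizeVectorTrunc staging above: the wide G_TRUNC is never shrunk in one step; the source is unmerged into two halves, each half is truncated to an intermediate element width (twice the destination width), the halves are concatenated, and one last G_TRUNC produces the result. The standalone sketch below is not part of the patch; plain arrays stand in for the MIR vectors and the function name is made up, but the v8s32 -> v8s16 -> v8s8 staging mirrors the comment.

#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Truncate v8s32 -> v8s8 the way legalizeVectorTrunc stages it: unmerge into
// two v4s32 halves, truncate each to v4s16, concatenate to v8s16, then
// truncate once more to v8s8.
static std::array<uint8_t, 8> truncStaged(const std::array<uint32_t, 8> &In) {
  std::array<uint16_t, 4> Lo16, Hi16;
  for (int I = 0; I < 4; ++I) {
    Lo16[I] = static_cast<uint16_t>(In[I]);      // %lo16 = G_TRUNC %inlo
    Hi16[I] = static_cast<uint16_t>(In[I + 4]);  // %hi16 = G_TRUNC %inhi
  }
  std::array<uint16_t, 8> In16;                  // %in16 = G_CONCAT_VECTORS
  for (int I = 0; I < 4; ++I) {
    In16[I] = Lo16[I];
    In16[I + 4] = Hi16[I];
  }
  std::array<uint8_t, 8> Res;                    // %res = G_TRUNC %in16
  for (int I = 0; I < 8; ++I)
    Res[I] = static_cast<uint8_t>(In16[I]);
  return Res;
}

int main() {
  std::array<uint32_t, 8> In = {0x11111101, 0x22222202, 0x33333303, 0x44444404,
                                0x55555505, 0x66666606, 0x77777707, 0x88888808};
  std::array<uint8_t, 8> Staged = truncStaged(In);
  for (int I = 0; I < 8; ++I)
    assert(Staged[I] == static_cast<uint8_t>(In[I])); // same as a direct trunc
  puts("staged truncation matches direct truncation");
  return 0;
}
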
+bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
// We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
// G_ADD_LOW instructions.
@@ -792,27 +792,27 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
// Set the regclass on the dest reg too.
MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
- // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
- // by creating a MOVK that sets bits 48-63 of the register to (global address
- // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
-  // prevent an incorrect tag being generated during relocation when the
- // global appears before the code section. Without the offset, a global at
- // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
- // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
- // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
- // instead of `0xf`.
- // This assumes that we're in the small code model so we can assume a binary
- // size of <= 4GB, which makes the untagged PC relative offset positive. The
- // binary must also be loaded into address range [0, 2^48). Both of these
- // properties need to be ensured at runtime when using tagged addresses.
- if (OpFlags & AArch64II::MO_TAGGED) {
- ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
- .addGlobalAddress(GV, 0x100000000,
- AArch64II::MO_PREL | AArch64II::MO_G3)
- .addImm(48);
- MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
- }
-
+ // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
+ // by creating a MOVK that sets bits 48-63 of the register to (global address
+ // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
+  // prevent an incorrect tag being generated during relocation when the
+ // global appears before the code section. Without the offset, a global at
+ // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
+ // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
+ // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
+ // instead of `0xf`.
+ // This assumes that we're in the small code model so we can assume a binary
+ // size of <= 4GB, which makes the untagged PC relative offset positive. The
+ // binary must also be loaded into address range [0, 2^48). Both of these
+ // properties need to be ensured at runtime when using tagged addresses.
+ if (OpFlags & AArch64II::MO_TAGGED) {
+ ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
+ .addGlobalAddress(GV, 0x100000000,
+ AArch64II::MO_PREL | AArch64II::MO_G3)
+ .addImm(48);
+ MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
+ }
+
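
The 0x100000000 bias in the MO_TAGGED comment above only matters for the top-16-bit MOVK immediate. The throwaway sketch below is not LLVM code; it reuses the addresses from the comment and simply evaluates the PC-relative delta with and without the bias, so the tag byte comes out as 0xe versus the intended 0xf.

#include <cstdint>
#include <cstdio>

int main() {
  // Example from the comment: tagged global at 0x1000 with tag 0xf,
  // referenced from code at 0x2000.
  const uint64_t Global = 0x0f00000000001000ULL;
  const uint64_t PC = 0x2000ULL;

  // Without the bias, the subtraction borrows into the top bits...
  uint64_t NoBias = (Global - PC) >> 48;
  // ...with the +0x100000000 bias, the borrow is absorbed below bit 48.
  uint64_t WithBias = (Global + 0x100000000ULL - PC) >> 48;

  printf("MOVK imm without bias: 0x%04llx (top address byte 0x%llx)\n",
         (unsigned long long)NoBias, (unsigned long long)(NoBias >> 8));
  printf("MOVK imm with bias:    0x%04llx (top address byte 0x%llx)\n",
         (unsigned long long)WithBias, (unsigned long long)(WithBias >> 8));
  return 0;
}
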
MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
.addGlobalAddress(GV, 0,
OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
@@ -820,8 +820,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
return true;
}
-bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
- MachineInstr &MI) const {
+bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
return true;
}
@@ -838,13 +838,13 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr(
if (!VRegAndVal)
return true;
// Check the shift amount is in range for an immediate form.
- int64_t Amount = VRegAndVal->Value.getSExtValue();
+ int64_t Amount = VRegAndVal->Value.getSExtValue();
if (Amount > 31)
return true; // This will have to remain a register variant.
auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
- Observer.changingInstr(MI);
+ Observer.changingInstr(MI);
MI.getOperand(2).setReg(ExtCst.getReg(0));
- Observer.changedInstr(MI);
+ Observer.changedInstr(MI);
return true;
}
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 8217e37c85..c22cb26608 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -15,7 +15,7 @@
#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
-#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
namespace llvm {
@@ -46,7 +46,7 @@ private:
bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const;
- bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
+ bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
const AArch64Subtarget *ST;
};
} // End llvm namespace.
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index fdd04cb77f..bf3190ce93 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -1,22 +1,22 @@
-//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===//
+//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Post-legalization combines on generic MachineInstrs.
-///
-/// The combines here must preserve instruction legality.
-///
-/// Lowering combines (e.g. pseudo matching) should be handled by
-/// AArch64PostLegalizerLowering.
-///
-/// Combines which don't rely on instruction legality should go in the
-/// AArch64PreLegalizerCombiner.
-///
+///
+/// \file
+/// Post-legalization combines on generic MachineInstrs.
+///
+/// The combines here must preserve instruction legality.
+///
+/// Lowering combines (e.g. pseudo matching) should be handled by
+/// AArch64PostLegalizerLowering.
+///
+/// Combines which don't rely on instruction legality should go in the
+/// AArch64PreLegalizerCombiner.
+///
//===----------------------------------------------------------------------===//
#include "AArch64TargetMachine.h"
@@ -24,12 +24,12 @@
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
@@ -37,202 +37,202 @@
using namespace llvm;
-/// This combine tries to do what performExtractVectorEltCombine does in SDAG.
-/// Rewrite for pairwise fadd pattern
-/// (s32 (g_extract_vector_elt
-/// (g_fadd (vXs32 Other)
-/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
-/// ->
-/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
-/// (g_extract_vector_elt (vXs32 Other) 1))
-bool matchExtractVecEltPairwiseAdd(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- std::tuple<unsigned, LLT, Register> &MatchInfo) {
- Register Src1 = MI.getOperand(1).getReg();
- Register Src2 = MI.getOperand(2).getReg();
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
-
- auto Cst = getConstantVRegValWithLookThrough(Src2, MRI);
- if (!Cst || Cst->Value != 0)
+/// This combine tries to do what performExtractVectorEltCombine does in SDAG.
+/// Rewrite for pairwise fadd pattern
+/// (s32 (g_extract_vector_elt
+/// (g_fadd (vXs32 Other)
+/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
+/// ->
+/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
+/// (g_extract_vector_elt (vXs32 Other) 1))
+bool matchExtractVecEltPairwiseAdd(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::tuple<unsigned, LLT, Register> &MatchInfo) {
+ Register Src1 = MI.getOperand(1).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto Cst = getConstantVRegValWithLookThrough(Src2, MRI);
+ if (!Cst || Cst->Value != 0)
return false;
- // SDAG also checks for FullFP16, but this looks to be beneficial anyway.
+ // SDAG also checks for FullFP16, but this looks to be beneficial anyway.
- // Now check for an fadd operation. TODO: expand this for integer add?
- auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
- if (!FAddMI)
+ // Now check for an fadd operation. TODO: expand this for integer add?
+ auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
+ if (!FAddMI)
return false;
- // If we add support for integer add, must restrict these types to just s64.
- unsigned DstSize = DstTy.getSizeInBits();
- if (DstSize != 16 && DstSize != 32 && DstSize != 64)
+ // If we add support for integer add, must restrict these types to just s64.
+ unsigned DstSize = DstTy.getSizeInBits();
+ if (DstSize != 16 && DstSize != 32 && DstSize != 64)
return false;
- Register Src1Op1 = FAddMI->getOperand(1).getReg();
- Register Src1Op2 = FAddMI->getOperand(2).getReg();
- MachineInstr *Shuffle =
- getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
- MachineInstr *Other = MRI.getVRegDef(Src1Op1);
- if (!Shuffle) {
- Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
- Other = MRI.getVRegDef(Src1Op2);
+ Register Src1Op1 = FAddMI->getOperand(1).getReg();
+ Register Src1Op2 = FAddMI->getOperand(2).getReg();
+ MachineInstr *Shuffle =
+ getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
+ MachineInstr *Other = MRI.getVRegDef(Src1Op1);
+ if (!Shuffle) {
+ Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
+ Other = MRI.getVRegDef(Src1Op2);
}
- // We're looking for a shuffle that moves the second element to index 0.
- if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
- Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
- std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
- std::get<1>(MatchInfo) = DstTy;
- std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
+ // We're looking for a shuffle that moves the second element to index 0.
+ if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
+ Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
+ std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
+ std::get<1>(MatchInfo) = DstTy;
+ std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
return true;
}
return false;
}
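
The pattern matched above boils down to a simple identity: extracting lane 0 of fadd(V, shuffle(V, <1, ...>)) is just V[0] + V[1], which the apply step rebuilds as two extracts plus an add (and which later selects to FADDP). The sketch below is not part of the patch; it mimics the two shapes with a plain float array to show they compute the same value.

#include <cassert>
#include <cstdio>

int main() {
  float V[4] = {1.5f, 2.25f, -3.0f, 4.0f};

  // Original shape: lane 0 of (V + shuffle(V, <1, x, x, x>)).
  float Shuf0 = V[1];                 // only lane 0 of the shuffle matters
  float ExtractOfAdd = V[0] + Shuf0;

  // Rewritten shape: extract lanes 0 and 1, then add.
  float PairwiseAdd = V[0] + V[1];

  assert(ExtractOfAdd == PairwiseAdd);
  printf("both forms give %f\n", (double)PairwiseAdd);
  return 0;
}
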
-bool applyExtractVecEltPairwiseAdd(
- MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
- std::tuple<unsigned, LLT, Register> &MatchInfo) {
- unsigned Opc = std::get<0>(MatchInfo);
- assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
- // We want to generate two extracts of elements 0 and 1, and add them.
- LLT Ty = std::get<1>(MatchInfo);
- Register Src = std::get<2>(MatchInfo);
- LLT s64 = LLT::scalar(64);
- B.setInstrAndDebugLoc(MI);
- auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
- auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
- B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
- MI.eraseFromParent();
+bool applyExtractVecEltPairwiseAdd(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+ std::tuple<unsigned, LLT, Register> &MatchInfo) {
+ unsigned Opc = std::get<0>(MatchInfo);
+ assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
+ // We want to generate two extracts of elements 0 and 1, and add them.
+ LLT Ty = std::get<1>(MatchInfo);
+ Register Src = std::get<2>(MatchInfo);
+ LLT s64 = LLT::scalar(64);
+ B.setInstrAndDebugLoc(MI);
+ auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
+ auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
+ B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
+ MI.eraseFromParent();
return true;
}
-static bool isSignExtended(Register R, MachineRegisterInfo &MRI) {
- // TODO: check if extended build vector as well.
- unsigned Opc = MRI.getVRegDef(R)->getOpcode();
- return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
+static bool isSignExtended(Register R, MachineRegisterInfo &MRI) {
+ // TODO: check if extended build vector as well.
+ unsigned Opc = MRI.getVRegDef(R)->getOpcode();
+ return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
}
-static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
- // TODO: check if extended build vector as well.
- return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
+static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
+ // TODO: check if extended build vector as well.
+ return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
}
-bool matchAArch64MulConstCombine(
- MachineInstr &MI, MachineRegisterInfo &MRI,
- std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
- assert(MI.getOpcode() == TargetOpcode::G_MUL);
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
- Register Dst = MI.getOperand(0).getReg();
- const LLT Ty = MRI.getType(LHS);
-
- // The below optimizations require a constant RHS.
- auto Const = getConstantVRegValWithLookThrough(RHS, MRI);
- if (!Const)
+bool matchAArch64MulConstCombine(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL);
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ const LLT Ty = MRI.getType(LHS);
+
+ // The below optimizations require a constant RHS.
+ auto Const = getConstantVRegValWithLookThrough(RHS, MRI);
+ if (!Const)
return false;
- const APInt ConstValue = Const->Value.sextOrSelf(Ty.getSizeInBits());
- // The following code is ported from AArch64ISelLowering.
- // Multiplication of a power of two plus/minus one can be done more
-  // cheaply as a shift+add/sub. For now, this is true unilaterally. If
- // future CPUs have a cheaper MADD instruction, this may need to be
- // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
- // 64-bit is 5 cycles, so this is always a win.
- // More aggressively, some multiplications N0 * C can be lowered to
- // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
- // e.g. 6=3*2=(2+1)*2.
- // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
-  // which equals (1+2)*16-(1+2).
- // TrailingZeroes is used to test if the mul can be lowered to
- // shift+add+shift.
- unsigned TrailingZeroes = ConstValue.countTrailingZeros();
- if (TrailingZeroes) {
- // Conservatively do not lower to shift+add+shift if the mul might be
- // folded into smul or umul.
- if (MRI.hasOneNonDBGUse(LHS) &&
- (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI)))
- return false;
- // Conservatively do not lower to shift+add+shift if the mul might be
- // folded into madd or msub.
- if (MRI.hasOneNonDBGUse(Dst)) {
- MachineInstr &UseMI = *MRI.use_instr_begin(Dst);
- if (UseMI.getOpcode() == TargetOpcode::G_ADD ||
- UseMI.getOpcode() == TargetOpcode::G_SUB)
- return false;
- }
- }
- // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
- // and shift+add+shift.
- APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
-
- unsigned ShiftAmt, AddSubOpc;
- // Is the shifted value the LHS operand of the add/sub?
- bool ShiftValUseIsLHS = true;
- // Do we need to negate the result?
- bool NegateResult = false;
-
- if (ConstValue.isNonNegative()) {
- // (mul x, 2^N + 1) => (add (shl x, N), x)
- // (mul x, 2^N - 1) => (sub (shl x, N), x)
- // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
- APInt SCVMinus1 = ShiftedConstValue - 1;
- APInt CVPlus1 = ConstValue + 1;
- if (SCVMinus1.isPowerOf2()) {
- ShiftAmt = SCVMinus1.logBase2();
- AddSubOpc = TargetOpcode::G_ADD;
- } else if (CVPlus1.isPowerOf2()) {
- ShiftAmt = CVPlus1.logBase2();
- AddSubOpc = TargetOpcode::G_SUB;
- } else
- return false;
- } else {
- // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
- // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
- APInt CVNegPlus1 = -ConstValue + 1;
- APInt CVNegMinus1 = -ConstValue - 1;
- if (CVNegPlus1.isPowerOf2()) {
- ShiftAmt = CVNegPlus1.logBase2();
- AddSubOpc = TargetOpcode::G_SUB;
- ShiftValUseIsLHS = false;
- } else if (CVNegMinus1.isPowerOf2()) {
- ShiftAmt = CVNegMinus1.logBase2();
- AddSubOpc = TargetOpcode::G_ADD;
- NegateResult = true;
- } else
- return false;
- }
-
- if (NegateResult && TrailingZeroes)
+ const APInt ConstValue = Const->Value.sextOrSelf(Ty.getSizeInBits());
+ // The following code is ported from AArch64ISelLowering.
+ // Multiplication of a power of two plus/minus one can be done more
+  // cheaply as a shift+add/sub. For now, this is true unilaterally. If
+ // future CPUs have a cheaper MADD instruction, this may need to be
+ // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
+ // 64-bit is 5 cycles, so this is always a win.
+ // More aggressively, some multiplications N0 * C can be lowered to
+ // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
+ // e.g. 6=3*2=(2+1)*2.
+ // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
+  // which equals (1+2)*16-(1+2).
+ // TrailingZeroes is used to test if the mul can be lowered to
+ // shift+add+shift.
+ unsigned TrailingZeroes = ConstValue.countTrailingZeros();
+ if (TrailingZeroes) {
+ // Conservatively do not lower to shift+add+shift if the mul might be
+ // folded into smul or umul.
+ if (MRI.hasOneNonDBGUse(LHS) &&
+ (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI)))
+ return false;
+ // Conservatively do not lower to shift+add+shift if the mul might be
+ // folded into madd or msub.
+ if (MRI.hasOneNonDBGUse(Dst)) {
+ MachineInstr &UseMI = *MRI.use_instr_begin(Dst);
+ if (UseMI.getOpcode() == TargetOpcode::G_ADD ||
+ UseMI.getOpcode() == TargetOpcode::G_SUB)
+ return false;
+ }
+ }
+ // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
+ // and shift+add+shift.
+ APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
+
+ unsigned ShiftAmt, AddSubOpc;
+ // Is the shifted value the LHS operand of the add/sub?
+ bool ShiftValUseIsLHS = true;
+ // Do we need to negate the result?
+ bool NegateResult = false;
+
+ if (ConstValue.isNonNegative()) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
+ APInt SCVMinus1 = ShiftedConstValue - 1;
+ APInt CVPlus1 = ConstValue + 1;
+ if (SCVMinus1.isPowerOf2()) {
+ ShiftAmt = SCVMinus1.logBase2();
+ AddSubOpc = TargetOpcode::G_ADD;
+ } else if (CVPlus1.isPowerOf2()) {
+ ShiftAmt = CVPlus1.logBase2();
+ AddSubOpc = TargetOpcode::G_SUB;
+ } else
+ return false;
+ } else {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ APInt CVNegPlus1 = -ConstValue + 1;
+ APInt CVNegMinus1 = -ConstValue - 1;
+ if (CVNegPlus1.isPowerOf2()) {
+ ShiftAmt = CVNegPlus1.logBase2();
+ AddSubOpc = TargetOpcode::G_SUB;
+ ShiftValUseIsLHS = false;
+ } else if (CVNegMinus1.isPowerOf2()) {
+ ShiftAmt = CVNegMinus1.logBase2();
+ AddSubOpc = TargetOpcode::G_ADD;
+ NegateResult = true;
+ } else
+ return false;
+ }
+
+ if (NegateResult && TrailingZeroes)
return false;
- ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
- auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt);
- auto ShiftedVal = B.buildShl(Ty, LHS, Shift);
-
- Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;
- Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);
- auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});
- assert(!(NegateResult && TrailingZeroes) &&
- "NegateResult and TrailingZeroes cannot both be true for now.");
- // Negate the result.
- if (NegateResult) {
- B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);
- return;
- }
- // Shift the result.
- if (TrailingZeroes) {
- B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes));
- return;
- }
- B.buildCopy(DstReg, Res.getReg(0));
- };
+ ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
+ auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt);
+ auto ShiftedVal = B.buildShl(Ty, LHS, Shift);
+
+ Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;
+ Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);
+ auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});
+ assert(!(NegateResult && TrailingZeroes) &&
+ "NegateResult and TrailingZeroes cannot both be true for now.");
+ // Negate the result.
+ if (NegateResult) {
+ B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);
+ return;
+ }
+ // Shift the result.
+ if (TrailingZeroes) {
+ B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes));
+ return;
+ }
+ B.buildCopy(DstReg, Res.getReg(0));
+ };
return true;
}
-bool applyAArch64MulConstCombine(
- MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
- std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
- B.setInstrAndDebugLoc(MI);
- ApplyFn(B, MI.getOperand(0).getReg());
+bool applyAArch64MulConstCombine(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+ std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
+ B.setInstrAndDebugLoc(MI);
+ ApplyFn(B, MI.getOperand(0).getReg());
MI.eraseFromParent();
return true;
}
@@ -348,7 +348,7 @@ INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE,
false)
namespace llvm {
-FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) {
+FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) {
return new AArch64PostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm
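
The matchAArch64MulConstCombine code in the file above only fires when the constant is (plus or minus) a power of two plus or minus one, optionally times a power of two, because exactly those products fold into one shift plus one add/sub, with at most a final shift or negate. The standalone sketch below is not LLVM code; it just checks the five rewrite identities quoted in the comments against plain multiplication, using an unsigned shift helper so left shifts of negative values stay well defined.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Left shift through uint64_t so negative inputs wrap in two's complement,
// matching what the generated MIR shift produces.
static int64_t shl(int64_t V, unsigned N) {
  return static_cast<int64_t>(static_cast<uint64_t>(V) << N);
}

int main() {
  for (int64_t X = -1000; X <= 1000; ++X) {
    assert(X * 9 == shl(X, 3) + X);          // (mul x, 2^N + 1)      -> add(shl x N, x)
    assert(X * 7 == shl(X, 3) - X);          // (mul x, 2^N - 1)      -> sub(shl x N, x)
    assert(X * 6 == shl(shl(X, 1) + X, 1));  // (mul x, (2^N+1)*2^M)  -> shl(add(shl x N, x), M)
    assert(X * -7 == X - shl(X, 3));         // (mul x, -(2^N - 1))   -> sub(x, shl x N)
    assert(X * -9 == -(shl(X, 3) + X));      // (mul x, -(2^N + 1))   -> neg(add(shl x N, x))
  }
  puts("all mul-by-constant rewrites agree with plain multiplication");
  return 0;
}
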
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index a06ff4b541..0447c3e8a0 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -1,704 +1,704 @@
-//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Post-legalization lowering for instructions.
-///
-/// This is used to offload pattern matching from the selector.
-///
-/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
-/// a G_ZIP, G_UZP, etc.
-///
-/// General optimization combines should be handled by either the
-/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AArch64TargetMachine.h"
-#include "AArch64GlobalISelUtils.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/CodeGen/GlobalISel/Combiner.h"
-#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
-#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
-#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-
-#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
-
-using namespace llvm;
-using namespace MIPatternMatch;
-using namespace AArch64GISelUtils;
-
-/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
-///
-/// Used for matching target-supported shuffles before codegen.
-struct ShuffleVectorPseudo {
- unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
- Register Dst; ///< Destination register.
- SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
- ShuffleVectorPseudo(unsigned Opc, Register Dst,
- std::initializer_list<SrcOp> SrcOps)
- : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
- ShuffleVectorPseudo() {}
-};
-
-/// Check if a vector shuffle corresponds to a REV instruction with the
-/// specified blocksize.
-static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
- unsigned BlockSize) {
- assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
- "Only possible block sizes for REV are: 16, 32, 64");
- assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
-
- unsigned BlockElts = M[0] + 1;
-
- // If the first shuffle index is UNDEF, be optimistic.
- if (M[0] < 0)
- BlockElts = BlockSize / EltSize;
-
- if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
- return false;
-
- for (unsigned i = 0; i < NumElts; ++i) {
- // Ignore undef indices.
- if (M[i] < 0)
- continue;
- if (static_cast<unsigned>(M[i]) !=
- (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
- return false;
- }
-
- return true;
-}
-
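
isREVMask above accepts a mask exactly when every lane is reversed inside its BlockSize/EltSize-element block. The sketch below is not LLVM code and deliberately ignores undef lanes and the M[0]-derived block count; it just evaluates the same per-block formula on two concrete v8s16 masks to show which one a 64-bit REV of 16-bit elements matches.

#include <cstdio>
#include <vector>

static bool looksLikeREVMask(const std::vector<int> &M, unsigned EltSize,
                             unsigned NumElts, unsigned BlockSize) {
  if (BlockSize <= EltSize || BlockSize % EltSize != 0)
    return false;
  unsigned BlockElts = BlockSize / EltSize; // no undef lanes in this sketch
  for (unsigned i = 0; i < NumElts; ++i)
    if (static_cast<unsigned>(M[i]) !=
        (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  return true;
}

int main() {
  // v8s16 reversed within each 64-bit block: <3,2,1,0,7,6,5,4>.
  std::vector<int> PerBlock = {3, 2, 1, 0, 7, 6, 5, 4};
  printf("REV64 mask for v8s16? %s\n",
         looksLikeREVMask(PerBlock, 16, 8, 64) ? "yes" : "no");
  // Reversing the whole vector is not a REV64 of 16-bit elements.
  std::vector<int> Whole = {7, 6, 5, 4, 3, 2, 1, 0};
  printf("whole-vector reverse? %s\n",
         looksLikeREVMask(Whole, 16, 8, 64) ? "yes" : "no");
  return 0;
}
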
-/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
-/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
-static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
- unsigned &WhichResult) {
- if (NumElts % 2 != 0)
- return false;
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i < NumElts; i += 2) {
- if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
- (M[i + 1] >= 0 &&
- static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
- return false;
- }
- return true;
-}
-
-/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
-/// sources of the shuffle are different.
-static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
- unsigned NumElts) {
- // Look for the first non-undef element.
- auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
- if (FirstRealElt == M.end())
- return None;
-
- // Use APInt to handle overflow when calculating expected element.
- unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
- APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
-
- // The following shuffle indices must be the successive elements after the
- // first real element.
- if (any_of(
- make_range(std::next(FirstRealElt), M.end()),
- [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
- return None;
-
- // The index of an EXT is the first element if it is not UNDEF.
- // Watch out for the beginning UNDEFs. The EXT index should be the expected
- // value of the first element. E.g.
- // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
- // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
- // ExpectedElt is the last mask index plus 1.
- uint64_t Imm = ExpectedElt.getZExtValue();
- bool ReverseExt = false;
-
-  // There are two different cases that require reversing the input vectors.
- // For example, for vector <4 x i32> we have the following cases,
- // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
- // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
- // For both cases, we finally use mask <5, 6, 7, 0>, which requires
- // to reverse two input vectors.
- if (Imm < NumElts)
- ReverseExt = true;
- else
- Imm -= NumElts;
- return std::make_pair(ReverseExt, Imm);
-}
-
-/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
-/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
-static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
- unsigned &WhichResult) {
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i != NumElts; ++i) {
- // Skip undef indices.
- if (M[i] < 0)
- continue;
- if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
- return false;
- }
- return true;
-}
-
-/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
-/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
-static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
- unsigned &WhichResult) {
- if (NumElts % 2 != 0)
- return false;
-
- // 0 means use ZIP1, 1 means use ZIP2.
- WhichResult = (M[0] == 0 ? 0 : 1);
- unsigned Idx = WhichResult * NumElts / 2;
- for (unsigned i = 0; i != NumElts; i += 2) {
- if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
- (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
- return false;
- Idx += 1;
- }
- return true;
-}
-
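
For a fixed element count, the isTRNMask/isUZPMask/isZipMask predicates above each accept exactly one mask per WhichResult value. The sketch below is not LLVM code; it regenerates those masks for NumElts = 4 straight from the formulas in the predicates so the expected TRN/UZP/ZIP patterns are easy to see at a glance.

#include <cstdio>
#include <vector>

static std::vector<int> trnMask(unsigned NumElts, unsigned Which) {
  std::vector<int> M(NumElts);
  for (unsigned I = 0; I < NumElts; I += 2) {
    M[I] = static_cast<int>(I + Which);
    M[I + 1] = static_cast<int>(I + NumElts + Which);
  }
  return M;
}

static std::vector<int> uzpMask(unsigned NumElts, unsigned Which) {
  std::vector<int> M(NumElts);
  for (unsigned I = 0; I < NumElts; ++I)
    M[I] = static_cast<int>(2 * I + Which);
  return M;
}

static std::vector<int> zipMask(unsigned NumElts, unsigned Which) {
  std::vector<int> M(NumElts);
  unsigned Idx = Which * NumElts / 2;
  for (unsigned I = 0; I < NumElts; I += 2) {
    M[I] = static_cast<int>(Idx);
    M[I + 1] = static_cast<int>(Idx + NumElts);
    ++Idx;
  }
  return M;
}

static void dump(const char *Name, const std::vector<int> &M) {
  printf("%s: <", Name);
  for (unsigned I = 0; I < M.size(); ++I)
    printf("%d%s", M[I], I + 1 == M.size() ? ">\n" : ", ");
}

int main() {
  dump("TRN1", trnMask(4, 0)); // <0, 4, 2, 6>
  dump("TRN2", trnMask(4, 1)); // <1, 5, 3, 7>
  dump("UZP1", uzpMask(4, 0)); // <0, 2, 4, 6>
  dump("UZP2", uzpMask(4, 1)); // <1, 3, 5, 7>
  dump("ZIP1", zipMask(4, 0)); // <0, 4, 1, 5>
  dump("ZIP2", zipMask(4, 1)); // <2, 6, 3, 7>
  return 0;
}
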
-/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
-/// G_REV instruction. Returns the appropriate G_REV opcode in \p MatchInfo.
-static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT Ty = MRI.getType(Dst);
- unsigned EltSize = Ty.getScalarSizeInBits();
-
- // Element size for a rev cannot be 64.
- if (EltSize == 64)
- return false;
-
- unsigned NumElts = Ty.getNumElements();
-
- // Try to produce G_REV64
- if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
- MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
- return true;
- }
-
- // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support.
- // This should be identical to above, but with a constant 32 and constant
- // 16.
- return false;
-}
-
-/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
-/// a G_TRN1 or G_TRN2 instruction.
-static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- unsigned WhichResult;
- ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
- Register Dst = MI.getOperand(0).getReg();
- unsigned NumElts = MRI.getType(Dst).getNumElements();
- if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
- return false;
- unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
- Register V1 = MI.getOperand(1).getReg();
- Register V2 = MI.getOperand(2).getReg();
- MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
- return true;
-}
-
-/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
-/// a G_UZP1 or G_UZP2 instruction.
-///
-/// \param [in] MI - The shuffle vector instruction.
-/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
-static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- unsigned WhichResult;
- ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
- Register Dst = MI.getOperand(0).getReg();
- unsigned NumElts = MRI.getType(Dst).getNumElements();
- if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
- return false;
- unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
- Register V1 = MI.getOperand(1).getReg();
- Register V2 = MI.getOperand(2).getReg();
- MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
- return true;
-}
-
-static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- unsigned WhichResult;
- ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
- Register Dst = MI.getOperand(0).getReg();
- unsigned NumElts = MRI.getType(Dst).getNumElements();
- if (!isZipMask(ShuffleMask, NumElts, WhichResult))
- return false;
- unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
- Register V1 = MI.getOperand(1).getReg();
- Register V2 = MI.getOperand(2).getReg();
- MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
- return true;
-}
-
-/// Helper function for matchDup.
-static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
- MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- if (Lane != 0)
- return false;
-
- // Try to match a vector splat operation into a dup instruction.
- // We're looking for this pattern:
- //
- // %scalar:gpr(s64) = COPY $x0
- // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
- // %cst0:gpr(s32) = G_CONSTANT i32 0
- // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
- // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
- // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>)
- //
- // ...into:
- // %splat = G_DUP %scalar
-
- // Begin matching the insert.
- auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
- MI.getOperand(1).getReg(), MRI);
- if (!InsMI)
- return false;
- // Match the undef vector operand.
- if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
- MRI))
- return false;
-
- // Match the index constant 0.
- if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
- return false;
-
- MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
- {InsMI->getOperand(2).getReg()});
- return true;
-}
-
-/// Helper function for matchDup.
-static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
- MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(Lane >= 0 && "Expected positive lane?");
- // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
- // lane's definition directly.
- auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
- MI.getOperand(1).getReg(), MRI);
- if (!BuildVecMI)
- return false;
- Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
- MatchInfo =
- ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
- return true;
-}
-
-static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- auto MaybeLane = getSplatIndex(MI);
- if (!MaybeLane)
- return false;
- int Lane = *MaybeLane;
- // If this is undef splat, generate it via "just" vdup, if possible.
- if (Lane < 0)
- Lane = 0;
- if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
- return true;
- if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
- return true;
- return false;
-}
-
-static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- Register Dst = MI.getOperand(0).getReg();
- auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
- MRI.getType(Dst).getNumElements());
- if (!ExtInfo)
- return false;
- bool ReverseExt;
- uint64_t Imm;
- std::tie(ReverseExt, Imm) = *ExtInfo;
- Register V1 = MI.getOperand(1).getReg();
- Register V2 = MI.getOperand(2).getReg();
- if (ReverseExt)
- std::swap(V1, V2);
- uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
- Imm *= ExtFactor;
- MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
- return true;
-}
-
-/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
-/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
-static bool applyShuffleVectorPseudo(MachineInstr &MI,
- ShuffleVectorPseudo &MatchInfo) {
- MachineIRBuilder MIRBuilder(MI);
- MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
- MI.eraseFromParent();
- return true;
-}
-
-/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
-/// Special-cased because the constant operand must be emitted as a G_CONSTANT
-/// for the imported tablegen patterns to work.
-static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
- MachineIRBuilder MIRBuilder(MI);
- // Tablegen patterns expect an i32 G_CONSTANT as the final op.
- auto Cst =
- MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
- MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
- {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
- MI.eraseFromParent();
- return true;
-}
-
-/// isVShiftRImm - Check if this is a valid vector for the immediate
-/// operand of a vector shift right operation. The value must be in the range:
-/// 1 <= Value <= ElementBits for a right shift.
-static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
- int64_t &Cnt) {
- assert(Ty.isVector() && "vector shift count is not a vector type");
- MachineInstr *MI = MRI.getVRegDef(Reg);
- auto Cst = getBuildVectorConstantSplat(*MI, MRI);
- if (!Cst)
- return false;
- Cnt = *Cst;
- int64_t ElementBits = Ty.getScalarSizeInBits();
- return Cnt >= 1 && Cnt <= ElementBits;
-}
-
-/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
-static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
- int64_t &Imm) {
- assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
- MI.getOpcode() == TargetOpcode::G_LSHR);
- LLT Ty = MRI.getType(MI.getOperand(1).getReg());
- if (!Ty.isVector())
- return false;
- return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
-}
-
-static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
- int64_t &Imm) {
- unsigned Opc = MI.getOpcode();
- assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
- unsigned NewOpc =
- Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
- MachineIRBuilder MIB(MI);
- auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
- MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
- MI.eraseFromParent();
- return true;
-}
-
-/// Determine if it is possible to modify the \p RHS and predicate \p P of a
-/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
-///
-/// \returns A pair containing the updated immediate and predicate which may
-/// be used to optimize the instruction.
-///
-/// \note This assumes that the comparison has been legalized.
-Optional<std::pair<uint64_t, CmpInst::Predicate>>
-tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
- const MachineRegisterInfo &MRI) {
- const auto &Ty = MRI.getType(RHS);
- if (Ty.isVector())
- return None;
- unsigned Size = Ty.getSizeInBits();
- assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
-
- // If the RHS is not a constant, or the RHS is already a valid arithmetic
- // immediate, then there is nothing to change.
- auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
- if (!ValAndVReg)
- return None;
- uint64_t C = ValAndVReg->Value.getZExtValue();
- if (isLegalArithImmed(C))
- return None;
-
- // We have a non-arithmetic immediate. Check if adjusting the immediate and
- // adjusting the predicate will result in a legal arithmetic immediate.
- switch (P) {
- default:
- return None;
- case CmpInst::ICMP_SLT:
- case CmpInst::ICMP_SGE:
- // Check for
- //
- // x slt c => x sle c - 1
- // x sge c => x sgt c - 1
- //
- // When c is not the smallest possible negative number.
- if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
- (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
- return None;
- P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
- C -= 1;
- break;
- case CmpInst::ICMP_ULT:
- case CmpInst::ICMP_UGE:
- // Check for
- //
- // x ult c => x ule c - 1
- // x uge c => x ugt c - 1
- //
- // When c is not zero.
- if (C == 0)
- return None;
- P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
- C -= 1;
- break;
- case CmpInst::ICMP_SLE:
- case CmpInst::ICMP_SGT:
- // Check for
- //
- // x sle c => x slt c + 1
- // x sgt c => x sge c + 1
- //
- // When c is not the largest possible signed integer.
- if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
- (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
- return None;
- P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
- C += 1;
- break;
- case CmpInst::ICMP_ULE:
- case CmpInst::ICMP_UGT:
- // Check for
- //
- // x ule c => x ult c + 1
- // x ugt c => x uge c + 1
- //
- // When c is not the largest possible unsigned integer.
- if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
- (Size == 64 && C == UINT64_MAX))
- return None;
- P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
- C += 1;
- break;
- }
-
- // Check if the new constant is valid, and return the updated constant and
- // predicate if it is.
- if (Size == 32)
- C = static_cast<uint32_t>(C);
- if (!isLegalArithImmed(C))
- return None;
- return {{C, P}};
-}
-
-/// Determine whether or not it is possible to update the RHS and predicate of
-/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
-/// immediate.
-///
-/// \p MI - The G_ICMP instruction
-/// \p MatchInfo - The new RHS immediate and predicate on success
-///
-/// See tryAdjustICmpImmAndPred for valid transformations.
-bool matchAdjustICmpImmAndPred(
- MachineInstr &MI, const MachineRegisterInfo &MRI,
- std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_ICMP);
- Register RHS = MI.getOperand(3).getReg();
- auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
- if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
- MatchInfo = *MaybeNewImmAndPred;
- return true;
- }
- return false;
-}
-
-bool applyAdjustICmpImmAndPred(
- MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
- MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
- MIB.setInstrAndDebugLoc(MI);
- MachineOperand &RHS = MI.getOperand(3);
- MachineRegisterInfo &MRI = *MIB.getMRI();
- auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
- MatchInfo.first);
- Observer.changingInstr(MI);
- RHS.setReg(Cst->getOperand(0).getReg());
- MI.getOperand(1).setPredicate(MatchInfo.second);
- Observer.changedInstr(MI);
- return true;
-}
-
-bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
- std::pair<unsigned, int> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- Register Src1Reg = MI.getOperand(1).getReg();
- const LLT SrcTy = MRI.getType(Src1Reg);
- const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
-
- auto LaneIdx = getSplatIndex(MI);
- if (!LaneIdx)
- return false;
-
- // The lane idx should be within the first source vector.
- if (*LaneIdx >= SrcTy.getNumElements())
- return false;
-
- if (DstTy != SrcTy)
- return false;
-
- LLT ScalarTy = SrcTy.getElementType();
- unsigned ScalarSize = ScalarTy.getSizeInBits();
-
- unsigned Opc = 0;
- switch (SrcTy.getNumElements()) {
- case 2:
- if (ScalarSize == 64)
- Opc = AArch64::G_DUPLANE64;
- break;
- case 4:
- if (ScalarSize == 32)
- Opc = AArch64::G_DUPLANE32;
- break;
- case 8:
- if (ScalarSize == 16)
- Opc = AArch64::G_DUPLANE16;
- break;
- case 16:
- if (ScalarSize == 8)
- Opc = AArch64::G_DUPLANE8;
- break;
- default:
- break;
- }
- if (!Opc)
- return false;
-
- MatchInfo.first = Opc;
- MatchInfo.second = *LaneIdx;
- return true;
-}
-
-bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
- B.setInstrAndDebugLoc(MI);
- auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
- B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()},
- {MI.getOperand(1).getReg(), Lane});
- MI.eraseFromParent();
- return true;
-}
-
-#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
-#include "AArch64GenPostLegalizeGILowering.inc"
-#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
-
-namespace {
-#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
-#include "AArch64GenPostLegalizeGILowering.inc"
-#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
-
-class AArch64PostLegalizerLoweringInfo : public CombinerInfo {
-public:
- AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg;
-
- AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize)
- : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
- /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize,
- MinSize) {
- if (!GeneratedRuleCfg.parseCommandLineOption())
- report_fatal_error("Invalid rule identifier");
- }
-
- virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
- MachineIRBuilder &B) const override;
-};
-
-bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
- CombinerHelper Helper(Observer, B);
- AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg);
- return Generated.tryCombineAll(Observer, MI, B, Helper);
-}
-
-#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
-#include "AArch64GenPostLegalizeGILowering.inc"
-#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
-
-class AArch64PostLegalizerLowering : public MachineFunctionPass {
-public:
- static char ID;
-
- AArch64PostLegalizerLowering();
-
- StringRef getPassName() const override {
- return "AArch64PostLegalizerLowering";
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-};
-} // end anonymous namespace
-
-void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetPassConfig>();
- AU.setPreservesCFG();
- getSelectionDAGFallbackAnalysisUsage(AU);
- MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
- : MachineFunctionPass(ID) {
- initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
-}
-
-bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
- if (MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::FailedISel))
- return false;
- assert(MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::Legalized) &&
- "Expected a legalized function?");
- auto *TPC = &getAnalysis<TargetPassConfig>();
- const Function &F = MF.getFunction();
- AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize());
- Combiner C(PCInfo, TPC);
- return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
-}
-
-char AArch64PostLegalizerLowering::ID = 0;
-INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
- "Lower AArch64 MachineInstrs after legalization", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
- "Lower AArch64 MachineInstrs after legalization", false,
- false)
-
-namespace llvm {
-FunctionPass *createAArch64PostLegalizerLowering() {
- return new AArch64PostLegalizerLowering();
-}
-} // end namespace llvm
+//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Post-legalization lowering for instructions.
+///
+/// This is used to offload pattern matching from the selector.
+///
+/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
+/// a G_ZIP, G_UZP, etc.
+///
+/// General optimization combines should be handled by either the
+/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AArch64TargetMachine.h"
+#include "AArch64GlobalISelUtils.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+using namespace AArch64GISelUtils;
+
+/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
+///
+/// Used for matching target-supported shuffles before codegen.
+struct ShuffleVectorPseudo {
+ unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
+ Register Dst; ///< Destination register.
+ SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
+ ShuffleVectorPseudo(unsigned Opc, Register Dst,
+ std::initializer_list<SrcOp> SrcOps)
+ : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}
+ ShuffleVectorPseudo() {}
+};
+
+/// Check if a vector shuffle corresponds to a REV instruction with the
+/// specified blocksize.
+static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
+ unsigned BlockSize) {
+ assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
+ "Only possible block sizes for REV are: 16, 32, 64");
+ assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
+
+ unsigned BlockElts = M[0] + 1;
+
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSize;
+
+ if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
+ return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ // Ignore undef indices.
+ if (M[i] < 0)
+ continue;
+ if (static_cast<unsigned>(M[i]) !=
+ (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
+ return false;
+ }
+
+ return true;
+}
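For intuition, a REV mask reverses the element order inside each block of BlockSize bits. Below is a minimal standalone sketch of the same check (plain std::vector rather than ArrayRef, not the LLVM API), showing the masks a REV64 and a REV32 of 8-bit elements would accept:

#include <cassert>
#include <vector>

// Mirrors the per-element check above: within each block, indices run in
// reverse order; undef (negative) indices are ignored.
static bool isREVMaskSketch(const std::vector<int> &M, unsigned EltSize,
                            unsigned NumElts, unsigned BlockSize) {
  unsigned BlockElts = M[0] < 0 ? BlockSize / EltSize : M[0] + 1;
  if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
    return false;
  for (unsigned i = 0; i < NumElts; ++i) {
    if (M[i] < 0)
      continue; // Undef index.
    if (static_cast<unsigned>(M[i]) !=
        (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }
  return true;
}

int main() {
  // <8 x s8>, 64-bit blocks: the whole vector is one reversed block.
  assert(isREVMaskSketch({7, 6, 5, 4, 3, 2, 1, 0}, 8, 8, 64));
  // <8 x s8>, 32-bit blocks: each 4-element half is reversed independently.
  assert(isREVMaskSketch({3, 2, 1, 0, 7, 6, 5, 4}, 8, 8, 32));
  // The identity mask is not a REV pattern.
  assert(!isREVMaskSketch({0, 1, 2, 3, 4, 5, 6, 7}, 8, 8, 64));
  return 0;
}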
+
+/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
+/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
+static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
+ unsigned &WhichResult) {
+ if (NumElts % 2 != 0)
+ return false;
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
+ (M[i + 1] >= 0 &&
+ static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
+ return false;
+ }
+ return true;
+}
+
+/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
+/// sources of the shuffle are different.
+static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
+ unsigned NumElts) {
+ // Look for the first non-undef element.
+ auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
+ if (FirstRealElt == M.end())
+ return None;
+
+ // Use APInt to handle overflow when calculating expected element.
+ unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
+ APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
+
+ // The following shuffle indices must be the successive elements after the
+ // first real element.
+ if (any_of(
+ make_range(std::next(FirstRealElt), M.end()),
+ [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
+ return None;
+
+ // The index of an EXT is the first element if it is not UNDEF.
+ // Watch out for the beginning UNDEFs. The EXT index should be the expected
+ // value of the first element. E.g.
+ // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
+ // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
+ // ExpectedElt is the last mask index plus 1.
+ uint64_t Imm = ExpectedElt.getZExtValue();
+ bool ReverseExt = false;
+
+ // There are two different cases that require reversing the input vectors.
+ // For example, for vector <4 x i32> we have the following cases,
+ // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
+ // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
+ // For both cases, we finally use mask <5, 6, 7, 0>, which requires
+ // to reverse two input vectors.
+ if (Imm < NumElts)
+ ReverseExt = true;
+ else
+ Imm -= NumElts;
+ return std::make_pair(ReverseExt, Imm);
+}
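A rough standalone mirror of this computation (plain integers instead of APInt; illustrative only), worked through for a simple in-order mask and for the first undef-leading case from the comment. Note that matchEXT below still scales the returned element index by the element size in bytes:

#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

// Sketch of getExtMask: indices must be consecutive modulo 2*NumElts; the
// wrapped "last index + 1" recovers the EXT start element and tells us
// whether the two inputs must be swapped.
static std::optional<std::pair<bool, uint64_t>>
extMaskSketch(const std::vector<int> &M, unsigned NumElts) {
  unsigned FirstIdx = 0;
  while (FirstIdx < M.size() && M[FirstIdx] < 0)
    ++FirstIdx;
  if (FirstIdx == M.size())
    return std::nullopt;
  const uint64_t Mod = NumElts * 2;
  uint64_t Expected = (M[FirstIdx] + 1) % Mod;
  for (unsigned i = FirstIdx + 1; i < M.size(); ++i) {
    if (M[i] >= 0 && static_cast<uint64_t>(M[i]) != Expected)
      return std::nullopt;
    Expected = (Expected + 1) % Mod;
  }
  bool ReverseExt = Expected < NumElts; // Expected == last index + 1.
  uint64_t Imm = ReverseExt ? Expected : Expected - NumElts;
  return std::make_pair(ReverseExt, Imm);
}

int main() {
  // <1, 2, 3, 4> on <4 x s32>: no swap, start element 1 (byte imm 1 * 4 = 4).
  auto R = extMaskSketch({1, 2, 3, 4}, 4);
  assert(R && !R->first && R->second == 1);
  // <-1, -1, -1, 0>: treated as <5, 6, 7, 0>, inputs swapped, start element 1.
  auto S = extMaskSketch({-1, -1, -1, 0}, 4);
  assert(S && S->first && S->second == 1);
  return 0;
}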
+
+/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
+/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
+static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
+ unsigned &WhichResult) {
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // Skip undef indices.
+ if (M[i] < 0)
+ continue;
+ if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
+ return false;
+ }
+ return true;
+}
+
+/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
+/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
+static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
+ unsigned &WhichResult) {
+ if (NumElts % 2 != 0)
+ return false;
+
+ // 0 means use ZIP1, 1 means use ZIP2.
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
+ (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
+ return true;
+}
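As a quick reference for the three mask predicates above, these are the canonical masks for a <4 x s32> shuffle, checked by a small standalone sketch (plain std::vector, undef lanes omitted for brevity; not the LLVM API). W is the WhichResult selector: 0 picks the *1 form, 1 picks the *2 form.

#include <cassert>
#include <vector>

int main() {
  const unsigned N = 4;
  auto isTRN = [&](const std::vector<int> &M, unsigned W) {
    for (unsigned i = 0; i < N; i += 2)
      if ((unsigned)M[i] != i + W || (unsigned)M[i + 1] != i + N + W)
        return false;
    return true;
  };
  auto isUZP = [&](const std::vector<int> &M, unsigned W) {
    for (unsigned i = 0; i < N; ++i)
      if ((unsigned)M[i] != 2 * i + W)
        return false;
    return true;
  };
  auto isZIP = [&](const std::vector<int> &M, unsigned W) {
    unsigned Idx = W * N / 2;
    for (unsigned i = 0; i < N; i += 2, ++Idx)
      if ((unsigned)M[i] != Idx || (unsigned)M[i + 1] != Idx + N)
        return false;
    return true;
  };
  assert(isTRN({0, 4, 2, 6}, 0) && isTRN({1, 5, 3, 7}, 1)); // G_TRN1 / G_TRN2
  assert(isUZP({0, 2, 4, 6}, 0) && isUZP({1, 3, 5, 7}, 1)); // G_UZP1 / G_UZP2
  assert(isZIP({0, 4, 1, 5}, 0) && isZIP({2, 6, 3, 7}, 1)); // G_ZIP1 / G_ZIP2
  return 0;
}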
+
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
+/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
+static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(Dst);
+ unsigned EltSize = Ty.getScalarSizeInBits();
+
+ // Element size for a rev cannot be 64.
+ if (EltSize == 64)
+ return false;
+
+ unsigned NumElts = Ty.getNumElements();
+
+ // Try to produce G_REV64
+ if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
+ MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
+ return true;
+ }
+
+ // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support.
+ // This should be identical to above, but with a constant 32 and constant
+ // 16.
+ return false;
+}
+
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
+/// a G_TRN1 or G_TRN2 instruction.
+static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ unsigned WhichResult;
+ ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+ Register Dst = MI.getOperand(0).getReg();
+ unsigned NumElts = MRI.getType(Dst).getNumElements();
+ if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
+ return false;
+ unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
+ Register V1 = MI.getOperand(1).getReg();
+ Register V2 = MI.getOperand(2).getReg();
+ MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
+ return true;
+}
+
+/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
+/// a G_UZP1 or G_UZP2 instruction.
+///
+/// \param [in] MI - The shuffle vector instruction.
+/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
+static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ unsigned WhichResult;
+ ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+ Register Dst = MI.getOperand(0).getReg();
+ unsigned NumElts = MRI.getType(Dst).getNumElements();
+ if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
+ return false;
+ unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
+ Register V1 = MI.getOperand(1).getReg();
+ Register V2 = MI.getOperand(2).getReg();
+ MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
+ return true;
+}
+
+static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ unsigned WhichResult;
+ ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
+ Register Dst = MI.getOperand(0).getReg();
+ unsigned NumElts = MRI.getType(Dst).getNumElements();
+ if (!isZipMask(ShuffleMask, NumElts, WhichResult))
+ return false;
+ unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
+ Register V1 = MI.getOperand(1).getReg();
+ Register V2 = MI.getOperand(2).getReg();
+ MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
+ return true;
+}
+
+/// Helper function for matchDup.
+static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ if (Lane != 0)
+ return false;
+
+ // Try to match a vector splat operation into a dup instruction.
+ // We're looking for this pattern:
+ //
+ // %scalar:gpr(s64) = COPY $x0
+ // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
+ // %cst0:gpr(s32) = G_CONSTANT i32 0
+ // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
+ // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
+ // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>)
+ //
+ // ...into:
+ // %splat = G_DUP %scalar
+
+ // Begin matching the insert.
+ auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
+ MI.getOperand(1).getReg(), MRI);
+ if (!InsMI)
+ return false;
+ // Match the undef vector operand.
+ if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
+ MRI))
+ return false;
+
+ // Match the index constant 0.
+ if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
+ return false;
+
+ MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
+ {InsMI->getOperand(2).getReg()});
+ return true;
+}
+
+/// Helper function for matchDup.
+static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(Lane >= 0 && "Expected positive lane?");
+ // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
+ // lane's definition directly.
+ auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
+ MI.getOperand(1).getReg(), MRI);
+ if (!BuildVecMI)
+ return false;
+ Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
+ MatchInfo =
+ ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
+ return true;
+}
+
+static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ auto MaybeLane = getSplatIndex(MI);
+ if (!MaybeLane)
+ return false;
+ int Lane = *MaybeLane;
+ // If this is an undef splat, generate it via "just" vdup, if possible.
+ if (Lane < 0)
+ Lane = 0;
+ if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
+ return true;
+ if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
+ return true;
+ return false;
+}
+
+static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ Register Dst = MI.getOperand(0).getReg();
+ auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
+ MRI.getType(Dst).getNumElements());
+ if (!ExtInfo)
+ return false;
+ bool ReverseExt;
+ uint64_t Imm;
+ std::tie(ReverseExt, Imm) = *ExtInfo;
+ Register V1 = MI.getOperand(1).getReg();
+ Register V2 = MI.getOperand(2).getReg();
+ if (ReverseExt)
+ std::swap(V1, V2);
+ uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
+ Imm *= ExtFactor;
+ MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
+ return true;
+}
+
+/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
+/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
+static bool applyShuffleVectorPseudo(MachineInstr &MI,
+ ShuffleVectorPseudo &MatchInfo) {
+ MachineIRBuilder MIRBuilder(MI);
+ MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
+ MI.eraseFromParent();
+ return true;
+}
+
+/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
+/// Special-cased because the constant operand must be emitted as a G_CONSTANT
+/// for the imported tablegen patterns to work.
+static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
+ MachineIRBuilder MIRBuilder(MI);
+ // Tablegen patterns expect an i32 G_CONSTANT as the final op.
+ auto Cst =
+ MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
+ MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
+ {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+ MI.eraseFromParent();
+ return true;
+}
+
+/// isVShiftRImm - Check if this is a valid vector for the immediate
+/// operand of a vector shift right operation. The value must be in the range:
+/// 1 <= Value <= ElementBits for a right shift.
+static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
+ int64_t &Cnt) {
+ assert(Ty.isVector() && "vector shift count is not a vector type");
+ MachineInstr *MI = MRI.getVRegDef(Reg);
+ auto Cst = getBuildVectorConstantSplat(*MI, MRI);
+ if (!Cst)
+ return false;
+ Cnt = *Cst;
+ int64_t ElementBits = Ty.getScalarSizeInBits();
+ return Cnt >= 1 && Cnt <= ElementBits;
+}
+
+/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
+static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
+ int64_t &Imm) {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
+ MI.getOpcode() == TargetOpcode::G_LSHR);
+ LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+ if (!Ty.isVector())
+ return false;
+ return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
+}
+
+static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
+ int64_t &Imm) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
+ unsigned NewOpc =
+ Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
+ MachineIRBuilder MIB(MI);
+ auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
+ MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
+ MI.eraseFromParent();
+ return true;
+}
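A minimal sketch of the immediate range these two combines accept (a hypothetical helper mirroring the check in isVShiftRImm above): for an element width of N bits, a vector right shift by a splat constant is only rewritten to G_VASHR/G_VLSHR when the amount lies in [1, N].

#include <cassert>

// Mirrors the range check in isVShiftRImm.
static bool isValidVShiftRImm(long long Imm, unsigned ElementBits) {
  return Imm >= 1 && Imm <= ElementBits;
}

int main() {
  // <4 x s32>: shifting right by a splat of 32 still lowers to the pseudo.
  assert(isValidVShiftRImm(32, 32));
  assert(isValidVShiftRImm(1, 32));
  // 0 and 33 are rejected, so the generic G_ASHR/G_LSHR is left untouched.
  assert(!isValidVShiftRImm(0, 32));
  assert(!isValidVShiftRImm(33, 32));
  return 0;
}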
+
+/// Determine if it is possible to modify the \p RHS and predicate \p P of a
+/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
+///
+/// \returns A pair containing the updated immediate and predicate which may
+/// be used to optimize the instruction.
+///
+/// \note This assumes that the comparison has been legalized.
+Optional<std::pair<uint64_t, CmpInst::Predicate>>
+tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
+ const MachineRegisterInfo &MRI) {
+ const auto &Ty = MRI.getType(RHS);
+ if (Ty.isVector())
+ return None;
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
+
+ // If the RHS is not a constant, or the RHS is already a valid arithmetic
+ // immediate, then there is nothing to change.
+ auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
+ if (!ValAndVReg)
+ return None;
+ uint64_t C = ValAndVReg->Value.getZExtValue();
+ if (isLegalArithImmed(C))
+ return None;
+
+ // We have a non-arithmetic immediate. Check if adjusting the immediate and
+ // adjusting the predicate will result in a legal arithmetic immediate.
+ switch (P) {
+ default:
+ return None;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SGE:
+ // Check for
+ //
+ // x slt c => x sle c - 1
+ // x sge c => x sgt c - 1
+ //
+ // When c is not the smallest possible negative number.
+ if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
+ (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
+ return None;
+ P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
+ C -= 1;
+ break;
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_UGE:
+ // Check for
+ //
+ // x ult c => x ule c - 1
+ // x uge c => x ugt c - 1
+ //
+ // When c is not zero.
+ if (C == 0)
+ return None;
+ P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
+ C -= 1;
+ break;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::ICMP_SGT:
+ // Check for
+ //
+ // x sle c => x slt c + 1
+ // x sgt c => x sge c + 1
+ //
+ // When c is not the largest possible signed integer.
+ if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
+ (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
+ return None;
+ P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
+ C += 1;
+ break;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_UGT:
+ // Check for
+ //
+ // x ule c => x ult c + 1
+ // x ugt c => x uge c + 1
+ //
+ // When c is not the largest possible unsigned integer.
+ if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
+ (Size == 64 && C == UINT64_MAX))
+ return None;
+ P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
+ C += 1;
+ break;
+ }
+
+ // Check if the new constant is valid, and return the updated constant and
+ // predicate if it is.
+ if (Size == 32)
+ C = static_cast<uint32_t>(C);
+ if (!isLegalArithImmed(C))
+ return None;
+ return {{C, P}};
+}
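Concretely: an unsigned compare against 4097 (0x1001) cannot be encoded as an AArch64 arithmetic immediate, but x ult 4097 can be rewritten as x ule 4096, and 4096 (0x1000) is encodable. The following standalone sketch covers only the ULT/UGE leg, with a helper that mirrors the 12-bit, optionally shifted-by-12, immediate rule assumed for isLegalArithImmed (an assumption; the real helper lives elsewhere in the backend):

#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

// Assumed encoding rule: a 12-bit value, optionally shifted left by 12.
static bool isLegalArithImmedSketch(uint64_t C) {
  return (C >> 12) == 0 || ((C & 0xfffULL) == 0 && (C >> 24) == 0);
}

enum class Pred { ULT, ULE, UGE, UGT };

// ULT/UGE leg of the transform above: x ult c => x ule c - 1 (and its dual).
static std::optional<std::pair<uint64_t, Pred>> adjustUnsigned(uint64_t C,
                                                               Pred P) {
  if (isLegalArithImmedSketch(C) || C == 0)
    return std::nullopt;
  Pred NewP = (P == Pred::ULT) ? Pred::ULE : Pred::UGT;
  uint64_t NewC = C - 1;
  if (!isLegalArithImmedSketch(NewC))
    return std::nullopt;
  return std::make_pair(NewC, NewP);
}

int main() {
  // x ult 4097  ->  x ule 4096 (0x1000 is a legal arithmetic immediate).
  auto R = adjustUnsigned(4097, Pred::ULT);
  assert(R && R->first == 4096 && R->second == Pred::ULE);
  // 4098 - 1 = 4097 is still not encodable, so the compare is left alone.
  assert(!adjustUnsigned(4098, Pred::ULT));
  return 0;
}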
+
+/// Determine whether or not it is possible to update the RHS and predicate of
+/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
+/// immediate.
+///
+/// \p MI - The G_ICMP instruction
+/// \p MatchInfo - The new RHS immediate and predicate on success
+///
+/// See tryAdjustICmpImmAndPred for valid transformations.
+bool matchAdjustICmpImmAndPred(
+ MachineInstr &MI, const MachineRegisterInfo &MRI,
+ std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+ Register RHS = MI.getOperand(3).getReg();
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
+ MatchInfo = *MaybeNewImmAndPred;
+ return true;
+ }
+ return false;
+}
+
+bool applyAdjustICmpImmAndPred(
+ MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
+ MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
+ MIB.setInstrAndDebugLoc(MI);
+ MachineOperand &RHS = MI.getOperand(3);
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
+ MatchInfo.first);
+ Observer.changingInstr(MI);
+ RHS.setReg(Cst->getOperand(0).getReg());
+ MI.getOperand(1).setPredicate(MatchInfo.second);
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::pair<unsigned, int> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ Register Src1Reg = MI.getOperand(1).getReg();
+ const LLT SrcTy = MRI.getType(Src1Reg);
+ const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto LaneIdx = getSplatIndex(MI);
+ if (!LaneIdx)
+ return false;
+
+ // The lane idx should be within the first source vector.
+ if (*LaneIdx >= SrcTy.getNumElements())
+ return false;
+
+ if (DstTy != SrcTy)
+ return false;
+
+ LLT ScalarTy = SrcTy.getElementType();
+ unsigned ScalarSize = ScalarTy.getSizeInBits();
+
+ unsigned Opc = 0;
+ switch (SrcTy.getNumElements()) {
+ case 2:
+ if (ScalarSize == 64)
+ Opc = AArch64::G_DUPLANE64;
+ break;
+ case 4:
+ if (ScalarSize == 32)
+ Opc = AArch64::G_DUPLANE32;
+ break;
+ case 8:
+ if (ScalarSize == 16)
+ Opc = AArch64::G_DUPLANE16;
+ break;
+ case 16:
+ if (ScalarSize == 8)
+ Opc = AArch64::G_DUPLANE8;
+ break;
+ default:
+ break;
+ }
+ if (!Opc)
+ return false;
+
+ MatchInfo.first = Opc;
+ MatchInfo.second = *LaneIdx;
+ return true;
+}
+
+bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ B.setInstrAndDebugLoc(MI);
+ auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
+ B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()},
+ {MI.getOperand(1).getReg(), Lane});
+ MI.eraseFromParent();
+ return true;
+}
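The element-count/element-size pairs accepted by matchDupLane above appear to cover only the 128-bit vector shapes. A tiny standalone sketch of that opcode selection (the G_DUPLANE names here are stand-ins for the AArch64 pseudo opcodes, not the real enum):

#include <cassert>

enum DupLaneOpc { Invalid, G_DUPLANE8, G_DUPLANE16, G_DUPLANE32, G_DUPLANE64 };

// Mirrors the switch in matchDupLane: only full 128-bit shapes are handled.
static DupLaneOpc pickDupLane(unsigned NumElts, unsigned ScalarSize) {
  if (NumElts == 2 && ScalarSize == 64) return G_DUPLANE64;
  if (NumElts == 4 && ScalarSize == 32) return G_DUPLANE32;
  if (NumElts == 8 && ScalarSize == 16) return G_DUPLANE16;
  if (NumElts == 16 && ScalarSize == 8) return G_DUPLANE8;
  return Invalid;
}

int main() {
  assert(pickDupLane(4, 32) == G_DUPLANE32); // <4 x s32> splat of lane N.
  assert(pickDupLane(2, 64) == G_DUPLANE64); // <2 x s64>.
  assert(pickDupLane(2, 32) == Invalid);     // 64-bit vectors are not matched.
  return 0;
}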
+
+#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
+#include "AArch64GenPostLegalizeGILowering.inc"
+#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
+
+namespace {
+#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
+#include "AArch64GenPostLegalizeGILowering.inc"
+#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
+
+class AArch64PostLegalizerLoweringInfo : public CombinerInfo {
+public:
+ AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg;
+
+ AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize)
+ : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+ /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize,
+ MinSize) {
+ if (!GeneratedRuleCfg.parseCommandLineOption())
+ report_fatal_error("Invalid rule identifier");
+ }
+
+ virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+ MachineIRBuilder &B) const override;
+};
+
+bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer,
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ CombinerHelper Helper(Observer, B);
+ AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg);
+ return Generated.tryCombineAll(Observer, MI, B, Helper);
+}
+
+#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
+#include "AArch64GenPostLegalizeGILowering.inc"
+#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
+
+class AArch64PostLegalizerLowering : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AArch64PostLegalizerLowering();
+
+ StringRef getPassName() const override {
+ return "AArch64PostLegalizerLowering";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+} // end anonymous namespace
+
+void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
+ : MachineFunctionPass(ID) {
+ initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
+}
+
+bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ assert(MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Legalized) &&
+ "Expected a legalized function?");
+ auto *TPC = &getAnalysis<TargetPassConfig>();
+ const Function &F = MF.getFunction();
+ AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize());
+ Combiner C(PCInfo, TPC);
+ return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
+}
+
+char AArch64PostLegalizerLowering::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
+ "Lower AArch64 MachineInstrs after legalization", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
+ "Lower AArch64 MachineInstrs after legalization", false,
+ false)
+
+namespace llvm {
+FunctionPass *createAArch64PostLegalizerLowering() {
+ return new AArch64PostLegalizerLowering();
+}
+} // end namespace llvm
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
index 2f882ecb1f..00436b5924 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -1,187 +1,187 @@
-//=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass does post-instruction-selection optimizations in the GlobalISel
-// pipeline, before the rest of codegen runs.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AArch64.h"
-#include "AArch64TargetMachine.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/Debug.h"
-
-#define DEBUG_TYPE "aarch64-post-select-optimize"
-
-using namespace llvm;
-
-namespace {
-class AArch64PostSelectOptimize : public MachineFunctionPass {
-public:
- static char ID;
-
- AArch64PostSelectOptimize();
-
- StringRef getPassName() const override {
- return "AArch64 Post Select Optimizer";
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
-private:
- bool optimizeNZCVDefs(MachineBasicBlock &MBB);
-};
-} // end anonymous namespace
-
-void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetPassConfig>();
- AU.setPreservesCFG();
- getSelectionDAGFallbackAnalysisUsage(AU);
- MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-AArch64PostSelectOptimize::AArch64PostSelectOptimize()
- : MachineFunctionPass(ID) {
- initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
-}
-
-unsigned getNonFlagSettingVariant(unsigned Opc) {
- switch (Opc) {
- default:
- return 0;
- case AArch64::SUBSXrr:
- return AArch64::SUBXrr;
- case AArch64::SUBSWrr:
- return AArch64::SUBWrr;
- case AArch64::SUBSXrs:
- return AArch64::SUBXrs;
- case AArch64::SUBSXri:
- return AArch64::SUBXri;
- case AArch64::SUBSWri:
- return AArch64::SUBWri;
- }
-}
-
-bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
- // Consider the following code:
- // FCMPSrr %0, %1, implicit-def $nzcv
- // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
- // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
- // FCMPSrr %0, %1, implicit-def $nzcv
- // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
- // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
- // when we have a single IR fcmp being used by two selects. During selection,
- // to ensure that there can be no clobbering of nzcv between the fcmp and the
- // csel, we have to generate an fcmp immediately before each csel is
- // selected.
- // However, often we can essentially CSE these together later in MachineCSE.
- // This doesn't work though if there are unrelated flag-setting instructions
- // in between the two FCMPs. In this case, the SUBS defines NZCV
- // but it doesn't have any users, being overwritten by the second FCMP.
- //
- // Our solution here is to try to convert flag-setting operations within
- // an interval of identical FCMPs, so that CSE will be able to eliminate one.
- bool Changed = false;
- const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo();
-
- // The first step is to find the first and last FCMPs. If we have found
- // at least two, then set the limit of the bottom-up walk to the first FCMP
- // found since we're only interested in dealing with instructions between
- // them.
- MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
- for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
- if (MI.getOpcode() == AArch64::FCMPSrr ||
- MI.getOpcode() == AArch64::FCMPDrr) {
- if (!FirstCmp)
- FirstCmp = &MI;
- else
- LastCmp = &MI;
- }
- }
-
- // In addition to converting flag-setting ops in fcmp ranges into non-flag
- // setting ops, across the whole basic block we also detect when nzcv
- // implicit-defs are dead, and mark them as dead. Peephole optimizations need
- // this information later.
-
- LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
- LRU.addLiveOuts(MBB);
- bool NZCVDead = LRU.available(AArch64::NZCV);
- bool InsideCmpRange = false;
- for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
- LRU.stepBackward(II);
-
- if (LastCmp) { // There's a range present in this block.
- // If we're inside an fcmp range, look for begin instruction.
- if (InsideCmpRange && &II == FirstCmp)
- InsideCmpRange = false;
- else if (&II == LastCmp)
- InsideCmpRange = true;
- }
-
- // Did this instruction define NZCV?
- bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
- if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
- // If we have a def and NZCV is dead, then we may convert this op.
- unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
- int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
- if (DeadNZCVIdx != -1) {
- // If we're inside an fcmp range, then convert flag setting ops.
- if (InsideCmpRange && NewOpc) {
- LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
- "op in fcmp range: "
- << II);
- II.setDesc(TII->get(NewOpc));
- II.RemoveOperand(DeadNZCVIdx);
- Changed |= true;
- } else {
- // Otherwise, we just set the nzcv imp-def operand to be dead, so the
- // peephole optimizations can optimize them further.
- II.getOperand(DeadNZCVIdx).setIsDead();
- }
- }
- }
-
- NZCVDead = NZCVDeadAtCurrInstr;
- }
- return Changed;
-}
-
-bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
- if (MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::FailedISel))
- return false;
- assert(MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::Selected) &&
- "Expected a selected MF");
-
- bool Changed = false;
- for (auto &BB : MF)
- Changed |= optimizeNZCVDefs(BB);
- return Changed;
-}
-
-char AArch64PostSelectOptimize::ID = 0;
-INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
- "Optimize AArch64 selected instructions",
- false, false)
-INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
- "Optimize AArch64 selected instructions", false,
- false)
-
-namespace llvm {
-FunctionPass *createAArch64PostSelectOptimize() {
- return new AArch64PostSelectOptimize();
-}
-} // end namespace llvm
+//=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does post-instruction-selection optimizations in the GlobalISel
+// pipeline, before the rest of codegen runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "aarch64-post-select-optimize"
+
+using namespace llvm;
+
+namespace {
+class AArch64PostSelectOptimize : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AArch64PostSelectOptimize();
+
+ StringRef getPassName() const override {
+ return "AArch64 Post Select Optimizer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+ bool optimizeNZCVDefs(MachineBasicBlock &MBB);
+};
+} // end anonymous namespace
+
+void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AArch64PostSelectOptimize::AArch64PostSelectOptimize()
+ : MachineFunctionPass(ID) {
+ initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
+}
+
+unsigned getNonFlagSettingVariant(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return 0;
+ case AArch64::SUBSXrr:
+ return AArch64::SUBXrr;
+ case AArch64::SUBSWrr:
+ return AArch64::SUBWrr;
+ case AArch64::SUBSXrs:
+ return AArch64::SUBXrs;
+ case AArch64::SUBSXri:
+ return AArch64::SUBXri;
+ case AArch64::SUBSWri:
+ return AArch64::SUBWri;
+ }
+}
+
+bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
+ // Consider the following code:
+ // FCMPSrr %0, %1, implicit-def $nzcv
+ // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
+ // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
+ // FCMPSrr %0, %1, implicit-def $nzcv
+ // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
+ // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
+ // when we have a single IR fcmp being used by two selects. During selection,
+ // to ensure that there can be no clobbering of nzcv between the fcmp and the
+ // csel, we have to generate an fcmp immediately before each csel is
+ // selected.
+ // However, often we can essentially CSE these together later in MachineCSE.
+ // This doesn't work though if there are unrelated flag-setting instructions
+ // in between the two FCMPs. In this case, the SUBS defines NZCV
+ // but it doesn't have any users, being overwritten by the second FCMP.
+ //
+ // Our solution here is to try to convert flag-setting operations within
+ // an interval of identical FCMPs, so that CSE will be able to eliminate one.
+ bool Changed = false;
+ const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+
+ // The first step is to find the first and last FCMPs. If we have found
+ // at least two, then set the limit of the bottom-up walk to the first FCMP
+ // found since we're only interested in dealing with instructions between
+ // them.
+ MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
+ for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
+ if (MI.getOpcode() == AArch64::FCMPSrr ||
+ MI.getOpcode() == AArch64::FCMPDrr) {
+ if (!FirstCmp)
+ FirstCmp = &MI;
+ else
+ LastCmp = &MI;
+ }
+ }
+
+ // In addition to converting flag-setting ops in fcmp ranges into non-flag
+ // setting ops, across the whole basic block we also detect when nzcv
+ // implicit-defs are dead, and mark them as dead. Peephole optimizations need
+ // this information later.
+
+ LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
+ LRU.addLiveOuts(MBB);
+ bool NZCVDead = LRU.available(AArch64::NZCV);
+ bool InsideCmpRange = false;
+ for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
+ LRU.stepBackward(II);
+
+ if (LastCmp) { // There's a range present in this block.
+ // If we're inside an fcmp range, look for begin instruction.
+ if (InsideCmpRange && &II == FirstCmp)
+ InsideCmpRange = false;
+ else if (&II == LastCmp)
+ InsideCmpRange = true;
+ }
+
+ // Did this instruction define NZCV?
+ bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
+ if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
+ // If we have a def and NZCV is dead, then we may convert this op.
+ unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
+ int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
+ if (DeadNZCVIdx != -1) {
+ // If we're inside an fcmp range, then convert flag setting ops.
+ if (InsideCmpRange && NewOpc) {
+ LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
+ "op in fcmp range: "
+ << II);
+ II.setDesc(TII->get(NewOpc));
+ II.RemoveOperand(DeadNZCVIdx);
+ Changed |= true;
+ } else {
+ // Otherwise, we just set the nzcv imp-def operand to be dead, so the
+ // peephole optimizations can optimize them further.
+ II.getOperand(DeadNZCVIdx).setIsDead();
+ }
+ }
+ }
+
+ NZCVDead = NZCVDeadAtCurrInstr;
+ }
+ return Changed;
+}
+
+bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ assert(MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected) &&
+ "Expected a selected MF");
+
+ bool Changed = false;
+ for (auto &BB : MF)
+ Changed |= optimizeNZCVDefs(BB);
+ return Changed;
+}
+
+char AArch64PostSelectOptimize::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
+ "Optimize AArch64 selected instructions",
+ false, false)
+INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
+ "Optimize AArch64 selected instructions", false,
+ false)
+
+namespace llvm {
+FunctionPass *createAArch64PostSelectOptimize() {
+ return new AArch64PostSelectOptimize();
+}
+} // end namespace llvm
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 5f9b64e274..2686f6dc46 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -104,16 +104,16 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return Helper.tryCombineShuffleVector(MI);
- case TargetOpcode::G_MEMCPY:
- case TargetOpcode::G_MEMMOVE:
- case TargetOpcode::G_MEMSET: {
- // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
- // heuristics decide.
- unsigned MaxLen = EnableOpt ? 0 : 32;
- // Try to inline memcpy type calls if optimizations are enabled.
- return !EnableMinSize ? Helper.tryCombineMemCpyFamily(MI, MaxLen) : false;
- }
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET: {
+ // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
+ // heuristics decide.
+ unsigned MaxLen = EnableOpt ? 0 : 32;
+ // Try to inline memcpy type calls if optimizations are enabled.
+ return !EnableMinSize ? Helper.tryCombineMemCpyFamily(MI, MaxLen) : false;
}
+ }
return false;
}
@@ -188,7 +188,7 @@ INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
namespace llvm {
-FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone) {
+FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone) {
return new AArch64PreLegalizerCombiner(IsOptNone);
}
} // end namespace llvm
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index c76c43389b..e26fe60d93 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -13,7 +13,7 @@
#include "AArch64RegisterBankInfo.h"
#include "AArch64InstrInfo.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
@@ -466,10 +466,10 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
getValueMapping(RBIdx, Size), NumOperands);
}
-bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
- const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- unsigned Depth) const {
+bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
unsigned Op = MI.getOpcode();
// Do we have an explicit floating point instruction?
@@ -481,30 +481,30 @@ bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
if (Op != TargetOpcode::COPY && !MI.isPHI())
return false;
- // Check if we already know the register bank.
- auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
- if (RB == &AArch64::FPRRegBank)
- return true;
- if (RB == &AArch64::GPRRegBank)
- return false;
-
- // We don't know anything.
- //
- // If we have a phi, we may be able to infer that it will be assigned a FPR
- // based off of its inputs.
- if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
- return false;
-
- return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
- return Op.isReg() &&
- onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
- });
+ // Check if we already know the register bank.
+ auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
+ if (RB == &AArch64::FPRRegBank)
+ return true;
+ if (RB == &AArch64::GPRRegBank)
+ return false;
+
+ // We don't know anything.
+ //
+ // If we have a phi, we may be able to infer that it will be assigned a FPR
+ // based off of its inputs.
+ if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
+ return false;
+
+ return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
+ return Op.isReg() &&
+ onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
+ });
}
bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- unsigned Depth) const {
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
@@ -513,13 +513,13 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
default:
break;
}
- return hasFPConstraints(MI, MRI, TRI, Depth);
+ return hasFPConstraints(MI, MRI, TRI, Depth);
}
-bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
- const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- unsigned Depth) const {
+bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
switch (MI.getOpcode()) {
case AArch64::G_DUP:
case TargetOpcode::G_SITOFP:
@@ -530,7 +530,7 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
default:
break;
}
- return hasFPConstraints(MI, MRI, TRI, Depth);
+ return hasFPConstraints(MI, MRI, TRI, Depth);
}
const RegisterBankInfo::InstructionMapping &
@@ -680,18 +680,18 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case TargetOpcode::G_SITOFP:
- case TargetOpcode::G_UITOFP: {
+ case TargetOpcode::G_UITOFP: {
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
break;
- // Integer to FP conversions don't necessarily happen between GPR -> FPR
- // regbanks. They can also be done within an FPR register.
- Register SrcReg = MI.getOperand(1).getReg();
- if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
- OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
- else
- OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
+ // Integer to FP conversions don't necessarily happen between GPR -> FPR
+ // regbanks. They can also be done within an FPR register.
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+ else
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
break;
- }
+ }
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
@@ -729,8 +729,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// assume this was a floating point load in the IR.
// If it was not, we would have had a bitcast before
// reaching that instruction.
- // Int->FP conversion operations are also captured in onlyDefinesFP().
- if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
+ // Int->FP conversion operations are also captured in onlyDefinesFP().
+ if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
@@ -853,7 +853,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
- case TargetOpcode::G_BUILD_VECTOR: {
+ case TargetOpcode::G_BUILD_VECTOR: {
// If the first source operand belongs to a FPR register bank, then make
// sure that we preserve that.
if (OpRegBankIdx[1] != PMI_FirstGPR)
@@ -864,17 +864,17 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Get the instruction that defined the source operand reg, and check if
// it's a floating point operation. Or, if it's a type like s16 which
- // doesn't have an exact size gpr register class. The exception is if the
- // build_vector has all constant operands, which may be better to leave as
- // gpr without copies, so it can be matched in imported patterns.
+ // doesn't have an exact size gpr register class. The exception is if the
+ // build_vector has all constant operands, which may be better to leave as
+ // gpr without copies, so it can be matched in imported patterns.
MachineInstr *DefMI = MRI.getVRegDef(VReg);
unsigned DefOpc = DefMI->getOpcode();
const LLT SrcTy = MRI.getType(VReg);
- if (all_of(MI.operands(), [&](const MachineOperand &Op) {
- return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
- TargetOpcode::G_CONSTANT;
- }))
- break;
+ if (all_of(MI.operands(), [&](const MachineOperand &Op) {
+ return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
+ TargetOpcode::G_CONSTANT;
+ }))
+ break;
if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
SrcTy.getSizeInBits() < 32) {
// Have a floating point op.
@@ -885,30 +885,30 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
- case TargetOpcode::G_VECREDUCE_FADD:
- case TargetOpcode::G_VECREDUCE_FMUL:
- case TargetOpcode::G_VECREDUCE_FMAX:
- case TargetOpcode::G_VECREDUCE_FMIN:
- case TargetOpcode::G_VECREDUCE_ADD:
- case TargetOpcode::G_VECREDUCE_MUL:
- case TargetOpcode::G_VECREDUCE_AND:
- case TargetOpcode::G_VECREDUCE_OR:
- case TargetOpcode::G_VECREDUCE_XOR:
- case TargetOpcode::G_VECREDUCE_SMAX:
- case TargetOpcode::G_VECREDUCE_SMIN:
- case TargetOpcode::G_VECREDUCE_UMAX:
- case TargetOpcode::G_VECREDUCE_UMIN:
- // Reductions produce a scalar value from a vector, the scalar should be on
- // FPR bank.
- OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
- break;
- case TargetOpcode::G_VECREDUCE_SEQ_FADD:
- case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
- // These reductions also take a scalar accumulator input.
- // Assign them FPR for now.
- OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
- break;
- }
+ case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_FMUL:
+ case TargetOpcode::G_VECREDUCE_FMAX:
+ case TargetOpcode::G_VECREDUCE_FMIN:
+ case TargetOpcode::G_VECREDUCE_ADD:
+ case TargetOpcode::G_VECREDUCE_MUL:
+ case TargetOpcode::G_VECREDUCE_AND:
+ case TargetOpcode::G_VECREDUCE_OR:
+ case TargetOpcode::G_VECREDUCE_XOR:
+ case TargetOpcode::G_VECREDUCE_SMAX:
+ case TargetOpcode::G_VECREDUCE_SMIN:
+ case TargetOpcode::G_VECREDUCE_UMAX:
+ case TargetOpcode::G_VECREDUCE_UMIN:
+ // Reductions produce a scalar value from a vector, the scalar should be on
+ // FPR bank.
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+ break;
+ case TargetOpcode::G_VECREDUCE_SEQ_FADD:
+ case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
+ // These reductions also take a scalar accumulator input.
+ // Assign them FPR for now.
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
+ break;
+ }
// Finally construct the computed mapping.
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
index 019017bc3e..c8cfe53299 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
@@ -114,20 +114,20 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
const InstructionMapping &
getSameKindOfOperandsMapping(const MachineInstr &MI) const;
- /// Maximum recursion depth for hasFPConstraints.
- const unsigned MaxFPRSearchDepth = 2;
-
- /// \returns true if \p MI only uses and defines FPRs.
+ /// Maximum recursion depth for hasFPConstraints.
+ const unsigned MaxFPRSearchDepth = 2;
+
+ /// \returns true if \p MI only uses and defines FPRs.
bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
- /// \returns true if \p MI only uses FPRs.
+ /// \returns true if \p MI only uses FPRs.
bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
- /// \returns true if \p MI only defines FPRs.
+ /// \returns true if \p MI only defines FPRs.
bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
public:
AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);
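The MaxFPRSearchDepth / hasFPConstraints comments above describe a depth-bounded walk over def chains. A hedged, generic sketch of that shape (not the actual implementation; the helper name and predicate are invented for illustration):

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

// Depth-limited recursion: Pred stands in for "looks like an FP producer".
static bool defChainSatisfies(
    const llvm::MachineInstr &MI, const llvm::MachineRegisterInfo &MRI,
    llvm::function_ref<bool(const llvm::MachineInstr &)> Pred,
    unsigned Depth = 0) {
  const unsigned MaxFPRSearchDepth = 2; // same bound as the constant above
  if (Pred(MI))
    return true;
  if (Depth >= MaxFPRSearchDepth)
    return false; // give up rather than chase arbitrarily long chains
  for (const llvm::MachineOperand &Use : MI.uses())
    if (Use.isReg() && Use.getReg().isVirtual())
      if (const llvm::MachineInstr *Def = MRI.getVRegDef(Use.getReg()))
        if (defChainSatisfies(*Def, MRI, Pred, Depth + 1))
          return true;
  return false;
}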
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 2cbe8315bc..77b7c09946 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -763,8 +763,8 @@ static inline bool isSVECpyImm(int64_t Imm) {
bool IsImm8 = int8_t(Imm) == Imm;
bool IsImm16 = int16_t(Imm & ~0xff) == Imm;
- if (std::is_same<int8_t, std::make_signed_t<T>>::value ||
- std::is_same<int8_t, T>::value)
+ if (std::is_same<int8_t, std::make_signed_t<T>>::value ||
+ std::is_same<int8_t, T>::value)
return IsImm8 || uint8_t(Imm) == Imm;
if (std::is_same<int16_t, std::make_signed_t<T>>::value)
@@ -776,8 +776,8 @@ static inline bool isSVECpyImm(int64_t Imm) {
/// Returns true if Imm is valid for ADD/SUB.
template <typename T>
static inline bool isSVEAddSubImm(int64_t Imm) {
- bool IsInt8t = std::is_same<int8_t, std::make_signed_t<T>>::value ||
- std::is_same<int8_t, T>::value;
+ bool IsInt8t = std::is_same<int8_t, std::make_signed_t<T>>::value ||
+ std::is_same<int8_t, T>::value;
return uint8_t(Imm) == Imm || (!IsInt8t && uint16_t(Imm & ~0xff) == Imm);
}
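The templates above encode the SVE ADD/SUB immediate rule: an unsigned 8-bit value, optionally shifted left by 8 for element types wider than a byte. A self-contained restatement with worked values (sketch, not the header itself):

#include <cassert>
#include <cstdint>

static bool sveAddSubImmOK(int64_t Imm, bool ElementIs8Bit) {
  bool FitsLow8 = uint8_t(Imm) == Imm;
  bool FitsShifted8 = uint16_t(Imm & ~0xff) == Imm; // low byte must be zero
  return FitsLow8 || (!ElementIs8Bit && FitsShifted8);
}

int main() {
  assert(sveAddSubImmOK(0x7f, /*ElementIs8Bit=*/true));     // plain 8-bit value
  assert(sveAddSubImmOK(0x3400, /*ElementIs8Bit=*/false));  // 0x34 << 8
  assert(!sveAddSubImmOK(0x3400, /*ElementIs8Bit=*/true));  // no shift for .b
  assert(!sveAddSubImmOK(0x1234, /*ElementIs8Bit=*/false)); // low byte not zero
  return 0;
}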
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 75a9f2f5c8..33448cef46 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -758,7 +758,7 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
assert(TheTriple.isOSBinFormatELF() && "Invalid target");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
- bool IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32;
+ bool IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32;
return new ELFAArch64AsmBackend(T, TheTriple, OSABI, /*IsLittleEndian=*/true,
IsILP32);
}
@@ -771,7 +771,7 @@ MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
assert(TheTriple.isOSBinFormatELF() &&
"Big endian is only supported for ELF targets!");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
- bool IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32;
+ bool IsILP32 = STI.getTargetTriple().getEnvironment() == Triple::GNUILP32;
return new ELFAArch64AsmBackend(T, TheTriple, OSABI, /*IsLittleEndian=*/false,
IsILP32);
}
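Both factory functions above now derive ILP32 purely from the triple's environment. A small sketch of that check (the gnu_ilp32 triple spelling in the comment is an assumption, not taken from this diff):

#include "llvm/ADT/Triple.h"

static bool isAArch64ILP32(const llvm::Triple &TT) {
  return TT.getEnvironment() == llvm::Triple::GNUILP32;
}

// Expected behaviour: true for llvm::Triple("aarch64-unknown-linux-gnu_ilp32"),
// false for llvm::Triple("aarch64-unknown-linux-gnu").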
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index fcf67bd2f7..6c98ac4737 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -43,7 +43,7 @@ protected:
} // end anonymous namespace
AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, bool IsILP32)
- : MCELFObjectTargetWriter(/*Is64Bit*/ !IsILP32, OSABI, ELF::EM_AARCH64,
+ : MCELFObjectTargetWriter(/*Is64Bit*/ !IsILP32, OSABI, ELF::EM_AARCH64,
/*HasRelocationAddend*/ true),
IsILP32(IsILP32) {}
@@ -322,11 +322,11 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
return R_CLS(LDST64_ABS_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) {
- AArch64MCExpr::VariantKind AddressLoc =
- AArch64MCExpr::getAddressFrag(RefKind);
+ AArch64MCExpr::VariantKind AddressLoc =
+ AArch64MCExpr::getAddressFrag(RefKind);
if (!IsILP32) {
- if (AddressLoc == AArch64MCExpr::VK_LO15)
- return ELF::R_AARCH64_LD64_GOTPAGE_LO15;
+ if (AddressLoc == AArch64MCExpr::VK_LO15)
+ return ELF::R_AARCH64_LD64_GOTPAGE_LO15;
return ELF::R_AARCH64_LD64_GOT_LO12_NC;
} else {
Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index ec97e1c8b7..2135cf605b 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -51,61 +51,61 @@ class AArch64TargetAsmStreamer : public AArch64TargetStreamer {
OS << "\t.variant_pcs " << Symbol->getName() << "\n";
}
- void EmitARM64WinCFIAllocStack(unsigned Size) override {
- OS << "\t.seh_stackalloc " << Size << "\n";
- }
- void EmitARM64WinCFISaveR19R20X(int Offset) override {
- OS << "\t.seh_save_r19r20_x " << Offset << "\n";
- }
- void EmitARM64WinCFISaveFPLR(int Offset) override {
- OS << "\t.seh_save_fplr " << Offset << "\n";
- }
- void EmitARM64WinCFISaveFPLRX(int Offset) override {
- OS << "\t.seh_save_fplr_x " << Offset << "\n";
- }
- void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override {
- OS << "\t.seh_save_reg x" << Reg << ", " << Offset << "\n";
- }
- void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override {
- OS << "\t.seh_save_reg_x x" << Reg << ", " << Offset << "\n";
- }
- void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override {
- OS << "\t.seh_save_regp x" << Reg << ", " << Offset << "\n";
- }
- void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override {
- OS << "\t.seh_save_regp_x x" << Reg << ", " << Offset << "\n";
- }
- void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override {
- OS << "\t.seh_save_lrpair x" << Reg << ", " << Offset << "\n";
- }
- void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override {
- OS << "\t.seh_save_freg d" << Reg << ", " << Offset << "\n";
- }
- void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override {
- OS << "\t.seh_save_freg_x d" << Reg << ", " << Offset << "\n";
- }
- void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override {
- OS << "\t.seh_save_fregp d" << Reg << ", " << Offset << "\n";
- }
- void EmitARM64WinCFISaveFRegPX(unsigned Reg, int Offset) override {
- OS << "\t.seh_save_fregp_x d" << Reg << ", " << Offset << "\n";
- }
- void EmitARM64WinCFISetFP() override { OS << "\t.seh_set_fp\n"; }
- void EmitARM64WinCFIAddFP(unsigned Size) override {
- OS << "\t.seh_add_fp " << Size << "\n";
- }
- void EmitARM64WinCFINop() override { OS << "\t.seh_nop\n"; }
- void EmitARM64WinCFISaveNext() override { OS << "\t.seh_save_next\n"; }
- void EmitARM64WinCFIPrologEnd() override { OS << "\t.seh_endprologue\n"; }
- void EmitARM64WinCFIEpilogStart() override { OS << "\t.seh_startepilogue\n"; }
- void EmitARM64WinCFIEpilogEnd() override { OS << "\t.seh_endepilogue\n"; }
- void EmitARM64WinCFITrapFrame() override { OS << "\t.seh_trap_frame\n"; }
- void EmitARM64WinCFIMachineFrame() override { OS << "\t.seh_pushframe\n"; }
- void EmitARM64WinCFIContext() override { OS << "\t.seh_context\n"; }
- void EmitARM64WinCFIClearUnwoundToCall() override {
- OS << "\t.seh_clear_unwound_to_call\n";
- }
-
+ void EmitARM64WinCFIAllocStack(unsigned Size) override {
+ OS << "\t.seh_stackalloc " << Size << "\n";
+ }
+ void EmitARM64WinCFISaveR19R20X(int Offset) override {
+ OS << "\t.seh_save_r19r20_x " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveFPLR(int Offset) override {
+ OS << "\t.seh_save_fplr " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveFPLRX(int Offset) override {
+ OS << "\t.seh_save_fplr_x " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_reg x" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_reg_x x" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_regp x" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_regp_x x" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_lrpair x" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_freg d" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_freg_x d" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_fregp d" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISaveFRegPX(unsigned Reg, int Offset) override {
+ OS << "\t.seh_save_fregp_x d" << Reg << ", " << Offset << "\n";
+ }
+ void EmitARM64WinCFISetFP() override { OS << "\t.seh_set_fp\n"; }
+ void EmitARM64WinCFIAddFP(unsigned Size) override {
+ OS << "\t.seh_add_fp " << Size << "\n";
+ }
+ void EmitARM64WinCFINop() override { OS << "\t.seh_nop\n"; }
+ void EmitARM64WinCFISaveNext() override { OS << "\t.seh_save_next\n"; }
+ void EmitARM64WinCFIPrologEnd() override { OS << "\t.seh_endprologue\n"; }
+ void EmitARM64WinCFIEpilogStart() override { OS << "\t.seh_startepilogue\n"; }
+ void EmitARM64WinCFIEpilogEnd() override { OS << "\t.seh_endepilogue\n"; }
+ void EmitARM64WinCFITrapFrame() override { OS << "\t.seh_trap_frame\n"; }
+ void EmitARM64WinCFIMachineFrame() override { OS << "\t.seh_pushframe\n"; }
+ void EmitARM64WinCFIContext() override { OS << "\t.seh_context\n"; }
+ void EmitARM64WinCFIClearUnwoundToCall() override {
+ OS << "\t.seh_clear_unwound_to_call\n";
+ }
+
public:
AArch64TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
};
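Each override added above prints exactly one Windows unwind directive. A hypothetical driver (names and offsets invented for illustration) shows how the callbacks map to .seh_* output:

#include "AArch64TargetStreamer.h"

// Sketch: describing a simple prologue through the target streamer interface.
void describePrologue(llvm::AArch64TargetStreamer &TS) {
  TS.EmitARM64WinCFISaveFPLRX(16);  // -> "\t.seh_save_fplr_x 16"
  TS.EmitARM64WinCFIAllocStack(64); // -> "\t.seh_stackalloc 64"
  TS.EmitARM64WinCFISetFP();        // -> "\t.seh_set_fp"
  TS.EmitARM64WinCFIPrologEnd();    // -> "\t.seh_endprologue"
}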
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 340120d2b9..4aeb45ac21 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -849,7 +849,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
}
break;
}
- } else if (CnVal == 8 || CnVal == 9) {
+ } else if (CnVal == 8 || CnVal == 9) {
// TLBI aliases
const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByEncoding(Encoding);
if (!TLBI || !TLBI->haveFeatures(STI.getFeatureBits()))
@@ -1377,8 +1377,8 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address,
}
}
-void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, uint64_t Address,
- unsigned OpNum,
+void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, uint64_t Address,
+ unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -1386,11 +1386,11 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, uint64_t Address,
// If the label has already been resolved to an immediate offset (say, when
// we're running the disassembler), just print the immediate.
if (Op.isImm()) {
- const int64_t Offset = Op.getImm() * 4096;
- if (PrintBranchImmAsAddress)
- O << formatHex((Address & -4096) + Offset);
- else
- O << "#" << Offset;
+ const int64_t Offset = Op.getImm() * 4096;
+ if (PrintBranchImmAsAddress)
+ O << formatHex((Address & -4096) + Offset);
+ else
+ O << "#" << Offset;
return;
}
@@ -1421,22 +1421,22 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
O << "#" << Val;
}
-void AArch64InstPrinter::printBarriernXSOption(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- unsigned Val = MI->getOperand(OpNo).getImm();
- assert(MI->getOpcode() == AArch64::DSBnXS);
-
- StringRef Name;
- auto DB = AArch64DBnXS::lookupDBnXSByEncoding(Val);
- Name = DB ? DB->Name : "";
-
- if (!Name.empty())
- O << Name;
- else
- O << "#" << Val;
-}
-
+void AArch64InstPrinter::printBarriernXSOption(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Val = MI->getOperand(OpNo).getImm();
+ assert(MI->getOpcode() == AArch64::DSBnXS);
+
+ StringRef Name;
+ auto DB = AArch64DBnXS::lookupDBnXSByEncoding(Val);
+ Name = DB ? DB->Name : "";
+
+ if (!Name.empty())
+ O << Name;
+ else
+ O << "#" << Val;
+}
+
void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1644,10 +1644,10 @@ void AArch64InstPrinter::printGPR64as32(const MCInst *MI, unsigned OpNum,
unsigned Reg = MI->getOperand(OpNum).getReg();
O << getRegisterName(getWRegFromXReg(Reg));
}
-
-void AArch64InstPrinter::printGPR64x8(const MCInst *MI, unsigned OpNum,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- unsigned Reg = MI->getOperand(OpNum).getReg();
- O << getRegisterName(MRI.getSubReg(Reg, AArch64::x8sub_0));
-}
+
+void AArch64InstPrinter::printGPR64x8(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ O << getRegisterName(MRI.getSubReg(Reg, AArch64::x8sub_0));
+}
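The printAdrpLabel() change above prints the resolved target page when PrintBranchImmAsAddress is set. A standalone worked example of that arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Address = 0x400123;            // address of the ADRP itself
  int64_t Imm = 2;                        // raw immediate operand
  int64_t Offset = Imm * 4096;            // pages are 4 KiB
  uint64_t Target = (Address & -4096ULL) + Offset;
  assert(Target == 0x402000);             // value printed via formatHex()
  return 0;
}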
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index 4be885e667..b1952ebd27 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -30,7 +30,7 @@ public:
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
- std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
virtual void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O);
virtual bool printAliasInstr(const MCInst *MI, uint64_t Address,
@@ -156,12 +156,12 @@ protected:
void printVectorIndex(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum,
+ void printAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printBarrierOption(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printBarriernXSOption(const MCInst *MI, unsigned OpNum,
- const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBarriernXSOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printMSRSystemRegister(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printMRSSystemRegister(const MCInst *MI, unsigned OpNum,
@@ -190,8 +190,8 @@ protected:
const MCSubtargetInfo &STI, raw_ostream &O);
void printGPR64as32(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printGPR64x8(const MCInst *MI, unsigned OpNum,
- const MCSubtargetInfo &STI, raw_ostream &O);
+ void printGPR64x8(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
template <int Width>
void printZPRasFPR(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
@@ -208,7 +208,7 @@ public:
void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
const MCSubtargetInfo &STI, raw_ostream &O) override;
- std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI, raw_ostream &O) override;
bool printAliasInstr(const MCInst *MI, uint64_t Address,
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 68c721cb0d..257ecd33d2 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -73,7 +73,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
// targeting ELF.
AssemblerDialect = AsmWriterVariant == Default ? Generic : AsmWriterVariant;
- CodePointerSize = T.getEnvironment() == Triple::GNUILP32 ? 4 : 8;
+ CodePointerSize = T.getEnvironment() == Triple::GNUILP32 ? 4 : 8;
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
@@ -111,7 +111,7 @@ AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() {
SupportsDebugInformation = true;
CodePointerSize = 8;
- CommentString = "//";
+ CommentString = "//";
ExceptionsType = ExceptionHandling::WinEH;
WinEHEncodingType = WinEH::EncodingType::Itanium;
}
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 844bd6bbad..dd975cd363 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -70,7 +70,7 @@ StringRef AArch64MCExpr::getVariantKindName() const {
case VK_ABS_PAGE_NC: return ":pg_hi21_nc:";
case VK_GOT: return ":got:";
case VK_GOT_PAGE: return ":got:";
- case VK_GOT_PAGE_LO15: return ":gotpage_lo15:";
+ case VK_GOT_PAGE_LO15: return ":gotpage_lo15:";
case VK_GOT_LO12: return ":got_lo12:";
case VK_GOTTPREL: return ":gottprel:";
case VK_GOTTPREL_PAGE: return ":gottprel:";
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index d3e834a140..6e191cd455 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -46,7 +46,7 @@ public:
VK_G1 = 0x050,
VK_G2 = 0x060,
VK_G3 = 0x070,
- VK_LO15 = 0x080,
+ VK_LO15 = 0x080,
VK_AddressFragBits = 0x0f0,
// Whether the final relocation is a checked one (where a linker should
@@ -83,7 +83,7 @@ public:
VK_PREL_G0_NC = VK_PREL | VK_G0 | VK_NC,
VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC,
VK_GOT_PAGE = VK_GOT | VK_PAGE,
- VK_GOT_PAGE_LO15 = VK_GOT | VK_LO15 | VK_NC,
+ VK_GOT_PAGE_LO15 = VK_GOT | VK_LO15 | VK_NC,
VK_DTPREL_G2 = VK_DTPREL | VK_G2,
VK_DTPREL_G1 = VK_DTPREL | VK_G1,
VK_DTPREL_G1_NC = VK_DTPREL | VK_G1 | VK_NC,
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 3c2df1621e..98dcd9a96a 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -50,14 +50,14 @@ static MCInstrInfo *createAArch64MCInstrInfo() {
static MCSubtargetInfo *
createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- if (CPU.empty()) {
+ if (CPU.empty()) {
CPU = "generic";
- if (TT.isArm64e())
- CPU = "apple-a12";
- }
-
- return createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+ if (TT.isArm64e())
+ CPU = "apple-a12";
+ }
+
+ return createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
}
void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
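The subtarget factory above defaults the CPU when none is given, preferring apple-a12 for arm64e triples. Equivalent logic as a standalone sketch (the real function is static to this file, so this is illustration only):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include <string>

static std::string defaultAArch64CPU(const llvm::Triple &TT,
                                     llvm::StringRef CPU) {
  if (!CPU.empty())
    return CPU.str();
  return TT.isArm64e() ? "apple-a12" : "generic";
}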
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 012661edbb..f2384aa588 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -373,11 +373,11 @@ void AArch64MachObjectWriter::recordRelocation(
Type == MachO::ARM64_RELOC_PAGE21 ||
Type == MachO::ARM64_RELOC_PAGEOFF12) &&
Value) {
- if (!isInt<24>(Value)) {
- Asm.getContext().reportError(Fixup.getLoc(),
- "addend too big for relocation");
- return;
- }
+ if (!isInt<24>(Value)) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "addend too big for relocation");
+ return;
+ }
MachO::any_relocation_info MRE;
MRE.r_word0 = FixupOffset;
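The new isInt<24>() guard above rejects addends that the Mach-O ARM64 page/pageoff relocations cannot encode. A tiny standalone illustration of the bound:

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  assert(llvm::isInt<24>(0x7fffff));  // 2^23 - 1: largest representable addend
  assert(!llvm::isInt<24>(0x800000)); // 2^23: "addend too big for relocation"
  assert(llvm::isInt<24>(-0x800000)); // -2^23: smallest representable addend
  return 0;
}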
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index f32a8f15b8..8f3e876061 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -11,23 +11,23 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetStreamer.h"
-#include "AArch64MCAsmInfo.h"
-#include "AArch64Subtarget.h"
-#include "llvm/BinaryFormat/ELF.h"
+#include "AArch64MCAsmInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/ConstantPools.h"
-#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
-static cl::opt<bool> MarkBTIProperty(
- "aarch64-mark-bti-property", cl::Hidden,
- cl::desc("Add .note.gnu.property with BTI to assembly files"),
- cl::init(false));
-
+static cl::opt<bool> MarkBTIProperty(
+ "aarch64-mark-bti-property", cl::Hidden,
+ cl::desc("Add .note.gnu.property with BTI to assembly files"),
+ cl::init(false));
+
//
// AArch64TargetStreamer Implementation
//
@@ -48,51 +48,51 @@ void AArch64TargetStreamer::emitCurrentConstantPool() {
ConstantPools->emitForCurrentSection(Streamer);
}
-// finish() - write out any non-empty assembler constant pools and
-// write out the .note.gnu.property section if needed.
-void AArch64TargetStreamer::finish() {
- ConstantPools->emitAll(Streamer);
-
- if (MarkBTIProperty)
- emitNoteSection(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
-}
-
-void AArch64TargetStreamer::emitNoteSection(unsigned Flags) {
- if (Flags == 0)
- return;
-
- MCStreamer &OutStreamer = getStreamer();
- MCContext &Context = OutStreamer.getContext();
- // Emit a .note.gnu.property section with the flags.
- MCSectionELF *Nt = Context.getELFSection(".note.gnu.property", ELF::SHT_NOTE,
- ELF::SHF_ALLOC);
- if (Nt->isRegistered()) {
- SMLoc Loc;
- Context.reportWarning(
- Loc,
- "The .note.gnu.property is not emitted because it is already present.");
- return;
- }
- MCSection *Cur = OutStreamer.getCurrentSectionOnly();
- OutStreamer.SwitchSection(Nt);
-
- // Emit the note header.
- OutStreamer.emitValueToAlignment(Align(8).value());
- OutStreamer.emitIntValue(4, 4); // data size for "GNU\0"
- OutStreamer.emitIntValue(4 * 4, 4); // Elf_Prop size
- OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4);
- OutStreamer.emitBytes(StringRef("GNU", 4)); // note name
-
- // Emit the PAC/BTI properties.
- OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4);
- OutStreamer.emitIntValue(4, 4); // data size
- OutStreamer.emitIntValue(Flags, 4); // data
- OutStreamer.emitIntValue(0, 4); // pad
-
- OutStreamer.endSection(Nt);
- OutStreamer.SwitchSection(Cur);
-}
-
+// finish() - write out any non-empty assembler constant pools and
+// write out the .note.gnu.property section if needed.
+void AArch64TargetStreamer::finish() {
+ ConstantPools->emitAll(Streamer);
+
+ if (MarkBTIProperty)
+ emitNoteSection(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
+}
+
+void AArch64TargetStreamer::emitNoteSection(unsigned Flags) {
+ if (Flags == 0)
+ return;
+
+ MCStreamer &OutStreamer = getStreamer();
+ MCContext &Context = OutStreamer.getContext();
+ // Emit a .note.gnu.property section with the flags.
+ MCSectionELF *Nt = Context.getELFSection(".note.gnu.property", ELF::SHT_NOTE,
+ ELF::SHF_ALLOC);
+ if (Nt->isRegistered()) {
+ SMLoc Loc;
+ Context.reportWarning(
+ Loc,
+ "The .note.gnu.property is not emitted because it is already present.");
+ return;
+ }
+ MCSection *Cur = OutStreamer.getCurrentSectionOnly();
+ OutStreamer.SwitchSection(Nt);
+
+ // Emit the note header.
+ OutStreamer.emitValueToAlignment(Align(8).value());
+ OutStreamer.emitIntValue(4, 4); // data size for "GNU\0"
+ OutStreamer.emitIntValue(4 * 4, 4); // Elf_Prop size
+ OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4);
+ OutStreamer.emitBytes(StringRef("GNU", 4)); // note name
+
+ // Emit the PAC/BTI properties.
+ OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4);
+ OutStreamer.emitIntValue(4, 4); // data size
+ OutStreamer.emitIntValue(Flags, 4); // data
+ OutStreamer.emitIntValue(0, 4); // pad
+
+ OutStreamer.endSection(Nt);
+ OutStreamer.SwitchSection(Cur);
+}
+
void AArch64TargetStreamer::emitInst(uint32_t Inst) {
char Buffer[4];
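For reference, the bytes emitNoteSection() above writes follow the standard ELF note layout; an annotated struct sketch of that layout (illustration only, the streamer emits the fields one by one):

#include <cstdint>

struct GnuPropertyBtiNote {        // 32 bytes total, emitted 8-byte aligned
  uint32_t NameSize = 4;           // strlen("GNU") + NUL
  uint32_t DescSize = 16;          // one Elf_Prop: 4 * 4 bytes
  uint32_t Type;                   // ELF::NT_GNU_PROPERTY_TYPE_0
  char     Name[4] = {'G', 'N', 'U', '\0'};
  uint32_t PropType;               // ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND
  uint32_t PropDataSize = 4;       // size of PropData
  uint32_t PropData;               // the Flags argument (e.g. the BTI bit)
  uint32_t Pad = 0;                // pad the Elf_Prop to 8 bytes
};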
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index 73dc1e5d4d..5212d70a57 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -33,9 +33,9 @@ public:
/// Emit contents of constant pool for the current section.
void emitCurrentConstantPool();
- /// Callback used to implement the .note.gnu.property section.
- void emitNoteSection(unsigned Flags);
-
+ /// Callback used to implement the .note.gnu.property section.
+ void emitNoteSection(unsigned Flags);
+
/// Callback used to implement the .inst directive.
virtual void emitInst(uint32_t Inst);
@@ -43,14 +43,14 @@ public:
virtual void emitDirectiveVariantPCS(MCSymbol *Symbol) {};
virtual void EmitARM64WinCFIAllocStack(unsigned Size) {}
- virtual void EmitARM64WinCFISaveR19R20X(int Offset) {}
+ virtual void EmitARM64WinCFISaveR19R20X(int Offset) {}
virtual void EmitARM64WinCFISaveFPLR(int Offset) {}
virtual void EmitARM64WinCFISaveFPLRX(int Offset) {}
virtual void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) {}
virtual void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) {}
virtual void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) {}
virtual void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) {}
- virtual void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) {}
+ virtual void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) {}
virtual void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) {}
virtual void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) {}
virtual void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) {}
@@ -58,14 +58,14 @@ public:
virtual void EmitARM64WinCFISetFP() {}
virtual void EmitARM64WinCFIAddFP(unsigned Size) {}
virtual void EmitARM64WinCFINop() {}
- virtual void EmitARM64WinCFISaveNext() {}
+ virtual void EmitARM64WinCFISaveNext() {}
virtual void EmitARM64WinCFIPrologEnd() {}
virtual void EmitARM64WinCFIEpilogStart() {}
virtual void EmitARM64WinCFIEpilogEnd() {}
- virtual void EmitARM64WinCFITrapFrame() {}
- virtual void EmitARM64WinCFIMachineFrame() {}
- virtual void EmitARM64WinCFIContext() {}
- virtual void EmitARM64WinCFIClearUnwoundToCall() {}
+ virtual void EmitARM64WinCFITrapFrame() {}
+ virtual void EmitARM64WinCFIMachineFrame() {}
+ virtual void EmitARM64WinCFIContext() {}
+ virtual void EmitARM64WinCFIClearUnwoundToCall() {}
private:
std::unique_ptr<AssemblerConstantPools> ConstantPools;
@@ -96,14 +96,14 @@ public:
// The unwind codes on ARM64 Windows are documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
void EmitARM64WinCFIAllocStack(unsigned Size) override;
- void EmitARM64WinCFISaveR19R20X(int Offset) override;
+ void EmitARM64WinCFISaveR19R20X(int Offset) override;
void EmitARM64WinCFISaveFPLR(int Offset) override;
void EmitARM64WinCFISaveFPLRX(int Offset) override;
void EmitARM64WinCFISaveReg(unsigned Reg, int Offset) override;
void EmitARM64WinCFISaveRegX(unsigned Reg, int Offset) override;
void EmitARM64WinCFISaveRegP(unsigned Reg, int Offset) override;
void EmitARM64WinCFISaveRegPX(unsigned Reg, int Offset) override;
- void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override;
+ void EmitARM64WinCFISaveLRPair(unsigned Reg, int Offset) override;
void EmitARM64WinCFISaveFReg(unsigned Reg, int Offset) override;
void EmitARM64WinCFISaveFRegX(unsigned Reg, int Offset) override;
void EmitARM64WinCFISaveFRegP(unsigned Reg, int Offset) override;
@@ -111,15 +111,15 @@ public:
void EmitARM64WinCFISetFP() override;
void EmitARM64WinCFIAddFP(unsigned Size) override;
void EmitARM64WinCFINop() override;
- void EmitARM64WinCFISaveNext() override;
+ void EmitARM64WinCFISaveNext() override;
void EmitARM64WinCFIPrologEnd() override;
void EmitARM64WinCFIEpilogStart() override;
void EmitARM64WinCFIEpilogEnd() override;
- void EmitARM64WinCFITrapFrame() override;
- void EmitARM64WinCFIMachineFrame() override;
- void EmitARM64WinCFIContext() override;
- void EmitARM64WinCFIClearUnwoundToCall() override;
-
+ void EmitARM64WinCFITrapFrame() override;
+ void EmitARM64WinCFIMachineFrame() override;
+ void EmitARM64WinCFIContext() override;
+ void EmitARM64WinCFIClearUnwoundToCall() override;
+
private:
void EmitARM64WinUnwindCode(unsigned UnwindCode, int Reg, int Offset);
};
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index 1c50706a26..603446f40d 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -28,7 +28,7 @@ public:
void EmitWinEHHandlerData(SMLoc Loc) override;
void EmitWindowsUnwindTables() override;
- void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) override;
+ void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) override;
void finishImpl() override;
};
@@ -37,14 +37,14 @@ void AArch64WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) {
// We have to emit the unwind info now, because this directive
// actually switches to the .xdata section!
- EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo(),
- /* HandlerData = */ true);
-}
-
-void AArch64WinCOFFStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) {
- EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false);
+ EHStreamer.EmitUnwindInfo(*this, getCurrentWinFrameInfo(),
+ /* HandlerData = */ true);
}
+void AArch64WinCOFFStreamer::EmitWindowsUnwindTables(WinEH::FrameInfo *Frame) {
+ EHStreamer.EmitUnwindInfo(*this, Frame, /* HandlerData = */ false);
+}
+
void AArch64WinCOFFStreamer::EmitWindowsUnwindTables() {
if (!getNumWinFrameInfos())
return;
@@ -91,10 +91,10 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIAllocStack(unsigned Size) {
EmitARM64WinUnwindCode(Op, -1, Size);
}
-void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveR19R20X(int Offset) {
- EmitARM64WinUnwindCode(Win64EH::UOP_SaveR19R20X, -1, Offset);
-}
-
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveR19R20X(int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveR19R20X, -1, Offset);
+}
+
void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFPLR(int Offset) {
EmitARM64WinUnwindCode(Win64EH::UOP_SaveFPLR, -1, Offset);
}
@@ -125,11 +125,11 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveRegPX(unsigned Reg,
EmitARM64WinUnwindCode(Win64EH::UOP_SaveRegPX, Reg, Offset);
}
-void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveLRPair(unsigned Reg,
- int Offset) {
- EmitARM64WinUnwindCode(Win64EH::UOP_SaveLRPair, Reg, Offset);
-}
-
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveLRPair(unsigned Reg,
+ int Offset) {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveLRPair, Reg, Offset);
+}
+
void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveFReg(unsigned Reg,
int Offset) {
assert(Offset >= 0 && Offset <= 504 &&
@@ -165,10 +165,10 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFINop() {
EmitARM64WinUnwindCode(Win64EH::UOP_Nop, -1, 0);
}
-void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveNext() {
- EmitARM64WinUnwindCode(Win64EH::UOP_SaveNext, -1, 0);
-}
-
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFISaveNext() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_SaveNext, -1, 0);
+}
+
// The functions below handle opcodes that can end up in either a prolog or
// an epilog, but not both.
void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIPrologEnd() {
@@ -207,22 +207,22 @@ void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIEpilogEnd() {
CurrentEpilog = nullptr;
}
-void AArch64TargetWinCOFFStreamer::EmitARM64WinCFITrapFrame() {
- EmitARM64WinUnwindCode(Win64EH::UOP_TrapFrame, -1, 0);
-}
-
-void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIMachineFrame() {
- EmitARM64WinUnwindCode(Win64EH::UOP_PushMachFrame, -1, 0);
-}
-
-void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIContext() {
- EmitARM64WinUnwindCode(Win64EH::UOP_Context, -1, 0);
-}
-
-void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIClearUnwoundToCall() {
- EmitARM64WinUnwindCode(Win64EH::UOP_ClearUnwoundToCall, -1, 0);
-}
-
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFITrapFrame() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_TrapFrame, -1, 0);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIMachineFrame() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_PushMachFrame, -1, 0);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIContext() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_Context, -1, 0);
+}
+
+void AArch64TargetWinCOFFStreamer::EmitARM64WinCFIClearUnwoundToCall() {
+ EmitARM64WinUnwindCode(Win64EH::UOP_ClearUnwoundToCall, -1, 0);
+}
+
MCWinCOFFStreamer *createAArch64WinCOFFStreamer(
MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make
index 18b5c7460f..9a6f23a3c8 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make
+++ b/contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc/ya.make
@@ -12,19 +12,19 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/BinaryFormat
- contrib/libs/llvm12/lib/MC
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Target/AArch64/TargetInfo
- contrib/libs/llvm12/lib/Target/AArch64/Utils
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/BinaryFormat
+ contrib/libs/llvm12/lib/MC
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Target/AArch64/TargetInfo
+ contrib/libs/llvm12/lib/Target/AArch64/Utils
)
ADDINCL(
- ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64
- contrib/libs/llvm12/lib/Target/AArch64
- contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc
+ ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64
+ contrib/libs/llvm12/lib/Target/AArch64
+ contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc
)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/SVEInstrFormats.td b/contrib/libs/llvm12/lib/Target/AArch64/SVEInstrFormats.td
index 4eecf72862..0c31ac1f9a 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/SVEInstrFormats.td
+++ b/contrib/libs/llvm12/lib/Target/AArch64/SVEInstrFormats.td
@@ -206,20 +206,20 @@ def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>",
def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;
-def SVEArithUImm8Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i8>", []>;
-def SVEArithUImm16Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i16>", []>;
-def SVEArithUImm32Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i32>", []>;
-def SVEArithUImm64Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i64>", []>;
+def SVEArithUImm8Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i8>", []>;
+def SVEArithUImm16Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i16>", []>;
+def SVEArithUImm32Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i32>", []>;
+def SVEArithUImm64Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i64>", []>;
def SVEArithSImmPat : ComplexPattern<i32, 1, "SelectSVESignedArithImm", []>;
-def SVEShiftImmL8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 7>", []>;
-def SVEShiftImmL16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 15>", []>;
-def SVEShiftImmL32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 31>", []>;
-def SVEShiftImmL64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 63>", []>;
-def SVEShiftImmR8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 8, true>", []>;
-def SVEShiftImmR16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 16, true>", []>;
-def SVEShiftImmR32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 32, true>", []>;
-def SVEShiftImmR64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 64, true>", []>;
+def SVEShiftImmL8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 7>", []>;
+def SVEShiftImmL16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 15>", []>;
+def SVEShiftImmL32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 31>", []>;
+def SVEShiftImmL64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 63>", []>;
+def SVEShiftImmR8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 8, true>", []>;
+def SVEShiftImmR16 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 16, true>", []>;
+def SVEShiftImmR32 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 32, true>", []>;
+def SVEShiftImmR64 : ComplexPattern<i32, 1, "SelectSVEShiftImm<1, 64, true>", []>;
class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass {
let Name = "SVEExactFPImmOperand" # Suffix;
@@ -280,8 +280,8 @@ class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let Inst{3-0} = Pd;
let Defs = !if(!eq (opc{0}, 1), [NZCV], []);
- let ElementSize = pprty.ElementSize;
- let isReMaterializable = 1;
+ let ElementSize = pprty.ElementSize;
+ let isReMaterializable = 1;
}
multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> {
@@ -317,18 +317,18 @@ class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1)),
(inst $Op1)>;
-class SVE_1_Op_Passthru_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
- ValueType vts, Instruction inst>
-: Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)),
- (inst $Op3, $Op1, $Op2)>;
-
-// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the
-// type of rounding. This is matched by timm0_1 in the pattern below and ignored.
-class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
- ValueType vts, Instruction inst>
-: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)),
- (inst $Op3, $Op1, $Op2)>;
-
+class SVE_1_Op_Passthru_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+ ValueType vts, Instruction inst>
+: Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)),
+ (inst $Op3, $Op1, $Op2)>;
+
+// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the
+// type of rounding. This is matched by timm0_1 in the pattern below and ignored.
+class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+ ValueType vts, Instruction inst>
+: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)),
+ (inst $Op3, $Op1, $Op2)>;
+
class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
ValueType it, ComplexPattern cpx, Instruction inst>
: Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))),
@@ -354,11 +354,11 @@ class SVE_2_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
(inst $Op1, $Op2)>;
-class SVE_2_Op_Pred_All_Active<ValueType vtd, SDPatternOperator op,
- ValueType pt, ValueType vt1, ValueType vt2,
- Instruction inst>
-: Pat<(vtd (op (pt (AArch64ptrue 31)), vt1:$Op1, vt2:$Op2)),
- (inst $Op1, $Op2)>;
+class SVE_2_Op_Pred_All_Active<ValueType vtd, SDPatternOperator op,
+ ValueType pt, ValueType vt1, ValueType vt2,
+ Instruction inst>
+: Pat<(vtd (op (pt (AArch64ptrue 31)), vt1:$Op1, vt2:$Op2)),
+ (inst $Op1, $Op2)>;
class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Instruction inst>
@@ -418,23 +418,23 @@ class SVE_2_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
(inst (ptrue 31), $Op1, $Op2)>;
-class SVE_InReg_Extend<ValueType vt, SDPatternOperator op, ValueType pt,
- ValueType inreg_vt, Instruction inst>
-: Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, vt:$PassThru)),
- (inst $PassThru, $Pg, $Src)>;
-
-class SVE_Shift_DupImm_Pred_Pat<ValueType vt, SDPatternOperator op,
- ValueType pt, ValueType it,
- ComplexPattern cast, Instruction inst>
-: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
- (inst $Pg, $Rn, i32:$imm)>;
-
-class SVE_Shift_DupImm_All_Active_Pat<ValueType vt, SDPatternOperator op,
- ValueType pt, ValueType it,
- ComplexPattern cast, Instruction inst>
-: Pat<(vt (op (pt (AArch64ptrue 31)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
- (inst $Rn, i32:$imm)>;
-
+class SVE_InReg_Extend<ValueType vt, SDPatternOperator op, ValueType pt,
+ ValueType inreg_vt, Instruction inst>
+: Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, vt:$PassThru)),
+ (inst $PassThru, $Pg, $Src)>;
+
+class SVE_Shift_DupImm_Pred_Pat<ValueType vt, SDPatternOperator op,
+ ValueType pt, ValueType it,
+ ComplexPattern cast, Instruction inst>
+: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
+ (inst $Pg, $Rn, i32:$imm)>;
+
+class SVE_Shift_DupImm_All_Active_Pat<ValueType vt, SDPatternOperator op,
+ ValueType pt, ValueType it,
+ ComplexPattern cast, Instruction inst>
+: Pat<(vt (op (pt (AArch64ptrue 31)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
+ (inst $Rn, i32:$imm)>;
+
//
// Pseudo -> Instruction mappings
//
@@ -511,8 +511,8 @@ class sve_int_pfalse<bits<6> opc, string asm>
let Inst{9} = opc{0};
let Inst{8-4} = 0b00000;
let Inst{3-0} = Pd;
-
- let isReMaterializable = 1;
+
+ let isReMaterializable = 1;
}
class sve_int_ptest<bits<6> opc, string asm>
@@ -533,7 +533,7 @@ class sve_int_ptest<bits<6> opc, string asm>
let Inst{4-0} = 0b00000;
let Defs = [NZCV];
- let isCompare = 1;
+ let isCompare = 1;
}
class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
@@ -1014,8 +1014,8 @@ multiclass sve_int_perm_dup_i<string asm> {
(!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
}
-class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty,
- RegisterOperand VecList>
+class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty,
+ RegisterOperand VecList>
: I<(outs zprty:$Zd), (ins VecList:$Zn, zprty:$Zm),
asm, "\t$Zd, $Zn, $Zm",
"",
@@ -1057,8 +1057,8 @@ multiclass sve_int_perm_tbl<string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
-
- def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>;
}
multiclass sve2_int_perm_tbl<string asm, SDPatternOperator op> {
@@ -1101,11 +1101,11 @@ multiclass sve2_int_perm_tbl<string asm, SDPatternOperator op> {
(nxv2f64 (!cast<Instruction>(NAME # _D) (REG_SEQUENCE ZPR2, nxv2f64:$Op1, zsub0,
nxv2f64:$Op2, zsub1),
nxv2i64:$Op3))>;
-
- def : Pat<(nxv8bf16 (op nxv8bf16:$Op1, nxv8bf16:$Op2, nxv8i16:$Op3)),
- (nxv8bf16 (!cast<Instruction>(NAME # _H) (REG_SEQUENCE ZPR2, nxv8bf16:$Op1, zsub0,
- nxv8bf16:$Op2, zsub1),
- nxv8i16:$Op3))>;
+
+ def : Pat<(nxv8bf16 (op nxv8bf16:$Op1, nxv8bf16:$Op2, nxv8i16:$Op3)),
+ (nxv8bf16 (!cast<Instruction>(NAME # _H) (REG_SEQUENCE ZPR2, nxv8bf16:$Op1, zsub0,
+ nxv8bf16:$Op2, zsub1),
+ nxv8i16:$Op3))>;
}
class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
@@ -1141,8 +1141,8 @@ multiclass sve2_int_perm_tbx<string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
-
- def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, nxv8i16, !cast<Instruction>(NAME # _H)>;
}
class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty>
@@ -1173,8 +1173,8 @@ multiclass sve_int_perm_reverse_z<string asm, SDPatternOperator op> {
def : SVE_1_Op_Pat<nxv8f16, op, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Pat<nxv4f32, op, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Pat<nxv2f64, op, nxv2f64, !cast<Instruction>(NAME # _D)>;
-
- def : SVE_1_Op_Pat<nxv8bf16, op, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+
+ def : SVE_1_Op_Pat<nxv8bf16, op, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty>
@@ -1287,8 +1287,8 @@ multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, f16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, f64, !cast<Instruction>(NAME # _D)>;
-
- def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, bf16, !cast<Instruction>(NAME # _H)>;
+
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, bf16, !cast<Instruction>(NAME # _H)>;
}
//===----------------------------------------------------------------------===//
@@ -1375,8 +1375,8 @@ multiclass sve_int_sel_vvv<string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _D)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
- def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
-
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+
def : InstAlias<"mov $Zd, $Pg/m, $Zn",
(!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, ZPR8:$Zn, ZPR8:$Zd), 1>;
def : InstAlias<"mov $Zd, $Pg/m, $Zn",
@@ -1713,8 +1713,8 @@ class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
-multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op,
- SDPatternOperator predicated_op = null_frag> {
+multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op,
+ SDPatternOperator predicated_op = null_frag> {
def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>;
@@ -1723,9 +1723,9 @@ multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op,
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
- def : SVE_2_Op_Pred_All_Active<nxv8f16, predicated_op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pred_All_Active<nxv4f32, predicated_op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pred_All_Active<nxv2f64, predicated_op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pred_All_Active<nxv8f16, predicated_op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pred_All_Active<nxv4f32, predicated_op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pred_All_Active<nxv2f64, predicated_op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_3op_u_zd_ftsmul<bits<3> opc, string asm, SDPatternOperator op> {
@@ -2117,8 +2117,8 @@ class sve2_fp_pairwise_pred<bits<2> sz, bits<3> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm,
- SDPatternOperator op> {
+multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _H : sve2_fp_pairwise_pred<0b01, opc, asm, ZPR16>;
def _S : sve2_fp_pairwise_pred<0b10, opc, asm, ZPR32>;
def _D : sve2_fp_pairwise_pred<0b11, opc, asm, ZPR64>;
@@ -2270,11 +2270,11 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm,
def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4f16, op, nxv4f16, nxv4f16, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pat<nxv2f16, op, nxv2f16, nxv2f16, !cast<Instruction>(NAME # _D)>;
- def : SVE_2_Op_Pat<nxv2f32, op, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv2f16, op, nxv2f16, nxv2f16, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv2f32, op, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _D)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
-
- def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
//===----------------------------------------------------------------------===//
@@ -2282,7 +2282,7 @@ multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm,
//===----------------------------------------------------------------------===//
class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
- RegisterOperand o_zprtype, ElementSizeEnum Sz>
+ RegisterOperand o_zprtype, ElementSizeEnum Sz>
: I<(outs o_zprtype:$Zd), (ins i_zprtype:$_Zd, PPR3bAny:$Pg, i_zprtype:$Zn),
asm, "\t$Zd, $Pg/m, $Zn",
"",
@@ -2301,64 +2301,64 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
- let ElementSize = Sz;
+ let ElementSize = Sz;
}
multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
RegisterOperand i_zprtype,
RegisterOperand o_zprtype,
- SDPatternOperator int_op,
- SDPatternOperator ir_op, ValueType vt1,
+ SDPatternOperator int_op,
+ SDPatternOperator ir_op, ValueType vt1,
ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
- // convert vt1 to a packed type for the intrinsic patterns
- defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
- !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,
- !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32,
- 1 : vt1);
-
- // convert vt3 to a packed type for the intrinsic patterns
- defvar packedvt3 = !cond(!eq(!cast<string>(vt3), "nxv2f16"): nxv8f16,
- !eq(!cast<string>(vt3), "nxv4f16"): nxv8f16,
- !eq(!cast<string>(vt3), "nxv2f32"): nxv4f32,
- 1 : vt3);
-
- def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>;
-
- def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
-}
-
-multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
- RegisterOperand i_zprtype,
- RegisterOperand o_zprtype,
- SDPatternOperator int_op,
- SDPatternOperator ir_op, ValueType vt1,
- ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
- def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
-
- // convert vt1 to a packed type for the intrinsic patterns
- defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
- !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,
- !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32,
- 1 : vt1);
-
- def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>;
-
- def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
-}
-
+ // convert vt1 to a packed type for the intrinsic patterns
+ defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
+ !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,
+ !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32,
+ 1 : vt1);
+
+ // convert vt3 to a packed type for the intrinsic patterns
+ defvar packedvt3 = !cond(!eq(!cast<string>(vt3), "nxv2f16"): nxv8f16,
+ !eq(!cast<string>(vt3), "nxv4f16"): nxv8f16,
+ !eq(!cast<string>(vt3), "nxv2f32"): nxv4f32,
+ 1 : vt3);
+
+ def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>;
+
+ def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
+}
+
+multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
+ RegisterOperand i_zprtype,
+ RegisterOperand o_zprtype,
+ SDPatternOperator int_op,
+ SDPatternOperator ir_op, ValueType vt1,
+ ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
+ def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
+
+ // convert vt1 to a packed type for the intrinsic patterns
+ defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
+ !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,
+ !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32,
+ 1 : vt1);
+
+ def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>;
+
+ def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
+}
+
multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
- def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Passthru_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {
@@ -2466,19 +2466,19 @@ multiclass sve_int_bin_pred_arit_0<bits<3> opc, string asm, string Ps,
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, string Ps,
- SDPatternOperator op,
- DestructiveInstTypeEnum flags> {
- let DestructiveInstType = flags in {
- def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>,
- SVEPseudo2Instr<Ps # _B, 1>;
- def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>,
- SVEPseudo2Instr<Ps # _H, 1>;
- def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>,
- SVEPseudo2Instr<Ps # _S, 1>;
- def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>,
- SVEPseudo2Instr<Ps # _D, 1>;
- }
+multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, string Ps,
+ SDPatternOperator op,
+ DestructiveInstTypeEnum flags> {
+ let DestructiveInstType = flags in {
+ def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>,
+ SVEPseudo2Instr<Ps # _B, 1>;
+ def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>,
+ SVEPseudo2Instr<Ps # _H, 1>;
+ def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>,
+ SVEPseudo2Instr<Ps # _S, 1>;
+ def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>,
+ SVEPseudo2Instr<Ps # _D, 1>;
+ }
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
@@ -2486,19 +2486,19 @@ multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, string Ps,
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_bin_pred_arit_2<bits<3> opc, string asm, string Ps,
- SDPatternOperator op,
- DestructiveInstTypeEnum flags> {
- let DestructiveInstType = flags in {
- def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>,
- SVEPseudo2Instr<Ps # _B, 1>;
- def _H : sve_int_bin_pred_arit_log<0b01, 0b10, opc, asm, ZPR16>,
- SVEPseudo2Instr<Ps # _H, 1>;
- def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>,
- SVEPseudo2Instr<Ps # _S, 1>;
- def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>,
- SVEPseudo2Instr<Ps # _D, 1>;
- }
+multiclass sve_int_bin_pred_arit_2<bits<3> opc, string asm, string Ps,
+ SDPatternOperator op,
+ DestructiveInstTypeEnum flags> {
+ let DestructiveInstType = flags in {
+ def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>,
+ SVEPseudo2Instr<Ps # _B, 1>;
+ def _H : sve_int_bin_pred_arit_log<0b01, 0b10, opc, asm, ZPR16>,
+ SVEPseudo2Instr<Ps # _H, 1>;
+ def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>,
+ SVEPseudo2Instr<Ps # _S, 1>;
+ def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>,
+ SVEPseudo2Instr<Ps # _D, 1>;
+ }
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
@@ -2588,8 +2588,8 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op,
- SDPatternOperator outerop, SDPatternOperator mulop> {
+multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op,
+ SDPatternOperator outerop, SDPatternOperator mulop> {
def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>;
def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>;
def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>;
@@ -2599,15 +2599,15 @@ multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op,
def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_4_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_4_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
-
- def : Pat<(outerop nxv16i8:$Op1, (mulop nxv16i1:$pred, nxv16i8:$Op2, nxv16i8:$Op3)),
- (!cast<Instruction>(NAME # _B) $pred, $Op1, $Op2, $Op3)>;
- def : Pat<(outerop nxv8i16:$Op1, (mulop nxv8i1:$pred, nxv8i16:$Op2, nxv8i16:$Op3)),
- (!cast<Instruction>(NAME # _H) $pred, $Op1, $Op2, $Op3)>;
- def : Pat<(outerop nxv4i32:$Op1, (mulop nxv4i1:$pred, nxv4i32:$Op2, nxv4i32:$Op3)),
- (!cast<Instruction>(NAME # _S) $pred, $Op1, $Op2, $Op3)>;
- def : Pat<(outerop nxv2i64:$Op1, (mulop nxv2i1:$pred, nxv2i64:$Op2, nxv2i64:$Op3)),
- (!cast<Instruction>(NAME # _D) $pred, $Op1, $Op2, $Op3)>;
+
+ def : Pat<(outerop nxv16i8:$Op1, (mulop nxv16i1:$pred, nxv16i8:$Op2, nxv16i8:$Op3)),
+ (!cast<Instruction>(NAME # _B) $pred, $Op1, $Op2, $Op3)>;
+ def : Pat<(outerop nxv8i16:$Op1, (mulop nxv8i1:$pred, nxv8i16:$Op2, nxv8i16:$Op3)),
+ (!cast<Instruction>(NAME # _H) $pred, $Op1, $Op2, $Op3)>;
+ def : Pat<(outerop nxv4i32:$Op1, (mulop nxv4i1:$pred, nxv4i32:$Op2, nxv4i32:$Op3)),
+ (!cast<Instruction>(NAME # _S) $pred, $Op1, $Op2, $Op3)>;
+ def : Pat<(outerop nxv2i64:$Op1, (mulop nxv2i1:$pred, nxv2i64:$Op2, nxv2i64:$Op3)),
+ (!cast<Instruction>(NAME # _D) $pred, $Op1, $Op2, $Op3)>;
}
//===----------------------------------------------------------------------===//
@@ -2711,8 +2711,8 @@ multiclass sve2_int_mla_by_indexed_elem<bits<2> opc, bit S, string asm,
// SVE2 Integer Multiply-Add Long - Indexed Group
//===----------------------------------------------------------------------===//
-multiclass sve2_int_mla_long_by_indexed_elem<bits<4> opc, string asm,
- SDPatternOperator op> {
+multiclass sve2_int_mla_long_by_indexed_elem<bits<4> opc, string asm,
+ SDPatternOperator op> {
def _S : sve2_int_mla_by_indexed_elem<0b10, { opc{3}, 0b0, opc{2-1}, ?, opc{0} },
asm, ZPR32, ZPR16, ZPR3b16, VectorIndexH32b> {
bits<3> Zm;
@@ -2962,8 +2962,8 @@ class sve2_int_mul<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
}
-multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op,
- SDPatternOperator op_pred = null_frag> {
+multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op,
+ SDPatternOperator op_pred = null_frag> {
def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
def _H : sve2_int_mul<0b01, opc, asm, ZPR16>;
def _S : sve2_int_mul<0b10, opc, asm, ZPR32>;
@@ -2973,11 +2973,11 @@ multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op,
def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
-
- def : SVE_2_Op_Pred_All_Active<nxv16i8, op_pred, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pred_All_Active<nxv8i16, op_pred, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pred_All_Active<nxv4i32, op_pred, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pred_All_Active<nxv2i64, op_pred, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_2_Op_Pred_All_Active<nxv16i8, op_pred, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pred_All_Active<nxv8i16, op_pred, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pred_All_Active<nxv4i32, op_pred, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pred_All_Active<nxv2i64, op_pred, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_int_mul_single<bits<3> opc, string asm, SDPatternOperator op> {
@@ -3531,8 +3531,8 @@ multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm,
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm,
- SDPatternOperator op> {
+multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _S : sve2_int_absdiff_accum<{ opc{1}, 0b0 }, { 0b010, opc{0} }, asm,
ZPR32, ZPR32>;
def _D : sve2_int_absdiff_accum<{ opc{1}, 0b1 }, { 0b010, opc{0} }, asm,
@@ -3576,7 +3576,7 @@ multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
- tvecshiftR32> {
+ tvecshiftR32> {
let Inst{20-19} = imm{4-3};
}
def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
@@ -3616,7 +3616,7 @@ multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
- tvecshiftR32> {
+ tvecshiftR32> {
let Inst{20-19} = imm{4-3};
}
def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
@@ -3777,10 +3777,10 @@ multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
- def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
@@ -3789,9 +3789,9 @@ multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
- def : SVE_InReg_Extend<nxv8i16, op, nxv8i1, nxv8i8, !cast<Instruction>(NAME # _H)>;
- def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i8, !cast<Instruction>(NAME # _S)>;
- def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i8, !cast<Instruction>(NAME # _D)>;
+ def : SVE_InReg_Extend<nxv8i16, op, nxv8i1, nxv8i8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i8, !cast<Instruction>(NAME # _S)>;
+ def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i8, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
@@ -3799,15 +3799,15 @@ multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
- def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i16, !cast<Instruction>(NAME # _S)>;
- def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i16, !cast<Instruction>(NAME # _D)>;
+ def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i16, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm,
SDPatternOperator op> {
def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
- def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i32, !cast<Instruction>(NAME # _D)>;
+ def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i32, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
@@ -3817,23 +3817,23 @@ multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
- def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator op> {
def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
- def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Passthru_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
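The SVE_1_Op_Passthru_Pat entries in the unary multiclasses above select the merging form of each instruction: lanes where the governing predicate is false keep the value of the passthru operand. A minimal ACLE-level sketch of that merging behaviour, assuming a toolchain with arm_sve.h; fabs is used only as a representative unary op and the function names are illustrative:

#include <arm_sve.h>

// Merging form: inactive lanes take their value from 'inactive',
// i.e. the passthru operand in the patterns above.
svfloat32_t abs_merge(svbool_t pg, svfloat32_t inactive, svfloat32_t x) {
  return svabs_f32_m(inactive, pg, x);   // fabs z0.s, p0/m, z1.s
}

// The "don't care" form leaves inactive lanes unspecified and lets the
// compiler pick the cheapest encoding.
svfloat32_t abs_any(svbool_t pg, svfloat32_t x) {
  return svabs_f32_x(pg, x);
}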
//===----------------------------------------------------------------------===//
@@ -4002,10 +4002,10 @@ multiclass sve_int_arith_imm1_unsigned<bits<2> opc, string asm, SDPatternOperato
def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>;
def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImm8Pat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImm16Pat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImm32Pat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImm64Pat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImm8Pat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImm16Pat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImm32Pat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImm64Pat, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> {
@@ -4014,10 +4014,10 @@ multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> {
def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8>;
def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -4130,7 +4130,7 @@ multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
-
+
def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
@@ -4289,8 +4289,8 @@ class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm,
let Inst{3-0} = Pd;
let Defs = [NZCV];
- let ElementSize = pprty.ElementSize;
- let isPTestLike = 1;
+ let ElementSize = pprty.ElementSize;
+ let isPTestLike = 1;
}
multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
@@ -4363,7 +4363,7 @@ class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
- let isPTestLike = 1;
+ let isPTestLike = 1;
}
multiclass SVE_SETCC_Imm_Pat<CondCode cc, CondCode commuted_cc,
@@ -4423,8 +4423,8 @@ class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty,
let Inst{3-0} = Pd;
let Defs = [NZCV];
- let ElementSize = pprty.ElementSize;
- let isPTestLike = 1;
+ let ElementSize = pprty.ElementSize;
+ let isPTestLike = 1;
}
multiclass sve_int_ucmp_vi<bits<2> opc, string asm, CondCode cc,
@@ -4469,7 +4469,7 @@ class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt>
}
class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
- RegisterClass gprty, PPRRegOp pprty>
+ RegisterClass gprty, PPRRegOp pprty>
: I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm),
asm, "\t$Pd, $Rn, $Rm",
"", []>, Sched<[]> {
@@ -4487,32 +4487,32 @@ class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
let Inst{3-0} = Pd;
let Defs = [NZCV];
- let ElementSize = pprty.ElementSize;
- let isWhile = 1;
+ let ElementSize = pprty.ElementSize;
+ let isWhile = 1;
}
multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>;
- def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>;
- def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>;
- def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>;
+ def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>;
+ def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>;
+ def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>;
+ def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>;
def : SVE_2_Op_Pat<nxv16i1, op, i32, i32, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pat<nxv8i1, op, i32, i32, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<nxv4i1, op, i32, i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pat<nxv2i1, op, i32, i32, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv8i1, op, i32, i32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i1, op, i32, i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i1, op, i32, i32, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>;
- def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>;
- def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>;
- def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>;
+ def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>;
+ def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>;
+ def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>;
+ def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>;
def : SVE_2_Op_Pat<nxv16i1, op, i64, i64, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pat<nxv8i1, op, i64, i64, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<nxv4i1, op, i64, i64, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pat<nxv2i1, op, i64, i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv8i1, op, i64, i64, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i1, op, i64, i64, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i1, op, i64, i64, !cast<Instruction>(NAME # _D)>;
}
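The while4/while8 multiclasses above map the 32- and 64-bit counter forms of the WHILE* compares onto predicate results for every element size. A short usage sketch of how a WHILELT-generated predicate typically drives a strip-mined loop, assuming arm_sve.h and an SVE target; the function and its arguments are illustrative:

#include <arm_sve.h>
#include <stdint.h>

// Scale a float array in place; whilelt produces the loop-tail predicate.
void scale(float *a, int32_t n, float k) {
  for (int32_t i = 0; i < n; i += (int32_t)svcntw()) {
    svbool_t pg = svwhilelt_b32_s32(i, n);   // whilelt p0.s, w0, w1
    svfloat32_t v = svld1_f32(pg, a + i);    // predicated load
    v = svmul_n_f32_x(pg, v, k);             // multiply by scalar
    svst1_f32(pg, a + i, v);                 // predicated store
  }
}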
class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
@@ -4533,8 +4533,8 @@ class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
let Inst{3-0} = Pd;
let Defs = [NZCV];
- let ElementSize = pprty.ElementSize;
- let isWhile = 1;
+ let ElementSize = pprty.ElementSize;
+ let isWhile = 1;
}
multiclass sve2_int_while_rr<bits<1> rw, string asm, string op> {
@@ -4577,10 +4577,10 @@ multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {
def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32asZPR>;
def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64asZPR>;
- def : SVE_2_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
@@ -4616,10 +4616,10 @@ multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> {
def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32asZPR>;
def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64asZPR>;
- def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
@@ -4840,11 +4840,11 @@ multiclass sve_int_index_rr<string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<nxv4i32, op, i32, i32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2i64, op, i64, i64, !cast<Instruction>(NAME # _D)>;
}
-
+
//===----------------------------------------------------------------------===//
// SVE Bitwise Shift - Predicated Group
//===----------------------------------------------------------------------===//
-
+
class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm),
@@ -4869,19 +4869,19 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps,
- SDPatternOperator op = null_frag> {
- def _B : SVEPseudo2Instr<Ps # _B, 1>,
+multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps,
+ SDPatternOperator op = null_frag> {
+ def _B : SVEPseudo2Instr<Ps # _B, 1>,
sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
- def _H : SVEPseudo2Instr<Ps # _H, 1>,
+ def _H : SVEPseudo2Instr<Ps # _H, 1>,
sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
let Inst{8} = imm{3};
}
- def _S : SVEPseudo2Instr<Ps # _S, 1>,
+ def _S : SVEPseudo2Instr<Ps # _S, 1>,
sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
let Inst{9-8} = imm{4-3};
}
- def _D : SVEPseudo2Instr<Ps # _D, 1>,
+ def _D : SVEPseudo2Instr<Ps # _D, 1>,
sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
@@ -4893,16 +4893,16 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps,
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
}
-// As above but shift amount takes the form of a "vector immediate".
-multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm,
- string Ps, SDPatternOperator op>
-: sve_int_bin_pred_shift_imm_left<opc, asm, Ps, null_frag> {
- def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8, !cast<Instruction>(NAME # _B)>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>;
-}
-
+// As above but shift amount takes the form of a "vector immediate".
+multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm,
+ string Ps, SDPatternOperator op>
+: sve_int_bin_pred_shift_imm_left<opc, asm, Ps, null_frag> {
+ def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>;
+}
+
multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> {
def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
@@ -4939,16 +4939,16 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
}
-// As above but shift amount takes the form of a "vector immediate".
-multiclass sve_int_bin_pred_shift_imm_right_dup<bits<4> opc, string asm,
- string Ps, SDPatternOperator op>
-: sve_int_bin_pred_shift_imm_right<opc, asm, Ps, null_frag> {
- def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>;
-}
-
+// As above but shift amount takes the form of a "vector immediate".
+multiclass sve_int_bin_pred_shift_imm_right_dup<bits<4> opc, string asm,
+ string Ps, SDPatternOperator op>
+: sve_int_bin_pred_shift_imm_right<opc, asm, Ps, null_frag> {
+ def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>;
+}
+
multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op = null_frag> {
def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftR8, FalseLanesZero>;
def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftR16, FalseLanesZero>;
@@ -5089,10 +5089,10 @@ multiclass sve_int_bin_cons_shift_imm_left<bits<2> opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8, !cast<Instruction>(NAME # _B)>;
- def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>;
- def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>;
- def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm,
@@ -5109,12 +5109,12 @@ multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>;
}
-
+
//===----------------------------------------------------------------------===//
// SVE Memory - Store Group
//===----------------------------------------------------------------------===//
@@ -5623,7 +5623,7 @@ class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm,
PPRRegOp pprty>
: I<(outs pprty:$Pd), (ins pprty:$Pn, pprty:$Pm),
asm, "\t$Pd, $Pn, $Pm",
- "", []>, Sched<[]> {
+ "", []>, Sched<[]> {
bits<4> Pd;
bits<4> Pm;
bits<4> Pn;
@@ -5689,7 +5689,7 @@ class sve_int_rdffr_pred<bit s, string asm>
let Inst{4} = 0;
let Inst{3-0} = Pd;
- let Defs = !if(s, [NZCV], []);
+ let Defs = !if(s, [NZCV], []);
let Uses = [FFR];
}
@@ -5816,11 +5816,11 @@ multiclass sve_int_perm_clast_vz<bit ab, string asm, SDPatternOperator op> {
def _S : sve_int_perm_clast_vz<0b10, ab, asm, ZPR32, FPR32>;
def _D : sve_int_perm_clast_vz<0b11, ab, asm, ZPR64, FPR64>;
- def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
-
- def : SVE_3_Op_Pat<bf16, op, nxv8i1, bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def : SVE_3_Op_Pat<bf16, op, nxv8i1, bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
@@ -5860,8 +5860,8 @@ multiclass sve_int_perm_clast_zz<bit ab, string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
-
- def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm,
@@ -5924,8 +5924,8 @@ multiclass sve_int_perm_last_v<bit ab, string asm, SDPatternOperator op> {
def : SVE_2_Op_Pat<f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
-
- def : SVE_2_Op_Pat<bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+
+ def : SVE_2_Op_Pat<bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
@@ -5962,8 +5962,8 @@ multiclass sve_int_perm_splice<string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
-
- def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,
@@ -6019,20 +6019,20 @@ multiclass sve_int_perm_rev_rbit<string asm, SDPatternOperator op> {
def _S : sve_int_perm_rev<0b10, 0b11, asm, ZPR32>;
def _D : sve_int_perm_rev<0b11, 0b11, asm, ZPR64>;
- def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_perm_rev_revb<string asm, SDPatternOperator op> {
+multiclass sve_int_perm_rev_revb<string asm, SDPatternOperator op> {
def _H : sve_int_perm_rev<0b01, 0b00, asm, ZPR16>;
def _S : sve_int_perm_rev<0b10, 0b00, asm, ZPR32>;
def _D : sve_int_perm_rev<0b11, 0b00, asm, ZPR64>;
- def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_perm_rev_revh<string asm, SDPatternOperator op> {
@@ -6139,9 +6139,9 @@ multiclass sve_int_perm_cpy_v<string asm, SDPatternOperator op> {
(!cast<Instruction>(NAME # _S) $passthru, $pg, $splat)>;
def : Pat<(nxv2f64 (op nxv2i1:$pg, f64:$splat, nxv2f64:$passthru)),
(!cast<Instruction>(NAME # _D) $passthru, $pg, $splat)>;
-
- def : Pat<(nxv8bf16 (op nxv8i1:$pg, bf16:$splat, nxv8bf16:$passthru)),
- (!cast<Instruction>(NAME # _H) $passthru, $pg, $splat)>;
+
+ def : Pat<(nxv8bf16 (op nxv8i1:$pg, bf16:$splat, nxv8bf16:$passthru)),
+ (!cast<Instruction>(NAME # _H) $passthru, $pg, $splat)>;
}
class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty>
@@ -6194,8 +6194,8 @@ class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
let Inst{4-0} = Zt;
let mayLoad = 1;
- let Uses = !if(nf, [FFR], []);
- let Defs = !if(nf, [FFR], []);
+ let Uses = !if(nf, [FFR], []);
+ let Defs = !if(nf, [FFR], []);
}
multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
@@ -6397,8 +6397,8 @@ class sve_mem_cld_ss_base<bits<4> dtype, bit ff, dag iops, string asm,
let Inst{4-0} = Zt;
let mayLoad = 1;
- let Uses = !if(ff, [FFR], []);
- let Defs = !if(ff, [FFR], []);
+ let Uses = !if(ff, [FFR], []);
+ let Defs = !if(ff, [FFR], []);
}
multiclass sve_mem_cld_ss<bits<4> dtype, string asm, RegisterOperand listty,
@@ -7227,8 +7227,8 @@ multiclass sve_int_bin_cons_misc_0_c_fexpa<string asm, SDPatternOperator op> {
//===----------------------------------------------------------------------===//
class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
- ZPRRegOp zprty, FPRasZPROperand dstOpType>
-: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
+ ZPRRegOp zprty, FPRasZPROperand dstOpType>
+: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
asm, "\t$Vd, $Pg, $Zn",
"",
[]>, Sched<[]> {
@@ -7246,54 +7246,54 @@ class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
let Inst{4-0} = Vd;
}
-multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm,
- SDPatternOperator op> {
- def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>;
- def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>;
- def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>;
+multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm,
+ SDPatternOperator op> {
+ def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>;
+ def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>;
+ def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>;
- def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pat<nxv2i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<nxv2i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
}
-multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm,
- SDPatternOperator op> {
- def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>;
- def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>;
- def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>;
- def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64asZPR>;
+multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm,
+ SDPatternOperator op> {
+ def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64asZPR>;
+ def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64asZPR>;
+ def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64asZPR>;
+ def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64asZPR>;
- def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pat<nxv2i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<nxv2i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_reduce_1<bits<3> opc, string asm,
- SDPatternOperator op> {
- def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8asZPR>;
- def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16asZPR>;
- def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32asZPR>;
- def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64asZPR>;
+multiclass sve_int_reduce_1<bits<3> opc, string asm,
+ SDPatternOperator op> {
+ def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8asZPR>;
+ def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16asZPR>;
+ def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32asZPR>;
+ def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64asZPR>;
- def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_reduce_2<bits<3> opc, string asm,
- SDPatternOperator op> {
- def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8asZPR>;
- def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16asZPR>;
- def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32asZPR>;
- def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64asZPR>;
+multiclass sve_int_reduce_2<bits<3> opc, string asm,
+ SDPatternOperator op> {
+ def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8asZPR>;
+ def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16asZPR>;
+ def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32asZPR>;
+ def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64asZPR>;
- def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
@@ -7398,7 +7398,7 @@ class sve_int_brkn<bit S, string asm>
let Inst{3-0} = Pdm;
let Constraints = "$Pdm = $_Pdm";
- let Defs = !if(S, [NZCV], []);
+ let Defs = !if(S, [NZCV], []);
}
multiclass sve_int_brkn<bits<1> opc, string asm, SDPatternOperator op> {
@@ -7900,8 +7900,8 @@ multiclass sve_mem_ldor_ss<bits<2> sz, string asm, RegisterOperand listty,
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
(!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
- def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), (AddrCP GPR64sp:$base, gprty:$offset))),
- (!cast<Instruction>(NAME) PPR3bAny:$gp, GPR64sp:$base, gprty:$offset)>;
+ def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), (AddrCP GPR64sp:$base, gprty:$offset))),
+ (!cast<Instruction>(NAME) PPR3bAny:$gp, GPR64sp:$base, gprty:$offset)>;
}
//===----------------------------------------------------------------------===//
@@ -7935,7 +7935,7 @@ multiclass sve_int_perm_bin_perm_128_zz<bits<2> opc, bit P, string asm, SDPatter
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME)>;
def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME)>;
- def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>;
+ def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>;
}
/// Addressing modes
@@ -7954,10 +7954,10 @@ multiclass sve_fp_bin_pred_hfd<SDPatternOperator op> {
def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
- def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
@@ -7982,19 +7982,19 @@ multiclass sve_int_bin_pred_sd<SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
-
-// Predicated pseudo integer two operand instructions. Second operand is an
-// immediate specified by imm_[bhsd].
-multiclass sve_int_shift_pred_bhsd<SDPatternOperator op,
- ComplexPattern imm_b, ComplexPattern imm_h,
- ComplexPattern imm_s, ComplexPattern imm_d> {
- def _UNDEF_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, Operand<i32>, FalseLanesUndef>;
- def _UNDEF_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesUndef>;
- def _UNDEF_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesUndef>;
- def _UNDEF_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesUndef>;
-
- def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Instruction>(NAME # _UNDEF_B)>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, imm_h, !cast<Instruction>(NAME # _UNDEF_H)>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Instruction>(NAME # _UNDEF_S)>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Instruction>(NAME # _UNDEF_D)>;
-}
+
+// Predicated pseudo integer two operand instructions. Second operand is an
+// immediate specified by imm_[bhsd].
+multiclass sve_int_shift_pred_bhsd<SDPatternOperator op,
+ ComplexPattern imm_b, ComplexPattern imm_h,
+ ComplexPattern imm_s, ComplexPattern imm_d> {
+ def _UNDEF_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, Operand<i32>, FalseLanesUndef>;
+ def _UNDEF_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesUndef>;
+ def _UNDEF_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesUndef>;
+ def _UNDEF_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesUndef>;
+
+ def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Instruction>(NAME # _UNDEF_B)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, imm_h, !cast<Instruction>(NAME # _UNDEF_H)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Instruction>(NAME # _UNDEF_S)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Instruction>(NAME # _UNDEF_D)>;
+}
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/contrib/libs/llvm12/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 9911f33371..e312d9d28b 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -37,7 +37,7 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-#define DEBUG_TYPE "aarch64-sve-intrinsic-opts"
+#define DEBUG_TYPE "aarch64-sve-intrinsic-opts"
namespace llvm {
void initializeSVEIntrinsicOptsPass(PassRegistry &);
@@ -177,50 +177,50 @@ bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) {
if (isa<PHINode>(I->getArgOperand(0)))
return processPhiNode(I);
- SmallVector<Instruction *, 32> CandidatesForRemoval;
- Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr;
-
- const auto *IVTy = cast<VectorType>(I->getType());
-
- // Walk the chain of conversions.
- while (Cursor) {
- // If the type of the cursor has fewer lanes than the final result, zeroing
- // must take place, which breaks the equivalence chain.
- const auto *CursorVTy = cast<VectorType>(Cursor->getType());
- if (CursorVTy->getElementCount().getKnownMinValue() <
- IVTy->getElementCount().getKnownMinValue())
- break;
-
- // If the cursor has the same type as I, it is a viable replacement.
- if (Cursor->getType() == IVTy)
- EarliestReplacement = Cursor;
-
- auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
-
- // If this is not an SVE conversion intrinsic, this is the end of the chain.
- if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
- Intrinsic::aarch64_sve_convert_to_svbool ||
- IntrinsicCursor->getIntrinsicID() ==
- Intrinsic::aarch64_sve_convert_from_svbool))
- break;
-
- CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
- Cursor = IntrinsicCursor->getOperand(0);
- }
-
- // If no viable replacement in the conversion chain was found, there is
- // nothing to do.
- if (!EarliestReplacement)
+ SmallVector<Instruction *, 32> CandidatesForRemoval;
+ Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr;
+
+ const auto *IVTy = cast<VectorType>(I->getType());
+
+ // Walk the chain of conversions.
+ while (Cursor) {
+ // If the type of the cursor has fewer lanes than the final result, zeroing
+ // must take place, which breaks the equivalence chain.
+ const auto *CursorVTy = cast<VectorType>(Cursor->getType());
+ if (CursorVTy->getElementCount().getKnownMinValue() <
+ IVTy->getElementCount().getKnownMinValue())
+ break;
+
+ // If the cursor has the same type as I, it is a viable replacement.
+ if (Cursor->getType() == IVTy)
+ EarliestReplacement = Cursor;
+
+ auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
+
+ // If this is not an SVE conversion intrinsic, this is the end of the chain.
+ if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
+ Intrinsic::aarch64_sve_convert_to_svbool ||
+ IntrinsicCursor->getIntrinsicID() ==
+ Intrinsic::aarch64_sve_convert_from_svbool))
+ break;
+
+ CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
+ Cursor = IntrinsicCursor->getOperand(0);
+ }
+
+ // If no viable replacement in the conversion chain was found, there is
+ // nothing to do.
+ if (!EarliestReplacement)
return false;
- I->replaceAllUsesWith(EarliestReplacement);
+ I->replaceAllUsesWith(EarliestReplacement);
I->eraseFromParent();
- while (!CandidatesForRemoval.empty()) {
- Instruction *Candidate = CandidatesForRemoval.pop_back_val();
- if (Candidate->use_empty())
- Candidate->eraseFromParent();
- }
+ while (!CandidatesForRemoval.empty()) {
+ Instruction *Candidate = CandidatesForRemoval.pop_back_val();
+ if (Candidate->use_empty())
+ Candidate->eraseFromParent();
+ }
return true;
}
@@ -276,8 +276,8 @@ bool SVEIntrinsicOpts::runOnModule(Module &M) {
case Intrinsic::aarch64_sve_ptest_any:
case Intrinsic::aarch64_sve_ptest_first:
case Intrinsic::aarch64_sve_ptest_last:
- for (User *U : F.users())
- Functions.insert(cast<Instruction>(U)->getFunction());
+ for (User *U : F.users())
+ Functions.insert(cast<Instruction>(U)->getFunction());
break;
default:
break;
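The comments in the reindented optimizeConvertFromSVBool body above spell out the transformation: walk a chain of convert_to_svbool / convert_from_svbool calls, stop as soon as a step has fewer lanes than the final result (which would zero lanes and break the equivalence), and replace the final conversion with the earliest value of the matching type. A standalone sketch of that chain walk over a simplified node type; the Node struct and findEarliestReplacement are hypothetical stand-ins, not LLVM API:

#include <cstdio>

// Simplified stand-in for an IR value in a conversion chain (hypothetical).
struct Node {
  const char *name;
  unsigned lanes;      // minimum lane count of the value's type
  bool isConvert;      // models the to/from-svbool conversion intrinsics
  Node *operand;       // single operand for conversion nodes
};

// Mirror of the walk above: follow the chain of conversions and return the
// earliest value whose type matches 'root', stopping if any intermediate
// value has fewer lanes than the final result.
Node *findEarliestReplacement(Node *root) {
  Node *earliest = nullptr;
  for (Node *cursor = root->operand; cursor;) {
    if (cursor->lanes < root->lanes)
      break;
    if (cursor->lanes == root->lanes)   // "same type" in this sketch
      earliest = cursor;
    if (!cursor->isConvert)
      break;
    cursor = cursor->operand;
  }
  return earliest;
}

int main() {
  Node p4   = {"p4", 4, false, nullptr};       // original nxv4i1-like value
  Node to   = {"to_svbool", 16, true, &p4};    // widen to nxv16i1
  Node from = {"from_svbool", 4, true, &to};   // back to nxv4i1
  Node *r = findEarliestReplacement(&from);
  std::printf("replace with: %s\n", r ? r->name : "(none)");
  return 0;
}

Running this on from(to(p4)) prints p4, mirroring the case where a round trip through svbool is elided.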
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make
index bb7d4a2c89..cf2f9565d1 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make
+++ b/contrib/libs/llvm12/lib/Target/AArch64/TargetInfo/ya.make
@@ -12,13 +12,13 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12
+ contrib/libs/llvm12/lib/Support
)
ADDINCL(
- contrib/libs/llvm12/lib/Target/AArch64
- contrib/libs/llvm12/lib/Target/AArch64/TargetInfo
+ contrib/libs/llvm12/lib/Target/AArch64
+ contrib/libs/llvm12/lib/Target/AArch64/TargetInfo
)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index ac59d73fd9..8a90a74841 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -26,13 +26,13 @@ namespace llvm {
namespace llvm {
- namespace AArch64DBnXS {
-#define GET_DBNXS_IMPL
-#include "AArch64GenSystemOperands.inc"
- }
-}
-
-namespace llvm {
+ namespace AArch64DBnXS {
+#define GET_DBNXS_IMPL
+#include "AArch64GenSystemOperands.inc"
+ }
+}
+
+namespace llvm {
namespace AArch64DB {
#define GET_DB_IMPL
#include "AArch64GenSystemOperands.inc"
@@ -165,7 +165,7 @@ std::string AArch64SysReg::genericRegisterString(uint32_t Bits) {
namespace llvm {
namespace AArch64TLBI {
-#define GET_TLBITable_IMPL
+#define GET_TLBITable_IMPL
#include "AArch64GenSystemOperands.inc"
}
}
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 1b13c94389..6d737ac8e1 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/libs/llvm12/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -338,14 +338,14 @@ struct SysAliasReg : SysAlias {
: SysAlias(N, E, F), NeedsReg(R) {}
};
-struct SysAliasImm : SysAlias {
- uint16_t ImmValue;
- constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I)
- : SysAlias(N, E), ImmValue(I) {}
- constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I, FeatureBitset F)
- : SysAlias(N, E, F), ImmValue(I) {}
-};
-
+struct SysAliasImm : SysAlias {
+ uint16_t ImmValue;
+ constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I)
+ : SysAlias(N, E), ImmValue(I) {}
+ constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I, FeatureBitset F)
+ : SysAlias(N, E, F), ImmValue(I) {}
+};
+
namespace AArch64AT{
struct AT : SysAlias {
using SysAlias::SysAlias;
@@ -362,14 +362,14 @@ namespace AArch64DB {
#include "AArch64GenSystemOperands.inc"
}
-namespace AArch64DBnXS {
- struct DBnXS : SysAliasImm {
- using SysAliasImm::SysAliasImm;
- };
- #define GET_DBNXS_DECL
- #include "AArch64GenSystemOperands.inc"
-}
-
+namespace AArch64DBnXS {
+ struct DBnXS : SysAliasImm {
+ using SysAliasImm::SysAliasImm;
+ };
+ #define GET_DBNXS_DECL
+ #include "AArch64GenSystemOperands.inc"
+}
+
namespace AArch64DC {
struct DC : SysAlias {
using SysAlias::SysAlias;
@@ -568,7 +568,7 @@ namespace AArch64TLBI {
struct TLBI : SysAliasReg {
using SysAliasReg::SysAliasReg;
};
- #define GET_TLBITable_DECL
+ #define GET_TLBITable_DECL
#include "AArch64GenSystemOperands.inc"
}
@@ -622,7 +622,7 @@ namespace AArch64II {
MO_HI12 = 7,
/// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
- /// reference is actually to the ".refptr.FOO" symbol. This is used for
+ /// reference is actually to the ".refptr.FOO" symbol. This is used for
/// stub symbols on windows.
MO_COFFSTUB = 0x8,
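The SysAliasImm struct restored in AArch64BaseInfo.h above pairs a system-operand alias name and encoding with an immediate value, and the GET_DBNXS_DECL/GET_DBNXS_IMPL includes pull the generated AArch64DBnXS table into that shape. A minimal sketch of the same table-plus-lookup pattern with a simplified SysAlias base; the base class body, table entries, and lookup helper here are assumptions for illustration, only SysAliasImm's shape comes from the diff:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Simplified stand-in for the SysAlias base: just a name and an encoding.
struct SysAlias {
  const char *Name;
  uint16_t Encoding;
  constexpr SysAlias(const char *N, uint16_t E) : Name(N), Encoding(E) {}
};

// Shape taken from the diff: an alias that also carries an immediate.
struct SysAliasImm : SysAlias {
  uint16_t ImmValue;
  constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I)
      : SysAlias(N, E), ImmValue(I) {}
};

// Tiny hand-written table in place of the generated
// AArch64GenSystemOperands.inc entries (values are illustrative).
constexpr SysAliasImm Table[] = {
    {"example_a", 0x10, 16},
    {"example_b", 0x14, 20},
};

const SysAliasImm *lookupByName(const char *Name) {
  for (const auto &E : Table)
    if (std::strcmp(E.Name, Name) == 0)
      return &E;
  return nullptr;
}

int main() {
  if (const SysAliasImm *E = lookupByName("example_b"))
    std::printf("%s -> enc 0x%x, imm %u\n", E->Name, E->Encoding, E->ImmValue);
  return 0;
}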
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make
index 3668c2a650..37d19feb17 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make
+++ b/contrib/libs/llvm12/lib/Target/AArch64/Utils/ya.make
@@ -12,15 +12,15 @@ LICENSE(Apache-2.0 WITH LLVM-exception)
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/Support
)
ADDINCL(
- ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64
- contrib/libs/llvm12/lib/Target/AArch64
- contrib/libs/llvm12/lib/Target/AArch64/Utils
+ ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64
+ contrib/libs/llvm12/lib/Target/AArch64
+ contrib/libs/llvm12/lib/Target/AArch64/Utils
)
NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/llvm12/lib/Target/AArch64/ya.make b/contrib/libs/llvm12/lib/Target/AArch64/ya.make
index 244cbc7f34..0c05f2840f 100644
--- a/contrib/libs/llvm12/lib/Target/AArch64/ya.make
+++ b/contrib/libs/llvm12/lib/Target/AArch64/ya.make
@@ -15,28 +15,28 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
PEERDIR(
- contrib/libs/llvm12
- contrib/libs/llvm12/include
- contrib/libs/llvm12/lib/Analysis
- contrib/libs/llvm12/lib/CodeGen
- contrib/libs/llvm12/lib/CodeGen/AsmPrinter
- contrib/libs/llvm12/lib/CodeGen/GlobalISel
- contrib/libs/llvm12/lib/CodeGen/SelectionDAG
- contrib/libs/llvm12/lib/IR
- contrib/libs/llvm12/lib/MC
- contrib/libs/llvm12/lib/Support
- contrib/libs/llvm12/lib/Target
- contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc
- contrib/libs/llvm12/lib/Target/AArch64/TargetInfo
- contrib/libs/llvm12/lib/Target/AArch64/Utils
- contrib/libs/llvm12/lib/Transforms/CFGuard
- contrib/libs/llvm12/lib/Transforms/Scalar
- contrib/libs/llvm12/lib/Transforms/Utils
+ contrib/libs/llvm12
+ contrib/libs/llvm12/include
+ contrib/libs/llvm12/lib/Analysis
+ contrib/libs/llvm12/lib/CodeGen
+ contrib/libs/llvm12/lib/CodeGen/AsmPrinter
+ contrib/libs/llvm12/lib/CodeGen/GlobalISel
+ contrib/libs/llvm12/lib/CodeGen/SelectionDAG
+ contrib/libs/llvm12/lib/IR
+ contrib/libs/llvm12/lib/MC
+ contrib/libs/llvm12/lib/Support
+ contrib/libs/llvm12/lib/Target
+ contrib/libs/llvm12/lib/Target/AArch64/MCTargetDesc
+ contrib/libs/llvm12/lib/Target/AArch64/TargetInfo
+ contrib/libs/llvm12/lib/Target/AArch64/Utils
+ contrib/libs/llvm12/lib/Transforms/CFGuard
+ contrib/libs/llvm12/lib/Transforms/Scalar
+ contrib/libs/llvm12/lib/Transforms/Utils
)
ADDINCL(
- ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64
- contrib/libs/llvm12/lib/Target/AArch64
+ ${ARCADIA_BUILD_ROOT}/contrib/libs/llvm12/lib/Target/AArch64
+ contrib/libs/llvm12/lib/Target/AArch64
)
NO_COMPILER_WARNINGS()
@@ -88,8 +88,8 @@ SRCS(
GISel/AArch64InstructionSelector.cpp
GISel/AArch64LegalizerInfo.cpp
GISel/AArch64PostLegalizerCombiner.cpp
- GISel/AArch64PostLegalizerLowering.cpp
- GISel/AArch64PostSelectOptimize.cpp
+ GISel/AArch64PostLegalizerLowering.cpp
+ GISel/AArch64PostSelectOptimize.cpp
GISel/AArch64PreLegalizerCombiner.cpp
GISel/AArch64RegisterBankInfo.cpp
SVEIntrinsicOpts.cpp