diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:39 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:39 +0300 |
commit | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch) | |
tree | 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td | |
parent | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff) | |
download | ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td')
-rw-r--r-- | contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td | 144 |
1 files changed, 72 insertions, 72 deletions
diff --git a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td index 0f5d53b57d..a8c0d05d91 100644 --- a/contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/libs/llvm12/lib/Target/ARM/ARMInstrNEON.td @@ -509,7 +509,7 @@ def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>; def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>; def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>; -def NEONvbsp : SDNode<"ARMISD::VBSP", +def NEONvbsp : SDNode<"ARMISD::VBSP", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, @@ -4197,10 +4197,10 @@ def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16", defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, "vaddl", "s", add, sext, 1>; defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "u", add, zanyext, 1>; + "vaddl", "u", add, zanyext, 1>; // VADDW : Vector Add Wide (Q = Q + D) defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; -defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>; +defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>; // VHADD : Vector Halving Add defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, @@ -4512,9 +4512,9 @@ let Predicates = [HasNEON, HasV8_1a] in { (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), - (v4i32 (int_arm_neon_vqrdmulh + (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src2), - (v4i32 (ARMvduplane (v4i32 QPR:$src3), + (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane)))))), (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), @@ -4565,17 +4565,17 @@ let Predicates = [HasNEON, HasV8_1a] in { (v2i32 DPR:$Vn), (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), imm:$lane)))))), - (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane))>; def : Pat<(v8i16 (ssubsat (v8i16 QPR:$src1), (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src2), - (v8i16 (ARMvduplane (v8i16 QPR:$src3), + (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane)))))), (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), - (v4i16 (EXTRACT_SUBREG + (v4i16 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; @@ -4587,7 +4587,7 @@ let Predicates = [HasNEON, HasV8_1a] in { imm:$lane)))))), (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), - (v2i32 (EXTRACT_SUBREG + (v2i32 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; @@ -5045,10 +5045,10 @@ def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16", defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, "vsubl", "s", sub, sext, 0>; defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "u", sub, zanyext, 0>; + "vsubl", "u", sub, zanyext, 0>; // VSUBW : Vector Subtract Wide (Q = Q - D) defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; -defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>; +defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>; // VHSUB : Vector Halving Subtract defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, @@ -5259,9 +5259,9 @@ def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm", // Vector Bitwise Operations. def vnotd : PatFrag<(ops node:$in), - (xor node:$in, ARMimmAllOnesD)>; + (xor node:$in, ARMimmAllOnesD)>; def vnotq : PatFrag<(ops node:$in), - (xor node:$in, ARMimmAllOnesV)>; + (xor node:$in, ARMimmAllOnesV)>; // VAND : Vector Bitwise AND @@ -5428,84 +5428,84 @@ def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; } -// The TwoAddress pass will not go looking for equivalent operations -// with different register constraints; it just inserts copies. -// That is why pseudo VBSP implemented. Is is expanded later into -// VBIT/VBIF/VBSL taking into account register constraints to avoid copies. -def VBSPd - : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), - IIC_VBINiD, "", - [(set DPR:$Vd, - (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; +// The TwoAddress pass will not go looking for equivalent operations +// with different register constraints; it just inserts copies. +// That is why pseudo VBSP implemented. Is is expanded later into +// VBIT/VBIF/VBSL taking into account register constraints to avoid copies. +def VBSPd + : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + IIC_VBINiD, "", + [(set DPR:$Vd, + (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; let Predicates = [HasNEON] in { def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), - (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), - (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), - (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), - (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), - (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; } -def VBSPq - : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), - IIC_VBINiQ, "", - [(set QPR:$Vd, - (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; +def VBSPq + : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + IIC_VBINiQ, "", + [(set QPR:$Vd, + (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; let Predicates = [HasNEON] in { def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), - (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), - (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), - (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), - (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), - (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; + (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; -} - -// VBSL : Vector Bitwise Select -def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), - (ins DPR:$src1, DPR:$Vn, DPR:$Vm), - N3RegFrm, IIC_VBINiD, - "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - []>; - -def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), - (ins QPR:$src1, QPR:$Vn, QPR:$Vm), - N3RegFrm, IIC_VBINiQ, - "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - []>; - + (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; +} + +// VBSL : Vector Bitwise Select +def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + N3RegFrm, IIC_VBINiD, + "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", + []>; + +def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + N3RegFrm, IIC_VBINiQ, + "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", + []>; + // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, @@ -6040,9 +6040,9 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, // Vector Negate. def vnegd : PatFrag<(ops node:$in), - (sub ARMimmAllZerosD, node:$in)>; + (sub ARMimmAllZerosD, node:$in)>; def vnegq : PatFrag<(ops node:$in), - (sub ARMimmAllZerosV, node:$in)>; + (sub ARMimmAllZerosV, node:$in)>; class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm), @@ -6256,11 +6256,11 @@ defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32, let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in { def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm, - [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))], + [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))], (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>, Requires<[HasZCZ]>; def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm, - [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))], + [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))], (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>, Requires<[HasZCZ]>; } @@ -7946,7 +7946,7 @@ let Predicates = [HasNEON,IsLE] in { (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; } -// The following patterns are basically a copy of the patterns above, +// The following patterns are basically a copy of the patterns above, // however with an additional VREV16d instruction to convert data // loaded by VLD1LN into proper vector format in big endian mode. let Predicates = [HasNEON,IsBE] in { @@ -9079,11 +9079,11 @@ multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy, (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>; } -def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>; -def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>; +def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>; +def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>; -defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>; -defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; +defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>; +defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; class BF16MM<bit Q, RegisterClass RegTy, string opc> @@ -9091,8 +9091,8 @@ class BF16MM<bit Q, RegisterClass RegTy, (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD, "", "", [(set (v4f32 QPR:$dst), (int_arm_neon_bfmmla (v4f32 QPR:$Vd), - (v8bf16 QPR:$Vn), - (v8bf16 QPR:$Vm)))]> { + (v8bf16 QPR:$Vn), + (v8bf16 QPR:$Vm)))]> { let Constraints = "$dst = $Vd"; let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm"); let DecoderNamespace = "VFPV8"; @@ -9106,8 +9106,8 @@ class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode> NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "", [(set (v4f32 QPR:$dst), (OpNode (v4f32 QPR:$Vd), - (v8bf16 QPR:$Vn), - (v8bf16 QPR:$Vm)))]> { + (v8bf16 QPR:$Vn), + (v8bf16 QPR:$Vm)))]> { let Constraints = "$dst = $Vd"; let DecoderNamespace = "VFPV8"; } @@ -9128,9 +9128,9 @@ multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> { def : Pat< (v4f32 (OpNode (v4f32 QPR:$Vd), - (v8bf16 QPR:$Vn), - (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm), - VectorIndex16:$lane)))), + (v8bf16 QPR:$Vn), + (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm), + VectorIndex16:$lane)))), (!cast<Instruction>(NAME) QPR:$Vd, QPR:$Vn, (EXTRACT_SUBREG QPR:$Vm, |