aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td
diff options
context:
space:
mode:
authorshadchin <shadchin@yandex-team.ru>2022-02-10 16:44:30 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:44:30 +0300
commit2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td
parent6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
downloadydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td')
-rw-r--r--contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td248
1 files changed, 124 insertions, 124 deletions
diff --git a/contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td b/contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td
index a185a2007b..29ac01b143 100644
--- a/contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td
@@ -1242,8 +1242,8 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
/// SSE 2 Only
// Convert scalar double to scalar single
-let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX],
- ExeDomain = SSEPackedSingle in {
+let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX],
+ ExeDomain = SSEPackedSingle in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -1261,7 +1261,7 @@ def : Pat<(f32 (any_fpround FR64:$src)),
(VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
Requires<[UseAVX]>;
-let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
+let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (any_fpround FR64:$src))]>,
@@ -1273,7 +1273,7 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC;
}
-let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in {
+let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in {
def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1307,7 +1307,7 @@ def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
-let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
+let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -1327,7 +1327,7 @@ def : Pat<(f64 (any_fpextend FR32:$src)),
def : Pat<(any_fpextend (loadf32 addr:$src)),
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
-let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
+let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (any_fpextend FR32:$src))]>,
@@ -1339,8 +1339,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC;
} // isCodeGenOnly = 1
-let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
- ExeDomain = SSEPackedSingle in {
+let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
+ ExeDomain = SSEPackedSingle in {
def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -3778,7 +3778,7 @@ let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
VEX_4V, VEX_WIG;
defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
@@ -3794,7 +3794,7 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -3930,7 +3930,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
+ (X86pinsrw VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
def rm : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
@@ -3940,7 +3940,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- timm:$src3))]>,
+ timm:$src3))]>,
Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
@@ -3950,13 +3950,13 @@ def VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
- timm:$src2))]>,
+ timm:$src2))]>,
PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>;
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
- timm:$src2))]>,
+ timm:$src2))]>,
Sched<[WriteVecExtract]>;
// Insert
@@ -4756,7 +4756,7 @@ let isCommutable = 0 in {
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
@@ -4802,7 +4802,7 @@ let isCommutable = 0 in {
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
@@ -5153,14 +5153,14 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
- timm:$src2))]>,
+ timm:$src2))]>,
Sched<[WriteVecExtract]>;
let hasSideEffects = 0, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), timm:$src2))),
+ [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), timm:$src2))),
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
@@ -5184,7 +5184,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
(ins i16mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))),
+ [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))),
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
@@ -5274,7 +5274,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86pinsrb VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
+ (X86pinsrb VR128:$src1, GR32orGR64:$src2, timm:$src3))]>,
Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i8mem:$src2, u8imm:$src3),
@@ -5283,7 +5283,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), timm:$src3))]>,
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), timm:$src3))]>,
Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}
@@ -6503,7 +6503,7 @@ multiclass pcmpistrm_SS42AI<string asm> {
let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
- defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG;
+ defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG;
defm PCMPISTRM : pcmpistrm_SS42AI<"pcmpistrm"> ;
}
@@ -6521,7 +6521,7 @@ multiclass SS42AI_pcmpestrm<string asm> {
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
- defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG;
+ defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG;
defm PCMPESTRM : SS42AI_pcmpestrm<"pcmpestrm">;
}
@@ -6539,7 +6539,7 @@ multiclass SS42AI_pcmpistri<string asm> {
let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
- defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG;
+ defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG;
defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
}
@@ -6557,7 +6557,7 @@ multiclass SS42AI_pcmpestri<string asm> {
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
- defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG;
+ defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG;
defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
}
@@ -7016,19 +7016,19 @@ def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;
let Predicates = [HasAVX, NoVLX] in {
-def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
+def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
+def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
// NOTE: We're using FP instructions here, but execution domain fixing can
// convert to integer when profitable.
-def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
+def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
+def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
+def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
+def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
}
@@ -7164,68 +7164,68 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
WriteFMaskMove64, WriteFMaskMove64Y>;
//===----------------------------------------------------------------------===//
-// AVX_VNNI
-//===----------------------------------------------------------------------===//
-let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst" in
-multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- bit IsCommutable> {
- let isCommutable = IsCommutable in
- def rr : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (v4i32 (OpNode VR128:$src1,
- VR128:$src2, VR128:$src3)))]>,
- VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
-
- def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i128mem:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
- (loadv4i32 addr:$src3))))]>,
- VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
-
- let isCommutable = IsCommutable in
- def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
- VR256:$src2, VR256:$src3)))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
-
- def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, i256mem:$src3),
- !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
- (loadv8i32 addr:$src3))))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
-}
-
-defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>, ExplicitVEXPrefix;
-defm VPDPBUSDS : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>, ExplicitVEXPrefix;
-defm VPDPWSSD : avx_vnni_rm<0x52, "vpdpwssd", X86Vpdpwssd, 1>, ExplicitVEXPrefix;
-defm VPDPWSSDS : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>, ExplicitVEXPrefix;
-
-def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
- (X86vpmaddwd node:$lhs, node:$rhs), [{
- return N->hasOneUse();
-}]>;
-
-let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in {
- def : Pat<(v8i32 (add VR256:$src1,
- (X86vpmaddwd_su VR256:$src2, VR256:$src3))),
- (VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>;
- def : Pat<(v8i32 (add VR256:$src1,
- (X86vpmaddwd_su VR256:$src2, (load addr:$src3)))),
- (VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>;
- def : Pat<(v4i32 (add VR128:$src1,
- (X86vpmaddwd_su VR128:$src2, VR128:$src3))),
- (VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
- def : Pat<(v4i32 (add VR128:$src1,
- (X86vpmaddwd_su VR128:$src2, (load addr:$src3)))),
- (VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>;
-}
-
-//===----------------------------------------------------------------------===//
+// AVX_VNNI
+//===----------------------------------------------------------------------===//
+let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst" in
+multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit IsCommutable> {
+ let isCommutable = IsCommutable in
+ def rr : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (v4i32 (OpNode VR128:$src1,
+ VR128:$src2, VR128:$src3)))]>,
+ VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+
+ def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i128mem:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
+ (loadv4i32 addr:$src3))))]>,
+ VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+
+ let isCommutable = IsCommutable in
+ def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
+ VR256:$src2, VR256:$src3)))]>,
+ VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
+
+ def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i256mem:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
+ (loadv8i32 addr:$src3))))]>,
+ VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
+}
+
+defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>, ExplicitVEXPrefix;
+defm VPDPBUSDS : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>, ExplicitVEXPrefix;
+defm VPDPWSSD : avx_vnni_rm<0x52, "vpdpwssd", X86Vpdpwssd, 1>, ExplicitVEXPrefix;
+defm VPDPWSSDS : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>, ExplicitVEXPrefix;
+
+def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86vpmaddwd node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in {
+ def : Pat<(v8i32 (add VR256:$src1,
+ (X86vpmaddwd_su VR256:$src2, VR256:$src3))),
+ (VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(v8i32 (add VR256:$src1,
+ (X86vpmaddwd_su VR256:$src2, (load addr:$src3)))),
+ (VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(v4i32 (add VR128:$src1,
+ (X86vpmaddwd_su VR128:$src2, VR128:$src3))),
+ (VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v4i32 (add VR128:$src1,
+ (X86vpmaddwd_su VR128:$src2, (load addr:$src3)))),
+ (VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>;
+}
+
+//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//
@@ -7287,12 +7287,12 @@ let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, u8imm:$src3),
- "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
- VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+ VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, u8imm:$src3),
- "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
- VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+ VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}
// Immediate transform to help with commuting.
@@ -7300,27 +7300,27 @@ def Perm2XCommuteImm : SDNodeXForm<timm, [{
return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
}]>;
-multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
- def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
- (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
- def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
- (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
- // Pattern with load in other operand.
- def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
- (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
- (Perm2XCommuteImm timm:$imm))>;
-}
-
+multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
+ def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
+ (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
+ def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
+ (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
+ // Pattern with load in other operand.
+ def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
+ (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
+ (Perm2XCommuteImm timm:$imm))>;
+}
+
let Predicates = [HasAVX] in {
- defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
- defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
+ defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
+ defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
}
let Predicates = [HasAVX1Only] in {
- defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
- defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
- defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
- defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
+ defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
+ defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
+ defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
+ defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
}
//===----------------------------------------------------------------------===//
@@ -7689,24 +7689,24 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
WriteFShuffle256, f256mem>, VEX_W;
//===----------------------------------------------------------------------===//
-// VPERM2I128 - Permute Integer vector Values in 128-bit chunks
+// VPERM2I128 - Permute Integer vector Values in 128-bit chunks
//
let isCommutable = 1 in
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, u8imm:$src3),
- "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
- Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
+ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+ Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, u8imm:$src3),
- "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
-let Predicates = [HasAVX2] in {
- defm : vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>;
- defm : vperm2x128_lowering<"VPERM2I128", v8i32, loadv8i32>;
- defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>;
- defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>;
-}
+let Predicates = [HasAVX2] in {
+ defm : vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>;
+ defm : vperm2x128_lowering<"VPERM2I128", v8i32, loadv8i32>;
+ defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>;
+ defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>;
+}
//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values