diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td | |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td')
-rw-r--r-- | contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td | 248 |
1 files changed, 124 insertions, 124 deletions
diff --git a/contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td b/contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td index a185a2007b..29ac01b143 100644 --- a/contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td +++ b/contrib/libs/llvm12/lib/Target/X86/X86InstrSSE.td @@ -1242,8 +1242,8 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", /// SSE 2 Only // Convert scalar double to scalar single -let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX], - ExeDomain = SSEPackedSingle in { +let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX], + ExeDomain = SSEPackedSingle in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, @@ -1261,7 +1261,7 @@ def : Pat<(f32 (any_fpround FR64:$src)), (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>, Requires<[UseAVX]>; -let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in { +let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in { def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (any_fpround FR64:$src))]>, @@ -1273,7 +1273,7 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC; } -let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in { +let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in { def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1307,7 +1307,7 @@ def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem, // Convert scalar single to scalar double // SSE2 instructions with XS prefix -let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in { +let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in { def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, @@ -1327,7 +1327,7 @@ def : Pat<(f64 (any_fpextend FR32:$src)), def : Pat<(any_fpextend (loadf32 addr:$src)), (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>; -let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in { +let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in { def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (any_fpextend FR32:$src))]>, @@ -1339,8 +1339,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC; } // isCodeGenOnly = 1 -let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1, - ExeDomain = SSEPackedSingle in { +let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1, + ExeDomain = SSEPackedSingle in { def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -3778,7 +3778,7 @@ let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { VEX_4V, VEX_WIG; defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128, i128mem, SchedWriteShuffle.XMM, load, 0>, - VEX_4V, VEX_WIG; + VEX_4V, VEX_WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { @@ -3794,7 +3794,7 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { VEX_4V, VEX_L, VEX_WIG; defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256, i256mem, SchedWriteShuffle.YMM, load, 0>, - VEX_4V, VEX_L, VEX_WIG; + VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { @@ -3930,7 +3930,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (X86pinsrw VR128:$src1, GR32orGR64:$src2, timm:$src3))]>, + (X86pinsrw VR128:$src1, GR32orGR64:$src2, timm:$src3))]>, Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>; def rm : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, @@ -3940,7 +3940,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), - timm:$src3))]>, + timm:$src3))]>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; } @@ -3950,13 +3950,13 @@ def VPEXTRWrr : Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), - timm:$src2))]>, + timm:$src2))]>, PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>; def PEXTRWrr : PDIi8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), - timm:$src2))]>, + timm:$src2))]>, Sched<[WriteVecExtract]>; // Insert @@ -4756,7 +4756,7 @@ let isCommutable = 0 in { SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG; defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128, load, i128mem, - SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG; + SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG; defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", int_x86_ssse3_psign_b_128, SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG; @@ -4802,7 +4802,7 @@ let isCommutable = 0 in { SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256, load, i256mem, - SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; + SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b, SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG; defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w, @@ -5153,14 +5153,14 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1), - timm:$src2))]>, + timm:$src2))]>, Sched<[WriteVecExtract]>; let hasSideEffects = 0, mayStore = 1 in def mr : SS4AIi8<opc, MRMDestMem, (outs), (ins i8mem:$dst, VR128:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), timm:$src2))), + [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), timm:$src2))), addr:$dst)]>, Sched<[WriteVecExtractSt]>; } @@ -5184,7 +5184,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> { (ins i16mem:$dst, VR128:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))), + [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), timm:$src2))), addr:$dst)]>, Sched<[WriteVecExtractSt]>; } @@ -5274,7 +5274,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86pinsrb VR128:$src1, GR32orGR64:$src2, timm:$src3))]>, + (X86pinsrb VR128:$src1, GR32orGR64:$src2, timm:$src3))]>, Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>; def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i8mem:$src2, u8imm:$src3), @@ -5283,7 +5283,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), timm:$src3))]>, + (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), timm:$src3))]>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; } @@ -6503,7 +6503,7 @@ multiclass pcmpistrm_SS42AI<string asm> { let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in { let Predicates = [HasAVX] in - defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG; + defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG; defm PCMPISTRM : pcmpistrm_SS42AI<"pcmpistrm"> ; } @@ -6521,7 +6521,7 @@ multiclass SS42AI_pcmpestrm<string asm> { let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { let Predicates = [HasAVX] in - defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG; + defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG; defm PCMPESTRM : SS42AI_pcmpestrm<"pcmpestrm">; } @@ -6539,7 +6539,7 @@ multiclass SS42AI_pcmpistri<string asm> { let Defs = [ECX, EFLAGS], hasSideEffects = 0 in { let Predicates = [HasAVX] in - defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG; + defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG; defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; } @@ -6557,7 +6557,7 @@ multiclass SS42AI_pcmpestri<string asm> { let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { let Predicates = [HasAVX] in - defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG; + defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG; defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; } @@ -7016,19 +7016,19 @@ def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst), Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L; let Predicates = [HasAVX, NoVLX] in { -def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128 addr:$src)>; -def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128 addr:$src)>; // NOTE: We're using FP instructions here, but execution domain fixing can // convert to integer when profitable. -def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128 addr:$src)>; -def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128 addr:$src)>; -def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128 addr:$src)>; -def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128 addr:$src)>; } @@ -7164,68 +7164,68 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", WriteFMaskMove64, WriteFMaskMove64Y>; //===----------------------------------------------------------------------===// -// AVX_VNNI -//===----------------------------------------------------------------------===// -let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst" in -multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - bit IsCommutable> { - let isCommutable = IsCommutable in - def rr : AVX8I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, - VR128:$src2, VR128:$src3)))]>, - VEX_4V, Sched<[SchedWriteVecIMul.XMM]>; - - def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i128mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2, - (loadv4i32 addr:$src3))))]>, - VEX_4V, Sched<[SchedWriteVecIMul.XMM]>; - - let isCommutable = IsCommutable in - def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, - VR256:$src2, VR256:$src3)))]>, - VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>; - - def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, i256mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2, - (loadv8i32 addr:$src3))))]>, - VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>; -} - -defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>, ExplicitVEXPrefix; -defm VPDPBUSDS : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>, ExplicitVEXPrefix; -defm VPDPWSSD : avx_vnni_rm<0x52, "vpdpwssd", X86Vpdpwssd, 1>, ExplicitVEXPrefix; -defm VPDPWSSDS : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>, ExplicitVEXPrefix; - -def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs), - (X86vpmaddwd node:$lhs, node:$rhs), [{ - return N->hasOneUse(); -}]>; - -let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in { - def : Pat<(v8i32 (add VR256:$src1, - (X86vpmaddwd_su VR256:$src2, VR256:$src3))), - (VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(v8i32 (add VR256:$src1, - (X86vpmaddwd_su VR256:$src2, (load addr:$src3)))), - (VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(v4i32 (add VR128:$src1, - (X86vpmaddwd_su VR128:$src2, VR128:$src3))), - (VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(v4i32 (add VR128:$src1, - (X86vpmaddwd_su VR128:$src2, (load addr:$src3)))), - (VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>; -} - -//===----------------------------------------------------------------------===// +// AVX_VNNI +//===----------------------------------------------------------------------===// +let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI], Constraints = "$src1 = $dst" in +multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, + bit IsCommutable> { + let isCommutable = IsCommutable in + def rr : AVX8I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, + VR128:$src2, VR128:$src3)))]>, + VEX_4V, Sched<[SchedWriteVecIMul.XMM]>; + + def rm : AVX8I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i128mem:$src3), + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2, + (loadv4i32 addr:$src3))))]>, + VEX_4V, Sched<[SchedWriteVecIMul.XMM]>; + + let isCommutable = IsCommutable in + def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, VR256:$src3), + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, + VR256:$src2, VR256:$src3)))]>, + VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>; + + def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, i256mem:$src3), + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2, + (loadv8i32 addr:$src3))))]>, + VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>; +} + +defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>, ExplicitVEXPrefix; +defm VPDPBUSDS : avx_vnni_rm<0x51, "vpdpbusds", X86Vpdpbusds, 0>, ExplicitVEXPrefix; +defm VPDPWSSD : avx_vnni_rm<0x52, "vpdpwssd", X86Vpdpwssd, 1>, ExplicitVEXPrefix; +defm VPDPWSSDS : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>, ExplicitVEXPrefix; + +def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs), + (X86vpmaddwd node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; + +let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in { + def : Pat<(v8i32 (add VR256:$src1, + (X86vpmaddwd_su VR256:$src2, VR256:$src3))), + (VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(v8i32 (add VR256:$src1, + (X86vpmaddwd_su VR256:$src2, (load addr:$src3)))), + (VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(v4i32 (add VR128:$src1, + (X86vpmaddwd_su VR128:$src2, VR128:$src3))), + (VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(v4i32 (add VR128:$src1, + (X86vpmaddwd_su VR128:$src2, (load addr:$src3)))), + (VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>; +} + +//===----------------------------------------------------------------------===// // VPERMIL - Permute Single and Double Floating-Point Values // @@ -7287,12 +7287,12 @@ let ExeDomain = SSEPackedSingle in { let isCommutable = 1 in def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, u8imm:$src3), - "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, - VEX_4V, VEX_L, Sched<[WriteFShuffle256]>; + "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, + VEX_4V, VEX_L, Sched<[WriteFShuffle256]>; def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, u8imm:$src3), - "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, - VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>; + "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, + VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>; } // Immediate transform to help with commuting. @@ -7300,27 +7300,27 @@ def Perm2XCommuteImm : SDNodeXForm<timm, [{ return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N)); }]>; -multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> { - def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))), - (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>; - def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))), - (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>; - // Pattern with load in other operand. - def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))), - (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, - (Perm2XCommuteImm timm:$imm))>; -} - +multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> { + def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))), + (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>; + def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))), + (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>; + // Pattern with load in other operand. + def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))), + (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, + (Perm2XCommuteImm timm:$imm))>; +} + let Predicates = [HasAVX] in { - defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>; - defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>; + defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>; + defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>; } let Predicates = [HasAVX1Only] in { - defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>; - defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>; - defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>; - defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>; + defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>; + defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>; + defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>; + defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>; } //===----------------------------------------------------------------------===// @@ -7689,24 +7689,24 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64, WriteFShuffle256, f256mem>, VEX_W; //===----------------------------------------------------------------------===// -// VPERM2I128 - Permute Integer vector Values in 128-bit chunks +// VPERM2I128 - Permute Integer vector Values in 128-bit chunks // let isCommutable = 1 in def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, u8imm:$src3), - "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, - Sched<[WriteShuffle256]>, VEX_4V, VEX_L; + "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, + Sched<[WriteShuffle256]>, VEX_4V, VEX_L; def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, u8imm:$src3), - "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, + "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; -let Predicates = [HasAVX2] in { - defm : vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>; - defm : vperm2x128_lowering<"VPERM2I128", v8i32, loadv8i32>; - defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>; - defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>; -} +let Predicates = [HasAVX2] in { + defm : vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>; + defm : vperm2x128_lowering<"VPERM2I128", v8i32, loadv8i32>; + defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>; + defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>; +} //===----------------------------------------------------------------------===// // VINSERTI128 - Insert packed integer values |