diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/lib/Target/X86/X86InstrAVX512.td | |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/X86/X86InstrAVX512.td')
-rw-r--r-- | contrib/libs/llvm12/lib/Target/X86/X86InstrAVX512.td | 304 |
1 files changed, 152 insertions, 152 deletions
diff --git a/contrib/libs/llvm12/lib/Target/X86/X86InstrAVX512.td b/contrib/libs/llvm12/lib/Target/X86/X86InstrAVX512.td index 19012797ae..654dcc1b39 100644 --- a/contrib/libs/llvm12/lib/Target/X86/X86InstrAVX512.td +++ b/contrib/libs/llvm12/lib/Target/X86/X86InstrAVX512.td @@ -1123,10 +1123,10 @@ defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, EXTRACT_get_vextract256_imm, [HasAVX512]>; // vextractps - extract 32 bits from XMM -def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst), +def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst), (ins VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, + [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, EVEX, VEX_WIG, Sched<[WriteVecExtract]>; def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs), @@ -1414,12 +1414,12 @@ defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", avx512vl_i64_info, HasAVX512, 1>, VEX_W1X; multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, - SDPatternOperator OpNode, - X86VectorVTInfo _Dst, - X86VectorVTInfo _Src> { + SDPatternOperator OpNode, + X86VectorVTInfo _Dst, + X86VectorVTInfo _Src> { defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", - (_Dst.VT (OpNode addr:$src))>, + (_Dst.VT (OpNode addr:$src))>, Sched<[SchedWriteShuffle.YMM.Folded]>, AVX5128IBase, EVEX; } @@ -1428,14 +1428,14 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, // the unmasked patterns so that we only use the DQ instructions when masking // is requested. multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr, - SDPatternOperator OpNode, - X86VectorVTInfo _Dst, - X86VectorVTInfo _Src> { + SDPatternOperator OpNode, + X86VectorVTInfo _Dst, + X86VectorVTInfo _Src> { let hasSideEffects = 0, mayLoad = 1 in defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", (null_frag), - (_Dst.VT (OpNode addr:$src))>, + (_Dst.VT (OpNode addr:$src))>, Sched<[SchedWriteShuffle.YMM.Folded]>, AVX5128IBase, EVEX; } @@ -1445,194 +1445,194 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr, // defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", - X86SubVBroadcastld128, v16i32_info, v4i32x_info>, + X86SubVBroadcastld128, v16i32_info, v4i32x_info>, EVEX_V512, EVEX_CD8<32, CD8VT4>; defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", - X86SubVBroadcastld128, v16f32_info, v4f32x_info>, + X86SubVBroadcastld128, v16f32_info, v4f32x_info>, EVEX_V512, EVEX_CD8<32, CD8VT4>; defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", - X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W, + X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4", - X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W, + X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; let Predicates = [HasAVX512] in { -def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)), +def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)), (VBROADCASTF64X4rm addr:$src)>; -def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)), - (VBROADCASTF64X4rm addr:$src)>; -def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)), - (VBROADCASTI64X4rm addr:$src)>; -def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)), +def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)), + (VBROADCASTF64X4rm addr:$src)>; +def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)), (VBROADCASTI64X4rm addr:$src)>; -def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)), +def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)), (VBROADCASTI64X4rm addr:$src)>; -def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)), +def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)), (VBROADCASTI64X4rm addr:$src)>; +def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)), + (VBROADCASTI64X4rm addr:$src)>; -def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF32X4rm addr:$src)>; -def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)), - (VBROADCASTF32X4rm addr:$src)>; -def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)), - (VBROADCASTI32X4rm addr:$src)>; -def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)), + (VBROADCASTF32X4rm addr:$src)>; +def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTI32X4rm addr:$src)>; -def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTI32X4rm addr:$src)>; -def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTI32X4rm addr:$src)>; +def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)), + (VBROADCASTI32X4rm addr:$src)>; // Patterns for selects of bitcasted operations. def : Pat<(vselect_mask VK16WM:$mask, - (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))), + (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))), (v16f32 immAllZerosV)), (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK16WM:$mask, - (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))), + (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))), VR512:$src0), (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK16WM:$mask, - (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))), + (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))), (v16i32 immAllZerosV)), (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK16WM:$mask, - (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))), + (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))), VR512:$src0), (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, - (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))), + (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))), (v8f64 immAllZerosV)), (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, - (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))), + (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))), VR512:$src0), (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, - (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))), + (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))), (v8i64 immAllZerosV)), (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, - (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))), + (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))), VR512:$src0), (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; } let Predicates = [HasVLX] in { defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", - X86SubVBroadcastld128, v8i32x_info, v4i32x_info>, + X86SubVBroadcastld128, v8i32x_info, v4i32x_info>, EVEX_V256, EVEX_CD8<32, CD8VT4>; defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", - X86SubVBroadcastld128, v8f32x_info, v4f32x_info>, + X86SubVBroadcastld128, v8f32x_info, v4f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VT4>; -def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF32X4Z256rm addr:$src)>; -def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)), - (VBROADCASTF32X4Z256rm addr:$src)>; -def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)), - (VBROADCASTI32X4Z256rm addr:$src)>; -def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)), + (VBROADCASTF32X4Z256rm addr:$src)>; +def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTI32X4Z256rm addr:$src)>; -def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTI32X4Z256rm addr:$src)>; -def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)), +def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTI32X4Z256rm addr:$src)>; +def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)), + (VBROADCASTI32X4Z256rm addr:$src)>; // Patterns for selects of bitcasted operations. def : Pat<(vselect_mask VK8WM:$mask, - (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))), + (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))), (v8f32 immAllZerosV)), (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, - (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))), + (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))), VR256X:$src0), (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, - (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))), + (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))), (v8i32 immAllZerosV)), (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, - (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))), + (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))), VR256X:$src0), (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; } let Predicates = [HasVLX, HasDQI] in { defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", - X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X, + X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>; defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", - X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X, + X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>; // Patterns for selects of bitcasted operations. def : Pat<(vselect_mask VK4WM:$mask, - (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))), + (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))), (v4f64 immAllZerosV)), (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK4WM:$mask, - (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))), + (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))), VR256X:$src0), (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK4WM:$mask, - (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))), + (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))), (v4i64 immAllZerosV)), (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK4WM:$mask, - (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))), + (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))), VR256X:$src0), (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; } let Predicates = [HasDQI] in { defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", - X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W, + X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8", - X86SubVBroadcastld256, v16i32_info, v8i32x_info>, + X86SubVBroadcastld256, v16i32_info, v8i32x_info>, EVEX_V512, EVEX_CD8<32, CD8VT8>; defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", - X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W, + X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8", - X86SubVBroadcastld256, v16f32_info, v8f32x_info>, + X86SubVBroadcastld256, v16f32_info, v8f32x_info>, EVEX_V512, EVEX_CD8<32, CD8VT8>; // Patterns for selects of bitcasted operations. def : Pat<(vselect_mask VK16WM:$mask, - (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), + (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), (v16f32 immAllZerosV)), (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK16WM:$mask, - (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), + (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), VR512:$src0), (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK16WM:$mask, - (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), + (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), (v16i32 immAllZerosV)), (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK16WM:$mask, - (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), + (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), VR512:$src0), (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, - (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), + (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), (v8f64 immAllZerosV)), (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, - (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), + (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), VR512:$src0), (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, - (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), + (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), (v8i64 immAllZerosV)), (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect_mask VK8WM:$mask, - (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), + (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), VR512:$src0), (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; } @@ -2531,71 +2531,71 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in { (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; - - // Patterns for mask intrinsics. - def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, - (_.KVT immAllOnesV)), - (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>; - - def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask), - (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1, - _.RC:$src2, timm:$cc)>; - - def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, - (_.KVT immAllOnesV)), - (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>; - - def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, - _.KRCWM:$mask), - (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, - addr:$src2, timm:$cc)>; - - def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, - (_.KVT immAllOnesV)), - (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>; - - def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, - _.KRCWM:$mask), - (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, - addr:$src2, timm:$cc)>; - - // Patterns for mask intrinsics with loads in other operand. - def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, - (_.KVT immAllOnesV)), - (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, - (X86cmpm_imm_commute timm:$cc))>; - - def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, - _.KRCWM:$mask), - (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, - _.RC:$src1, addr:$src2, - (X86cmpm_imm_commute timm:$cc))>; - - def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, - (_.KVT immAllOnesV)), - (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, - (X86cmpm_imm_commute timm:$cc))>; - - def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, - _.KRCWM:$mask), - (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, - _.RC:$src1, addr:$src2, - (X86cmpm_imm_commute timm:$cc))>; + + // Patterns for mask intrinsics. + def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, + (_.KVT immAllOnesV)), + (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>; + + def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask), + (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1, + _.RC:$src2, timm:$cc)>; + + def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, + (_.KVT immAllOnesV)), + (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>; + + def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, + _.KRCWM:$mask), + (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, + addr:$src2, timm:$cc)>; + + def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, + (_.KVT immAllOnesV)), + (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>; + + def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, + _.KRCWM:$mask), + (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, + addr:$src2, timm:$cc)>; + + // Patterns for mask intrinsics with loads in other operand. + def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, + (_.KVT immAllOnesV)), + (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, + (X86cmpm_imm_commute timm:$cc))>; + + def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, + _.KRCWM:$mask), + (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, + _.RC:$src1, addr:$src2, + (X86cmpm_imm_commute timm:$cc))>; + + def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, + (_.KVT immAllOnesV)), + (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, + (X86cmpm_imm_commute timm:$cc))>; + + def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, + _.KRCWM:$mask), + (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, + _.RC:$src1, addr:$src2, + (X86cmpm_imm_commute timm:$cc))>; } multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> { // comparison code form (VCMP[EQ/LT/LE/...] let Uses = [MXCSR] in - defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst), - (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), - (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc), + defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc", - [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), - (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))], - [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), - (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>, + [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), + (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))], + [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), + (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>, EVEX_B, Sched<[sched]>; } @@ -2855,8 +2855,8 @@ def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>; def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>; -def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>; +def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))), + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>; def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>; @@ -2937,9 +2937,9 @@ let Predicates = [HasAVX512] in { def : Pat<(insert_subvector (v16i1 immAllZerosV), (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)), - (KMOVWkr (AND32ri8 - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), - (i32 1)))>; + (KMOVWkr (AND32ri8 + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), + (i32 1)))>; } // Mask unary operation @@ -6504,8 +6504,8 @@ multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode, avx512vl_f64_info, "PD">, VEX_W; } -defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma, - fma, X86FmaddRnd>; +defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma, + fma, X86FmaddRnd>; defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub, X86Fmsub, X86FmsubRnd>; defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, @@ -6595,8 +6595,8 @@ multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode, avx512vl_f64_info, "PD">, VEX_W; } -defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma, - fma, X86FmaddRnd>; +defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma, + fma, X86FmaddRnd>; defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub, X86Fmsub, X86FmsubRnd>; defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, @@ -6687,8 +6687,8 @@ multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode, avx512vl_f64_info, "PD">, VEX_W; } -defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma, - fma, X86FmaddRnd>; +defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma, + fma, X86FmaddRnd>; defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub, X86Fmsub, X86FmsubRnd>; defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, @@ -6790,7 +6790,7 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132, } } -defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>; +defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>; defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>; defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>; defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>; @@ -6998,7 +6998,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp, } } -defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", +defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SS", X86Movss, v4f32x_info, fp32imm0>; defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SS", X86Movss, v4f32x_info, fp32imm0>; @@ -7007,7 +7007,7 @@ defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMA defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS", X86Movss, v4f32x_info, fp32imm0>; -defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", +defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SD", X86Movsd, v2f64x_info, fp64imm0>; defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SD", X86Movsd, v2f64x_info, fp64imm0>; @@ -7540,7 +7540,7 @@ multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86FoldableSchedWrite sched, X86VectorVTInfo _src, X86VectorVTInfo _dst> { - let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in { + let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in { defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>, avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD; @@ -7551,7 +7551,7 @@ multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeSAE, X86FoldableSchedWrite sched, X86VectorVTInfo _src, X86VectorVTInfo _dst> { - let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in { + let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in { defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>, avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>, EVEX_CD8<32, CD8VT1>, XS; @@ -10879,7 +10879,7 @@ multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode, def mr : AVX512Ii8<opc, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))), + [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))), addr:$dst)]>, EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>; } @@ -10890,7 +10890,7 @@ multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> { (ins _.RC:$src1, u8imm:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, - (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>, + (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>, EVEX, TAPD, Sched<[WriteVecExtract]>; defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD; @@ -10903,7 +10903,7 @@ multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> { (ins _.RC:$src1, u8imm:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, - (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>, + (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>, EVEX, PD, Sched<[WriteVecExtract]>; let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in @@ -10943,13 +10943,13 @@ defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>; defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W; multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, PatFrag LdFrag, - SDPatternOperator immoperator> { + X86VectorVTInfo _, PatFrag LdFrag, + SDPatternOperator immoperator> { def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set _.RC:$dst, - (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>, + (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; } @@ -10960,10 +10960,10 @@ multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3), OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set _.RC:$dst, - (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V, + (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V, Sched<[WriteVecInsert]>; - defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>; + defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>; } } @@ -10978,7 +10978,7 @@ multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr, EVEX_4V, TAPD, Sched<[WriteVecInsert]>; defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _, - _.ScalarLdFrag, imm>, TAPD; + _.ScalarLdFrag, imm>, TAPD; } } |