author     shadchin <shadchin@yandex-team.ru>           2022-02-10 16:44:30 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:44:30 +0300
commit     2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree       012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td
parent     6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download   ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td')
-rw-r--r--  contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td | 106
1 file changed, 53 insertions, 53 deletions
diff --git a/contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td b/contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td
index 2ab48cfc4b..146e1890cb 100644
--- a/contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td
+++ b/contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td
@@ -49,40 +49,40 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
   string ft = frag#":"#ptx_elt_type;
   list<LLVMType> regs = !cond(
     // mma.sync.m8n8k4 uses smaller a/b fragments than wmma fp ops
-    !eq(gft,"m8n8k4:a:f16") : !listsplat(llvm_v2f16_ty, 2),
-    !eq(gft,"m8n8k4:b:f16") : !listsplat(llvm_v2f16_ty, 2),
+    !eq(gft,"m8n8k4:a:f16") : !listsplat(llvm_v2f16_ty, 2),
+    !eq(gft,"m8n8k4:b:f16") : !listsplat(llvm_v2f16_ty, 2),

     // fp16 -> fp16/fp32 @ m16n16k16/m8n32k16/m32n8k16
     // All currently supported geometries use the same fragment format,
     // so we only need to consider {fragment, type}.
-    !eq(ft,"a:f16") : !listsplat(llvm_v2f16_ty, 8),
-    !eq(ft,"b:f16") : !listsplat(llvm_v2f16_ty, 8),
-    !eq(ft,"c:f16") : !listsplat(llvm_v2f16_ty, 4),
-    !eq(ft,"d:f16") : !listsplat(llvm_v2f16_ty, 4),
-    !eq(ft,"c:f32") : !listsplat(llvm_float_ty, 8),
-    !eq(ft,"d:f32") : !listsplat(llvm_float_ty, 8),
+    !eq(ft,"a:f16") : !listsplat(llvm_v2f16_ty, 8),
+    !eq(ft,"b:f16") : !listsplat(llvm_v2f16_ty, 8),
+    !eq(ft,"c:f16") : !listsplat(llvm_v2f16_ty, 4),
+    !eq(ft,"d:f16") : !listsplat(llvm_v2f16_ty, 4),
+    !eq(ft,"c:f32") : !listsplat(llvm_float_ty, 8),
+    !eq(ft,"d:f32") : !listsplat(llvm_float_ty, 8),

     // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
-    !eq(gft,"m16n16k16:a:u8") : !listsplat(llvm_i32_ty, 2),
-    !eq(gft,"m16n16k16:a:s8") : !listsplat(llvm_i32_ty, 2),
-    !eq(gft,"m16n16k16:b:u8") : !listsplat(llvm_i32_ty, 2),
-    !eq(gft,"m16n16k16:b:s8") : !listsplat(llvm_i32_ty, 2),
-    !eq(gft,"m16n16k16:c:s32") : !listsplat(llvm_i32_ty, 8),
-    !eq(gft,"m16n16k16:d:s32") : !listsplat(llvm_i32_ty, 8),
+    !eq(gft,"m16n16k16:a:u8") : !listsplat(llvm_i32_ty, 2),
+    !eq(gft,"m16n16k16:a:s8") : !listsplat(llvm_i32_ty, 2),
+    !eq(gft,"m16n16k16:b:u8") : !listsplat(llvm_i32_ty, 2),
+    !eq(gft,"m16n16k16:b:s8") : !listsplat(llvm_i32_ty, 2),
+    !eq(gft,"m16n16k16:c:s32") : !listsplat(llvm_i32_ty, 8),
+    !eq(gft,"m16n16k16:d:s32") : !listsplat(llvm_i32_ty, 8),

     !eq(gft,"m8n32k16:a:u8") : [llvm_i32_ty],
     !eq(gft,"m8n32k16:a:s8") : [llvm_i32_ty],
-    !eq(gft,"m8n32k16:b:u8") : !listsplat(llvm_i32_ty, 4),
-    !eq(gft,"m8n32k16:b:s8") : !listsplat(llvm_i32_ty, 4),
-    !eq(gft,"m8n32k16:c:s32") : !listsplat(llvm_i32_ty, 8),
-    !eq(gft,"m8n32k16:d:s32") : !listsplat(llvm_i32_ty, 8),
+    !eq(gft,"m8n32k16:b:u8") : !listsplat(llvm_i32_ty, 4),
+    !eq(gft,"m8n32k16:b:s8") : !listsplat(llvm_i32_ty, 4),
+    !eq(gft,"m8n32k16:c:s32") : !listsplat(llvm_i32_ty, 8),
+    !eq(gft,"m8n32k16:d:s32") : !listsplat(llvm_i32_ty, 8),

-    !eq(gft,"m32n8k16:a:u8") : !listsplat(llvm_i32_ty, 4),
-    !eq(gft,"m32n8k16:a:s8") : !listsplat(llvm_i32_ty, 4),
+    !eq(gft,"m32n8k16:a:u8") : !listsplat(llvm_i32_ty, 4),
+    !eq(gft,"m32n8k16:a:s8") : !listsplat(llvm_i32_ty, 4),
     !eq(gft,"m32n8k16:b:u8") : [llvm_i32_ty],
     !eq(gft,"m32n8k16:b:s8") : [llvm_i32_ty],
-    !eq(gft,"m32n8k16:c:s32") : !listsplat(llvm_i32_ty, 8),
-    !eq(gft,"m32n8k16:d:s32") : !listsplat(llvm_i32_ty, 8),
+    !eq(gft,"m32n8k16:c:s32") : !listsplat(llvm_i32_ty, 8),
+    !eq(gft,"m32n8k16:d:s32") : !listsplat(llvm_i32_ty, 8),

     // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1)
     !eq(gft,"m8n8k128:a:b1") : [llvm_i32_ty],
@@ -91,10 +91,10 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
     !eq(gft,"m8n8k128:b:b1") : [llvm_i32_ty],
     !eq(gft,"m8n8k32:b:u4") : [llvm_i32_ty],
     !eq(gft,"m8n8k32:b:s4") : [llvm_i32_ty],
-    !eq(gft,"m8n8k128:c:s32") : !listsplat(llvm_i32_ty, 2),
-    !eq(gft,"m8n8k128:d:s32") : !listsplat(llvm_i32_ty, 2),
-    !eq(gft,"m8n8k32:c:s32") : !listsplat(llvm_i32_ty, 2),
-    !eq(gft,"m8n8k32:d:s32") : !listsplat(llvm_i32_ty, 2),
+    !eq(gft,"m8n8k128:c:s32") : !listsplat(llvm_i32_ty, 2),
+    !eq(gft,"m8n8k128:d:s32") : !listsplat(llvm_i32_ty, 2),
+    !eq(gft,"m8n8k32:c:s32") : !listsplat(llvm_i32_ty, 2),
+    !eq(gft,"m8n8k32:d:s32") : !listsplat(llvm_i32_ty, 2),
   );
 }

@@ -128,7 +128,7 @@ class MMA_SIGNATURE<WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
        !eq(A.ptx_elt_type, "u4") : [A],
        !eq(A.ptx_elt_type, "b1") : [A],
        // the rest are FP ops identified by accumulator & result type.
-       true: [D, C]
+       true: [D, C]
   );
   string ret = !foldl("", id_frags, a, b, !strconcat(a, ".", b.ptx_elt_type));
 }
@@ -225,17 +225,17 @@ class NVVM_MMA_OPS<int _ = 0> {
             ldst_bit_ab_ops,
             ldst_subint_cd_ops);
   // Separate A/B/C fragments (loads) from D (stores).
-  list<WMMA_REGS> all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d"));
-  list<WMMA_REGS> all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d"));
+  list<WMMA_REGS> all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d"));
+  list<WMMA_REGS> all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d"));
 }

 def NVVM_MMA_OPS : NVVM_MMA_OPS;

-// Returns true if this combination of layout/satf is supported; false otherwise.
+// Returns true if this combination of layout/satf is supported; false otherwise.
 // MMA ops must provide all parameters. Loads and stores -- only frags and layout_a.
 // The class is used to prevent generation of records for the unsupported variants.
 // E.g.
-// if NVVM_MMA_SUPPORTED<...>.ret then
+// if NVVM_MMA_SUPPORTED<...>.ret then
 //   def : FOO<>; // The record will only be defined for supported ops.
 //
 class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b="-", int satf=-1> {
@@ -261,20 +261,20 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b
                # !if(!eq(!size(frags), 4),
                      frags[2].ptx_elt_type # frags[3].ptx_elt_type,
                      "?");
-  bit ret = !cond(
+  bit ret = !cond(

     // Sub-int MMA only supports fixed A/B layout.
     // b1 does not support .satf.
-    !eq(mma#":"#satf, "b1:row:col:0") : true,
+    !eq(mma#":"#satf, "b1:row:col:0") : true,
     // mma.m8n8k4 has no .satf modifier.
     !and(!eq(frags[0].geom, "m8n8k4"),
-         !ne(satf, 0)): false,
+         !ne(satf, 0)): false,
     // mma.m8n8k4 has no C=f32 D=f16 variant.
-    !eq(gcd, "m8n8k4:f32f16"): false,
-    !eq(mma, "s4:row:col") : true,
-    !eq(mma, "u4:row:col") : true,
-    !eq(mma, "s4:row:col") : true,
-    !eq(mma, "u4:row:col") : true,
+    !eq(gcd, "m8n8k4:f32f16"): false,
+    !eq(mma, "s4:row:col") : true,
+    !eq(mma, "u4:row:col") : true,
+    !eq(mma, "s4:row:col") : true,
+    !eq(mma, "u4:row:col") : true,
     // Sub-int load/stores have fixed layout for A and B.
     !and(!eq(layout_b, "-"), // It's a Load or Store op
         !or(!eq(ld, "b1:a:row"),
@@ -288,13 +288,13 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b
            !eq(ld, "u4:a:row"),
            !eq(ld, "u4:b:col"),
            !eq(ldf, "u4:c"),
-           !eq(ldf, "u4:d"))) : true,
+           !eq(ldf, "u4:d"))) : true,

     // All other sub-int ops are not supported.
-    !eq(t, "b1") : false,
-    !eq(t, "s4") : false,
-    !eq(t, "u4") : false,
+    !eq(t, "b1") : false,
+    !eq(t, "s4") : false,
+    !eq(t, "u4") : false,
     // All other (non sub-int) are OK.
-    true: true
+    true: true
   );
 }
@@ -307,8 +307,8 @@ class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
   string Name = "int_nvvm_shfl_" # Suffix;
   string Builtin = "__nvvm_shfl_" # Suffix;
   string IntrName = "llvm.nvvm.shfl." # !subst("_",".", Suffix);
-  bit withGccBuiltin = !not(return_pred);
-  bit withoutGccBuiltin = return_pred;
+  bit withGccBuiltin = !not(return_pred);
+  bit withoutGccBuiltin = return_pred;
   LLVMType OpType = !cond(
     !eq(type,"i32"): llvm_i32_ty,
     !eq(type,"f32"): llvm_float_ty);
@@ -3998,18 +3998,18 @@ def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
 // SHUFFLE
 //

 // Generate intrinsics for all variants of shfl instruction.
-foreach sync = [false, true] in {
+foreach sync = [false, true] in {
   foreach mode = ["up", "down", "bfly", "idx"] in {
     foreach type = ["i32", "f32"] in {
-      foreach return_pred = [false, true] in {
+      foreach return_pred = [false, true] in {
         foreach i = [SHFL_INFO<sync, mode, type, return_pred>] in {
-          if i.withGccBuiltin then {
+          if i.withGccBuiltin then {
             def i.Name : GCCBuiltin<i.Builtin>,
                          Intrinsic<i.RetTy, i.ArgsTy,
                                    [IntrInaccessibleMemOnly, IntrConvergent], i.IntrName>;
           }
-          if i.withoutGccBuiltin then {
+          if i.withoutGccBuiltin then {
             def i.Name : Intrinsic<i.RetTy, i.ArgsTy,
                                    [IntrInaccessibleMemOnly, IntrConvergent], i.IntrName>;
           }
@@ -4120,11 +4120,11 @@ class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
 foreach layout = ["row", "col"] in {
   foreach stride = [0, 1] in {
     foreach frag = NVVM_MMA_OPS.all_ld_ops in
-      if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
+      if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
         def WMMA_NAME_LDST<"load", frag, layout, stride>.record
           : NVVM_WMMA_LD<frag, layout, stride>;
     foreach frag = NVVM_MMA_OPS.all_st_ops in
-      if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
+      if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
         def WMMA_NAME_LDST<"store", frag, layout, stride>.record
           : NVVM_WMMA_ST<frag, layout, stride>;
   }
@@ -4143,7 +4143,7 @@ foreach layout_a = ["row", "col"] in {
   foreach layout_b = ["row", "col"] in {
     foreach satf = [0, 1] in {
       foreach op = NVVM_MMA_OPS.all_mma_ops in {
-        if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
+        if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
           def WMMA_NAME_MMA<layout_a, layout_b, satf,
                             op[0], op[1], op[2], op[3]>.record
             : NVVM_WMMA_MMA<layout_a, layout_b, satf,
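The hunks above are whitespace-only and leave the generator logic unchanged. For readers unfamiliar with TableGen, here is a rough Python sketch (an illustration, not part of the commit or of LLVM) of the lookup that WMMA_REGS.regs performs: the !cond dispatches on the keys geom:frag:type ("gft") and frag:type ("ft") to select a register list. Only the f16/f32 cases from the first hunk are modeled; the name wmma_regs is hypothetical.

# Hypothetical helper mirroring WMMA_REGS.regs for the fp cases shown above;
# string names stand in for the TableGen LLVMType identifiers.
def wmma_regs(geom, frag, elt):
    gft = "%s:%s:%s" % (geom, frag, elt)
    ft = "%s:%s" % (frag, elt)
    # mma.sync.m8n8k4 uses smaller a/b fragments than wmma fp ops
    if gft in ("m8n8k4:a:f16", "m8n8k4:b:f16"):
        return ["llvm_v2f16_ty"] * 2
    # fp16 -> fp16/fp32: all supported geometries share one fragment format,
    # so only {fragment, type} matters
    ft_table = {
        "a:f16": ["llvm_v2f16_ty"] * 8,
        "b:f16": ["llvm_v2f16_ty"] * 8,
        "c:f16": ["llvm_v2f16_ty"] * 4,
        "d:f16": ["llvm_v2f16_ty"] * 4,
        "c:f32": ["llvm_float_ty"] * 8,
        "d:f32": ["llvm_float_ty"] * 8,
    }
    if ft in ft_table:
        return ft_table[ft]
    raise KeyError(gft)  # integer and sub-int cases elided here

assert len(wmma_regs("m8n8k4", "a", "f16")) == 2
assert len(wmma_regs("m16n16k16", "a", "f16")) == 8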
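Likewise, the nested foreach loops in the SHUFFLE hunk expand to 2 * 4 * 2 * 2 = 32 intrinsic records. A sketch of that expansion follows; it assumes the usual suffix form ("sync_"? mode "_" type "p"?) built by SHFL_INFO, whose definition lies outside the hunks shown, so treat the exact suffix as an assumption.

def shfl_records():
    for sync in (False, True):
        for mode in ("up", "down", "bfly", "idx"):
            for ty in ("i32", "f32"):
                for return_pred in (False, True):
                    suffix = (("sync_" if sync else "") + mode + "_" + ty
                              + ("p" if return_pred else ""))
                    name = "int_nvvm_shfl_" + suffix
                    intr = "llvm.nvvm.shfl." + suffix.replace("_", ".")
                    # withGccBuiltin = !not(return_pred): only the variants
                    # without the predicate result carry a GCCBuiltin named
                    # "__nvvm_shfl_" + suffix
                    yield name, intr, not return_pred

assert len(list(shfl_records())) == 32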