author     shadchin <shadchin@yandex-team.ru>           2022-02-10 16:44:30 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:44:30 +0300
commit     2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree       012bb94d777798f1f56ac1cec429509766d05181 /contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td
parent     6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download   ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz

Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td')
-rw-r--r--  contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td | 106
1 file changed, 53 insertions(+), 53 deletions(-)
diff --git a/contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td b/contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td
index 2ab48cfc4b..146e1890cb 100644
--- a/contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td
+++ b/contrib/libs/llvm12/include/llvm/IR/IntrinsicsNVVM.td
@@ -49,40 +49,40 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
string ft = frag#":"#ptx_elt_type;
list<LLVMType> regs = !cond(
// mma.sync.m8n8k4 uses smaller a/b fragments than wmma fp ops
- !eq(gft,"m8n8k4:a:f16") : !listsplat(llvm_v2f16_ty, 2),
- !eq(gft,"m8n8k4:b:f16") : !listsplat(llvm_v2f16_ty, 2),
+ !eq(gft,"m8n8k4:a:f16") : !listsplat(llvm_v2f16_ty, 2),
+ !eq(gft,"m8n8k4:b:f16") : !listsplat(llvm_v2f16_ty, 2),
// fp16 -> fp16/fp32 @ m16n16k16/m8n32k16/m32n8k16
// All currently supported geometries use the same fragment format,
// so we only need to consider {fragment, type}.
- !eq(ft,"a:f16") : !listsplat(llvm_v2f16_ty, 8),
- !eq(ft,"b:f16") : !listsplat(llvm_v2f16_ty, 8),
- !eq(ft,"c:f16") : !listsplat(llvm_v2f16_ty, 4),
- !eq(ft,"d:f16") : !listsplat(llvm_v2f16_ty, 4),
- !eq(ft,"c:f32") : !listsplat(llvm_float_ty, 8),
- !eq(ft,"d:f32") : !listsplat(llvm_float_ty, 8),
+ !eq(ft,"a:f16") : !listsplat(llvm_v2f16_ty, 8),
+ !eq(ft,"b:f16") : !listsplat(llvm_v2f16_ty, 8),
+ !eq(ft,"c:f16") : !listsplat(llvm_v2f16_ty, 4),
+ !eq(ft,"d:f16") : !listsplat(llvm_v2f16_ty, 4),
+ !eq(ft,"c:f32") : !listsplat(llvm_float_ty, 8),
+ !eq(ft,"d:f32") : !listsplat(llvm_float_ty, 8),
// u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
- !eq(gft,"m16n16k16:a:u8") : !listsplat(llvm_i32_ty, 2),
- !eq(gft,"m16n16k16:a:s8") : !listsplat(llvm_i32_ty, 2),
- !eq(gft,"m16n16k16:b:u8") : !listsplat(llvm_i32_ty, 2),
- !eq(gft,"m16n16k16:b:s8") : !listsplat(llvm_i32_ty, 2),
- !eq(gft,"m16n16k16:c:s32") : !listsplat(llvm_i32_ty, 8),
- !eq(gft,"m16n16k16:d:s32") : !listsplat(llvm_i32_ty, 8),
+ !eq(gft,"m16n16k16:a:u8") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m16n16k16:a:s8") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m16n16k16:b:u8") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m16n16k16:b:s8") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m16n16k16:c:s32") : !listsplat(llvm_i32_ty, 8),
+ !eq(gft,"m16n16k16:d:s32") : !listsplat(llvm_i32_ty, 8),
!eq(gft,"m8n32k16:a:u8") : [llvm_i32_ty],
!eq(gft,"m8n32k16:a:s8") : [llvm_i32_ty],
- !eq(gft,"m8n32k16:b:u8") : !listsplat(llvm_i32_ty, 4),
- !eq(gft,"m8n32k16:b:s8") : !listsplat(llvm_i32_ty, 4),
- !eq(gft,"m8n32k16:c:s32") : !listsplat(llvm_i32_ty, 8),
- !eq(gft,"m8n32k16:d:s32") : !listsplat(llvm_i32_ty, 8),
+ !eq(gft,"m8n32k16:b:u8") : !listsplat(llvm_i32_ty, 4),
+ !eq(gft,"m8n32k16:b:s8") : !listsplat(llvm_i32_ty, 4),
+ !eq(gft,"m8n32k16:c:s32") : !listsplat(llvm_i32_ty, 8),
+ !eq(gft,"m8n32k16:d:s32") : !listsplat(llvm_i32_ty, 8),
- !eq(gft,"m32n8k16:a:u8") : !listsplat(llvm_i32_ty, 4),
- !eq(gft,"m32n8k16:a:s8") : !listsplat(llvm_i32_ty, 4),
+ !eq(gft,"m32n8k16:a:u8") : !listsplat(llvm_i32_ty, 4),
+ !eq(gft,"m32n8k16:a:s8") : !listsplat(llvm_i32_ty, 4),
!eq(gft,"m32n8k16:b:u8") : [llvm_i32_ty],
!eq(gft,"m32n8k16:b:s8") : [llvm_i32_ty],
- !eq(gft,"m32n8k16:c:s32") : !listsplat(llvm_i32_ty, 8),
- !eq(gft,"m32n8k16:d:s32") : !listsplat(llvm_i32_ty, 8),
+ !eq(gft,"m32n8k16:c:s32") : !listsplat(llvm_i32_ty, 8),
+ !eq(gft,"m32n8k16:d:s32") : !listsplat(llvm_i32_ty, 8),
// u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1)
!eq(gft,"m8n8k128:a:b1") : [llvm_i32_ty],
@@ -91,10 +91,10 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
!eq(gft,"m8n8k128:b:b1") : [llvm_i32_ty],
!eq(gft,"m8n8k32:b:u4") : [llvm_i32_ty],
!eq(gft,"m8n8k32:b:s4") : [llvm_i32_ty],
- !eq(gft,"m8n8k128:c:s32") : !listsplat(llvm_i32_ty, 2),
- !eq(gft,"m8n8k128:d:s32") : !listsplat(llvm_i32_ty, 2),
- !eq(gft,"m8n8k32:c:s32") : !listsplat(llvm_i32_ty, 2),
- !eq(gft,"m8n8k32:d:s32") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m8n8k128:c:s32") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m8n8k128:d:s32") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m8n8k32:c:s32") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m8n8k32:d:s32") : !listsplat(llvm_i32_ty, 2),
);
}
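
In the class above, !cond returns the value paired with the first predicate that is true, and !listsplat(v, n) builds a list of n copies of v; together they map each geometry:fragment:type key to the register list that fragment occupies. A minimal standalone sketch of the idiom, with hypothetical record names and plain strings standing in for the LLVM type constants, checkable with llvm-tblgen:

    // frag_regs.td -- run: llvm-tblgen frag_regs.td
    class FragRegs<string geom, string frag, string ty> {
      string gft = geom # ":" # frag # ":" # ty;    // lookup key, as above
      list<string> regs = !cond(
        !eq(gft, "m16n16k16:a:f16") : !listsplat("v2f16", 8), // 8 regs of <2 x half>
        !eq(gft, "m16n16k16:c:f32") : !listsplat("f32", 8),   // 8 float accumulators
        true                        : []<string>);            // unknown key: empty
    }
    def r0 : FragRegs<"m16n16k16", "a", "f16">; // regs = 8 copies of "v2f16"
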
@@ -128,7 +128,7 @@ class MMA_SIGNATURE<WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
!eq(A.ptx_elt_type, "u4") : [A],
!eq(A.ptx_elt_type, "b1") : [A],
// the rest are FP ops identified by accumulator & result type.
- true: [D, C]
+ true: [D, C]
);
string ret = !foldl("", id_frags, a, b, !strconcat(a, ".", b.ptx_elt_type));
}
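
The !foldl in MMA_SIGNATURE starts from the empty string and appends "." plus the PTX element type of each selected fragment, so an FP op identified by [D, C] yields a suffix such as ".f16.f32" (D's type, then C's), while an integer op identified by [A] yields e.g. ".s8". A self-contained sketch of just the fold (hypothetical names):

    // sig.td -- fold type strings into an intrinsic-name suffix
    class Sig<list<string> tys> {
      string ret = !foldl("", tys, a, b, !strconcat(a, ".", b));
    }
    def fp_sig  : Sig<["f16", "f32"]>; // ret = ".f16.f32"
    def int_sig : Sig<["s8"]>;         // ret = ".s8"
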
@@ -225,17 +225,17 @@ class NVVM_MMA_OPS<int _ = 0> {
ldst_bit_ab_ops,
ldst_subint_cd_ops);
// Separate A/B/C fragments (loads) from D (stores).
- list<WMMA_REGS> all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d"));
- list<WMMA_REGS> all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d"));
+ list<WMMA_REGS> all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d"));
+ list<WMMA_REGS> all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d"));
}
def NVVM_MMA_OPS : NVVM_MMA_OPS;
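
The !filter used above to separate loads from stores keeps exactly the elements for which the predicate holds. A reduced sketch (hypothetical records; assumes the LLVM 12-era llvm-tblgen that introduced !filter):

    // filter.td -- split fragments by their "frag" letter
    class Op<string f> { string frag = f; }
    def FA : Op<"a">;
    def FB : Op<"b">;
    def FC : Op<"c">;
    def FD : Op<"d">;
    class Split<list<Op> ops> {
      list<Op> lds = !filter(o, ops, !ne(o.frag, "d")); // a/b/c fragments: loads
      list<Op> sts = !filter(o, ops, !eq(o.frag, "d")); // d fragment: stores
    }
    def S : Split<[FA, FB, FC, FD]>; // lds = [FA, FB, FC], sts = [FD]
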
-// Returns true if this combination of layout/satf is supported; false otherwise.
+// Returns true if this combination of layout/satf is supported; false otherwise.
// MMA ops must provide all parameters. Loads and stores -- only frags and layout_a.
// The class is used to prevent generation of records for the unsupported variants.
// E.g.
-// if NVVM_MMA_SUPPORTED<...>.ret then
+// if NVVM_MMA_SUPPORTED<...>.ret then
// def : FOO<>; // The record will only be defined for supported ops.
//
class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b="-", int satf=-1> {
@@ -261,20 +261,20 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b
# !if(!eq(!size(frags), 4),
frags[2].ptx_elt_type # frags[3].ptx_elt_type,
"?");
- bit ret = !cond(
+ bit ret = !cond(
// Sub-int MMA only supports fixed A/B layout.
// b1 does not support .satf.
- !eq(mma#":"#satf, "b1:row:col:0") : true,
+ !eq(mma#":"#satf, "b1:row:col:0") : true,
// mma.m8n8k4 has no .satf modifier.
!and(!eq(frags[0].geom, "m8n8k4"),
- !ne(satf, 0)): false,
+ !ne(satf, 0)): false,
// mma.m8n8k4 has no C=f32 D=f16 variant.
- !eq(gcd, "m8n8k4:f32f16"): false,
- !eq(mma, "s4:row:col") : true,
- !eq(mma, "u4:row:col") : true,
- !eq(mma, "s4:row:col") : true,
- !eq(mma, "u4:row:col") : true,
+ !eq(gcd, "m8n8k4:f32f16"): false,
+ !eq(mma, "s4:row:col") : true,
+ !eq(mma, "u4:row:col") : true,
+ !eq(mma, "s4:row:col") : true,
+ !eq(mma, "u4:row:col") : true,
// Sub-int load/stores have fixed layout for A and B.
!and(!eq(layout_b, "-"), // It's a Load or Store op
!or(!eq(ld, "b1:a:row"),
@@ -288,13 +288,13 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b
!eq(ld, "u4:a:row"),
!eq(ld, "u4:b:col"),
!eq(ldf, "u4:c"),
- !eq(ldf, "u4:d"))) : true,
+ !eq(ldf, "u4:d"))) : true,
// All other sub-int ops are not supported.
- !eq(t, "b1") : false,
- !eq(t, "s4") : false,
- !eq(t, "u4") : false,
+ !eq(t, "b1") : false,
+ !eq(t, "s4") : false,
+ !eq(t, "u4") : false,
// All other (non sub-int) are OK.
- true: true
+ true: true
);
}
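
Since !cond takes the first predicate that matches, ordering carries the logic here: the one legal b1 combination (b1:row:col with satf=0) is accepted before the blanket sub-int rejections, and the final true: true admits everything that is not sub-int. A toy reduction of that first-match-wins structure (hypothetical class name and rules):

    // supported.td -- first-match-wins support check
    class Supported<string ty, string la, string lb, int satf> {
      string mma = ty # ":" # la # ":" # lb;
      bit ret = !cond(
        !eq(mma # ":" # satf, "b1:row:col:0") : true,  // the only legal b1 form
        !eq(ty, "b1")                         : false, // all other b1 variants
        true                                  : true); // everything else: OK
    }
    def ok  : Supported<"b1", "row", "col", 0>; // ret = 1
    def bad : Supported<"b1", "col", "row", 0>; // ret = 0
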
@@ -307,8 +307,8 @@ class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
string Name = "int_nvvm_shfl_" # Suffix;
string Builtin = "__nvvm_shfl_" # Suffix;
string IntrName = "llvm.nvvm.shfl." # !subst("_",".", Suffix);
- bit withGccBuiltin = !not(return_pred);
- bit withoutGccBuiltin = return_pred;
+ bit withGccBuiltin = !not(return_pred);
+ bit withoutGccBuiltin = return_pred;
LLVMType OpType = !cond(
!eq(type,"i32"): llvm_i32_ty,
!eq(type,"f32"): llvm_float_ty);
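
Three names are derived from one suffix string: the TableGen record name, the __nvvm builtin, and the dotted LLVM intrinsic name produced by !subst; the complementary withGccBuiltin/withoutGccBuiltin bits later select which def form is emitted. A standalone sketch of the string plumbing (hypothetical class name; the suffix value is one real shfl variant):

    // shfl_names.td -- deriving the three shfl name spellings
    class Names<string suffix> {
      string Name     = "int_nvvm_shfl_" # suffix;                    // record name
      string Builtin  = "__nvvm_shfl_" # suffix;                      // builtin name
      string IntrName = "llvm.nvvm.shfl." # !subst("_", ".", suffix); // IR name
    }
    def n : Names<"sync_down_i32">;
    // Name     = "int_nvvm_shfl_sync_down_i32"
    // IntrName = "llvm.nvvm.shfl.sync.down.i32"
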
@@ -3998,18 +3998,18 @@ def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
// SHUFFLE
//
// Generate intrinsics for all variants of shfl instruction.
-foreach sync = [false, true] in {
+foreach sync = [false, true] in {
foreach mode = ["up", "down", "bfly", "idx"] in {
foreach type = ["i32", "f32"] in {
- foreach return_pred = [false, true] in {
+ foreach return_pred = [false, true] in {
foreach i = [SHFL_INFO<sync, mode, type, return_pred>] in {
- if i.withGccBuiltin then {
+ if i.withGccBuiltin then {
def i.Name : GCCBuiltin<i.Builtin>,
Intrinsic<i.RetTy, i.ArgsTy,
[IntrInaccessibleMemOnly, IntrConvergent],
i.IntrName>;
}
- if i.withoutGccBuiltin then {
+ if i.withoutGccBuiltin then {
def i.Name : Intrinsic<i.RetTy, i.ArgsTy,
[IntrInaccessibleMemOnly, IntrConvergent], i.IntrName>;
}
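
The nest above enumerates 2 (sync) x 4 (mode) x 2 (type) x 2 (return_pred) = 32 shuffle variants, each defined with or without a GCCBuiltin binding according to the SHFL_INFO bits. The generation idiom in miniature (hypothetical names, two loop dimensions instead of four):

    // gen.td -- nested foreach emitting one record per combination
    class Intr<string n> { string IntrName = n; }
    foreach mode = ["up", "down"] in {
      foreach type = ["i32", "f32"] in {
        def "shfl_" # mode # "_" # type
          : Intr<"llvm.nvvm.shfl." # mode # "." # type>;
      }
    }
    // Defines shfl_up_i32, shfl_up_f32, shfl_down_i32, shfl_down_f32.
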
@@ -4120,11 +4120,11 @@ class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
foreach layout = ["row", "col"] in {
foreach stride = [0, 1] in {
foreach frag = NVVM_MMA_OPS.all_ld_ops in
- if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
+ if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
def WMMA_NAME_LDST<"load", frag, layout, stride>.record
: NVVM_WMMA_LD<frag, layout, stride>;
foreach frag = NVVM_MMA_OPS.all_st_ops in
- if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
+ if NVVM_MMA_SUPPORTED<[frag], layout>.ret then
def WMMA_NAME_LDST<"store", frag, layout, stride>.record
: NVVM_WMMA_ST<frag, layout, stride>;
}
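
Here, and in the MMA block that follows, the full cross product of layouts (and, for MMA, satf and op tuples) is enumerated, and the NVVM_MMA_SUPPORTED<...>.ret guard prunes combinations PTX does not define, so no record is ever created for an unsupported variant. A toy version of the guarded-generation idiom (hypothetical names and a made-up support rule):

    // mma_gen.td -- only supported combinations become records
    class LayoutOK<string la, string lb> {
      bit ret = !and(!eq(la, "row"), !eq(lb, "col")); // toy rule: row/col only
    }
    class MMA<string la, string lb> { string layouts = la # "." # lb; }
    foreach la = ["row", "col"] in {
      foreach lb = ["row", "col"] in {
        if LayoutOK<la, lb>.ret then
          def "mma_" # la # "_" # lb : MMA<la, lb>; // only mma_row_col is defined
      }
    }
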
@@ -4143,7 +4143,7 @@ foreach layout_a = ["row", "col"] in {
foreach layout_b = ["row", "col"] in {
foreach satf = [0, 1] in {
foreach op = NVVM_MMA_OPS.all_mma_ops in {
- if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
+ if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
def WMMA_NAME_MMA<layout_a, layout_b, satf,
op[0], op[1], op[2], op[3]>.record
: NVVM_WMMA_MMA<layout_a, layout_b, satf,