aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/clang16/include/clang/Basic/arm_cde.td
blob: 6a00e669864c8546df44684072d76e9fdf0eda0d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
//===--- arm_cde.td - ACLE intrinsic functions for CDE --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the set of ACLE-specified source-level intrinsic
// functions wrapping the CDE instructions.
//
//===----------------------------------------------------------------------===//

include "arm_mve_defs.td"

// arm_mve_defs.td has no f64 because MVE instructions only operate on f16 and
// f32. The 64-bit VCX intrinsics in this file need it, so declare it here.
def f64 : PrimitiveType<"f", 64>;

// Float<t>: t must be a scalar type; the result is the floating-point type
// having the same width as t.
class Float<Type t> : ComplexType<(CTO_CopyKind t, f32)>;

// Float of Scalar; used in this file as the target type of bitcasts.
def FScalar : Float<Scalar>;

// Base class of the ACLE CDE intrinsics: an Intrinsic with the same return
// type, argument list and codegen dag, whose builtinExtension is "cde".
class CDEIntrinsic<Type ret, dag args, dag codegen> :
    Intrinsic<ret, args, codegen> {
  let builtinExtension = "cde";
}

// Bounds for an immediate that fits in numBits bits, i.e. the range
// [0, 2^numBits - 1]. The upper bound is computed as (1 << numBits) + (-1).
class IB_ConstBits<int numBits>
    : IB_ConstRange<0, !add(!shl(1, numBits), -1)>;

// An immediate operand of type u32 that is numBits wide.
class CDEImmediateBits<int numBits>
    : Immediate<u32, IB_ConstBits<numBits>>;

// An LLVM IR intrinsic belonging to CDE: forwards to IRIntBase with the
// prefix "arm_cde_" prepended to the given name. params and appendKind are
// passed through unchanged.
class CDEIRInt<string name, list<Type> params = [], bit appendKind = 0> :
    IRIntBase<"arm_cde_" # name, params, appendKind>;

// Describes a function-like macro to be generated in arm_cde.h, of the form
//   #define <name>(<params>) <definition>
// params_ lists the macro parameter names and definition_ is the replacement
// text.
class FunctionMacro<list<string> params_, string definition_> {
  string definition = definition_;
  list<string> params = params_;
}

// Coprocessor number operand: an immediate of type sint limited to 0..7.
def imm_coproc : Immediate<sint, IB_ConstRange<0, 7>>;

// Immediate integer parameters of the bit widths used in this file. This
// defines imm_3b, imm_4b, imm_6b, imm_7b, imm_9b, imm_11b, imm_12b and
// imm_13b.
foreach width = [3, 4, 6, 7, 9, 11, 12, 13] in
  def "imm_" # width # "b" : CDEImmediateBits<width>;

// CX* instructions operating on GPRs.
//
// Template arguments:
//   argsImm - intrinsic argument list declaring the immediate ($imm)
//   argsReg - intrinsic argument list declaring the register operands
//   cgArgs  - the register operands as passed on to the IR intrinsic
//
// Four intrinsics are defined per instantiation: NAME and NAME#a return u32,
// NAME#d and NAME#da return u64. The 'a' variants take an extra accumulator
// argument $acc placed right after the coprocessor number.
multiclass CDE_CX_m<dag argsImm, dag argsReg, dag cgArgs> {
  defvar cp = (args imm_coproc:$cp);

  // The result of the 'd' and 'da' IR intrinsics is bound to $pair. Combine
  // it into one u64: (xval $pair, 1) is shifted left by 32 and OR-ed with
  // (xval $pair, 0).
  defvar pairAsU64 = (or (shl (u64 (xval $pair, 1)), (u64 32)),
                         (u64 (xval $pair, 0)));

  let pnt = PNT_None, params = T.None in {
    // u32 result, no accumulator.
    def "" : CDEIntrinsic<
        u32, !con(cp, argsReg, argsImm),
        !con((CDEIRInt<NAME> $cp), cgArgs, (? $imm))>;

    // u32 result, u32 accumulator.
    def a : CDEIntrinsic<
        u32, !con(cp, (args u32:$acc), argsReg, argsImm),
        !con((CDEIRInt<NAME # "a"> $cp, $acc), cgArgs, (? $imm))>;

    // u64 result, no accumulator.
    def d : CDEIntrinsic<
        u64, !con(cp, argsReg, argsImm),
        (seq !con((CDEIRInt<NAME # "d"> $cp), cgArgs, (? $imm)):$pair,
             pairAsU64)>;

    // u64 result, u64 accumulator. $acc is handed to the IR intrinsic as two
    // u32 values: $acc_lo first, then $acc_hi ($acc shifted right by 32).
    def da : CDEIntrinsic<
        u64, !con(cp, (args u64:$acc), argsReg, argsImm),
        (seq (u32 (lshr $acc, (u64 32))):$acc_hi,
             (u32 $acc):$acc_lo,
             !con((CDEIRInt<NAME # "da"> $cp, $acc_lo, $acc_hi), cgArgs,
                  (? $imm)):$pair,
             pairAsU64)>;
  }
}

// cx1 takes no register operand, cx2 takes one (n) and cx3 takes two (n, m);
// the immediate is 13, 9 and 6 bits wide respectively.
defm cx1 : CDE_CX_m<(args imm_13b:$imm),
                    (args),
                    (?)>;
defm cx2 : CDE_CX_m<(args imm_9b:$imm),
                    (args u32:$n),
                    (? $n)>;
defm cx3 : CDE_CX_m<(args imm_6b:$imm),
                    (args u32:$n, u32:$m),
                    (? $n, $m)>;

// VCX* instructions operating on VFP registers.
//
// Template arguments:
//   argsImm   - intrinsic argument list declaring the immediate ($imm)
//   argsReg32 - register operands of the u32 intrinsics
//   argsReg64 - register operands of the u64 intrinsics
//   cgArgs    - the register operands as passed on to the IR intrinsic
//
// The intrinsics defined here take and return integers (u32 or u64), while
// the IR intrinsics are parameterized by f32 or f64; hence the bitcasts on
// the accumulator and on the result.
multiclass CDE_VCXFP_m<dag argsImm, dag argsReg32, dag argsReg64, dag cgArgs> {
  defvar cp = (args imm_coproc:$cp);
  // The accumulator, bitcast to FScalar before it reaches the IR intrinsic.
  defvar accFP = (bitcast $acc, FScalar);

  let pnt = PNT_None in {
    // u32 variants: NAME and NAME#a.
    let params = [u32] in {
      def "" : CDEIntrinsic<
          u32, !con(cp, argsReg32, argsImm),
          (bitcast !con((CDEIRInt<NAME, [f32]> $cp), cgArgs, (? $imm)),
                   Scalar)>;
      def a : CDEIntrinsic<
          u32, !con(cp, (args u32:$acc), argsReg32, argsImm),
          (bitcast !con((CDEIRInt<NAME # "a", [f32]> $cp, accFP), cgArgs,
                        (? $imm)),
                   Scalar)>;
    }

    // u64 variants: NAME#d and NAME#da. They use the same IR intrinsic names
    // as the u32 variants, with f64 in place of f32.
    let params = [u64] in {
      def d : CDEIntrinsic<
          u64, !con(cp, argsReg64, argsImm),
          (bitcast !con((CDEIRInt<NAME, [f64]> $cp), cgArgs, (? $imm)),
                   Scalar)>;
      def da : CDEIntrinsic<
          u64, !con(cp, (args u64:$acc), argsReg64, argsImm),
          (bitcast !con((CDEIRInt<NAME # "a", [f64]> $cp, accFP), cgArgs,
                        (? $imm)),
                   Scalar)>;
    }
  }
}

// vcx1 has no register operand, vcx2 has one (n) and vcx3 has two (n, m); the
// immediate is 11, 6 and 3 bits wide respectively. Register operands are
// declared as u32 or u64 and are bitcast to FScalar for the IR intrinsic.
defm vcx1 : CDE_VCXFP_m<(args imm_11b:$imm),
                        (args),
                        (args),
                        (?)>;
defm vcx2 : CDE_VCXFP_m<(args imm_6b:$imm),
                        (args u32:$n),
                        (args u64:$n),
                        (? (bitcast $n, FScalar))>;
defm vcx3 : CDE_VCXFP_m<(args imm_3b:$imm),
                        (args u32:$n, u32:$m),
                        (args u64:$n, u64:$m),
                        (? (bitcast $n, FScalar), (bitcast $m, FScalar))>;

// VCX* instructions operating on Q vector registers

// Vector of u8; used in this file as the operand type handed to the
// Q-register IR intrinsics.
def v16u8 : VecOf<u8>;

// vcx1q takes only the coprocessor number and a 12-bit immediate.
let pnt = PNT_None, params = [u8] in {
  def vcx1q : CDEIntrinsic<
      Vector, (args imm_coproc:$cp, imm_12b:$imm),
      (CDEIRInt<"vcx1q"> $cp, $imm)>;
}

// Polymorphic Q-register intrinsics. Vector-typed operands are bitcast to
// v16u8 before being passed to the IR intrinsic. The result is bitcast back
// to Vector, except in the *_u8 variants, which return it as v16u8 directly.
//
// The *_impl definitions declare every vector operand after the first one as
// v16u8. The FunctionMacro definitions vcx2qa, vcx3q, vcx3q_u8 and vcx3qa in
// this file wrap them and apply __arm_vreinterpretq_u8 to those operands.
let pnt = PNT_Type, params = T.All, polymorphicOnly = 1 in {
  def vcx1qa : CDEIntrinsic<
      Vector, (args imm_coproc:$cp, Vector:$acc, imm_12b:$imm),
      (bitcast (CDEIRInt<"vcx1qa"> $cp, (bitcast $acc, v16u8), $imm),
               Vector)>;

  def vcx2q : CDEIntrinsic<
      Vector, (args imm_coproc:$cp, Vector:$n, imm_7b:$imm),
      (bitcast (CDEIRInt<"vcx2q"> $cp, (bitcast $n, v16u8), $imm),
               Vector)>;
  def vcx2q_u8 : CDEIntrinsic<
      v16u8, (args imm_coproc:$cp, Vector:$n, imm_7b:$imm),
      (CDEIRInt<"vcx2q"> $cp, (bitcast $n, v16u8), $imm)>;

  def vcx2qa_impl : CDEIntrinsic<
      Vector, (args imm_coproc:$cp, Vector:$acc, v16u8:$n, imm_7b:$imm),
      (bitcast (CDEIRInt<"vcx2qa"> $cp, (bitcast $acc, v16u8), $n, $imm),
               Vector)>;

  def vcx3q_impl : CDEIntrinsic<
      Vector, (args imm_coproc:$cp, Vector:$n, v16u8:$m, imm_4b:$imm),
      (bitcast (CDEIRInt<"vcx3q"> $cp, (bitcast $n, v16u8), $m, $imm),
               Vector)>;
  def vcx3q_u8_impl : CDEIntrinsic<
      v16u8, (args imm_coproc:$cp, Vector:$n, v16u8:$m, imm_4b:$imm),
      (CDEIRInt<"vcx3q"> $cp, (bitcast $n, v16u8), $m, $imm)>;
  def vcx3qa_impl : CDEIntrinsic<
      Vector,
      (args imm_coproc:$cp, Vector:$acc, v16u8:$n, v16u8:$m, imm_4b:$imm),
      (bitcast (CDEIRInt<"vcx3qa"> $cp, (bitcast $acc, v16u8), $n, $m, $imm),
               Vector)>;
}

// Reinterpret intrinsics required to implement __arm_vcx*q with 2 or 3
// polymorphic parameters: they turn a vector of any listed element type into
// v16u8. u8 itself is deliberately absent from the list.
let params = [/* no u8 */ s8, u16, s16, u32, s32, u64, s64, f16, f32],
    headerOnly = 1, polymorphicOnly = 1 in {
  def vreinterpretq_u8 : Intrinsic<
      v16u8, (args Vector:$x), (vreinterpret $x, v16u8)>;
}

// The u8 -> u8 case of vreinterpretq_u8 (vreinterpretq_u8_u8). Its codegen
// is just (id $x). Providing it lets the macros apply __arm_vreinterpretq_u8
// to every argument without doing smart tricks for ones that are already u8.
let params = [u8], polymorphicOnly = 1 in {
  def vreinterpretq_u8_cde
      : CDEIntrinsic<v16u8, (args Vector:$x), (id $x)>,
        NameOverride<"vreinterpretq_u8">;
}


// Function macros for the Q-register intrinsics with more than one vector
// operand. Each forwards to the matching __arm_*_impl intrinsic, passing the
// first vector operand through and wrapping the remaining vector operands in
// __arm_vreinterpretq_u8.
def vcx2qa : FunctionMacro<["cp", "acc", "n", "imm"],
    "__arm_vcx2qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), (imm))">;

def vcx3q : FunctionMacro<["cp", "n", "m", "imm"],
    "__arm_vcx3q_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm))">;

def vcx3q_u8 : FunctionMacro<["cp", "n", "m", "imm"],
    "__arm_vcx3q_u8_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm))">;

def vcx3qa : FunctionMacro<["cp", "acc", "n", "m", "imm"],
    "__arm_vcx3qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), "
    "__arm_vreinterpretq_u8(m), (imm))">;

// Predicated Q-register intrinsic.
//
// The argument list is: coprocessor number, a Vector named $inactive_or_acc,
// then argsReg, then imm, then a Predicate named $pred. Codegen calls the IR
// intrinsic irname # "_predicated", parameterized by [Vector, Predicate],
// with $cp, $inactive_or_acc, cgArgs, $imm and $pred, in that order.
class CDEIntrinsicMasked<string irname, dag argsReg, dag imm, dag cgArgs> :
    CDEIntrinsic<
        Vector,
        !con((args imm_coproc:$cp, Vector:$inactive_or_acc),
             argsReg,
             imm,
             (args Predicate:$pred)),
        !con((CDEIRInt<irname # "_predicated", [Vector, Predicate]>
                  $cp, $inactive_or_acc),
             cgArgs,
             (? $imm, $pred))> {
  let polymorphicOnly = 1;
  let params = T.All;
}

// Predicated vcx1q and vcx1qa: no extra register operands, 12-bit immediate.
def vcx1q_m : CDEIntrinsicMasked<
    "vcx1q", (args), (args imm_12b:$imm), (?)>;
def vcx1qa_m : CDEIntrinsicMasked<
    "vcx1qa", (args), (args imm_12b:$imm), (?)>;

// Predicated intrinsics for the Q-register instructions that take extra
// vector operands. For each instantiation NAME this defines:
//   NAME_m_impl, NAMEa_m_impl - CDEIntrinsicMasked for NAME and NAME#"a"
//   NAME_m, NAMEa_m           - function macros forwarding to the _impl ones
//
// Template arguments:
//   argsReg, imm, cgArgs - forwarded to CDEIntrinsicMasked
//   macroArgs            - macro parameter names of the extra operands
//   macro                - text spliced into the macro body between the
//                          second argument and " (imm), (pred))"
multiclass VCXPredicated<dag argsReg, dag imm, dag cgArgs,
                         list<string> macroArgs, string macro> {
  // Macro parameters that follow the extra operands in both macros.
  defvar macroTail = ["imm", "pred"];

  def _m_impl : CDEIntrinsicMasked<NAME, argsReg, imm, cgArgs>;
  def a_m_impl : CDEIntrinsicMasked<NAME # "a", argsReg, imm, cgArgs>;

  def _m : FunctionMacro<
      !listconcat(["cp", "inactive"], macroArgs, macroTail),
      "__arm_" # NAME # "_m_impl((cp), (inactive), " # macro
          # " (imm), (pred))">;
  def a_m : FunctionMacro<
      !listconcat(["cp", "acc"], macroArgs, macroTail),
      "__arm_" # NAME # "a_m_impl((cp), (acc), " # macro
          # " (imm), (pred))">;
}

// Predicated vcx2q/vcx2qa (one extra operand n, 7-bit immediate) and
// vcx3q/vcx3qa (two extra operands n and m, 4-bit immediate). In the macros
// each extra operand is wrapped in __arm_vreinterpretq_u8.
defm vcx2q : VCXPredicated<
    (args v16u8:$n), (args imm_7b:$imm), (? $n),
    ["n"],
    "__arm_vreinterpretq_u8(n),">;
defm vcx3q : VCXPredicated<
    (args v16u8:$n, v16u8:$m), (args imm_4b:$imm), (? $n, $m),
    ["n", "m"],
    "__arm_vreinterpretq_u8(n), "
    "__arm_vreinterpretq_u8(m),">;

// vreinterpretq intrinsics required by the ACLE CDE specification: one
// vreinterpretq_<desttype> for each destination element type other than u8.
// Each is defined with params = [u8] and reinterprets its Vector argument as
// VecOf<desttype>.
let params = [u8], headerOnly = 1, pnt = PNT_None in
foreach desttype = [/* no u8 */ s8, u16, s16, u32, s32, u64, s64, f16, f32] in
  def "vreinterpretq_" # desttype : Intrinsic<
      VecOf<desttype>, (args Vector:$x), (vreinterpret $x, VecOf<desttype>)>;