aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/llvm16/lib/Target/ARM/ARMScheduleM55.td
blob: f24f97b26f0aa557720d0dcbdee60479338da0c2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
//==- ARMScheduleM55.td - Arm Cortex-M55 Scheduling Definitions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Arm Cortex-M55 processors.
//
//===----------------------------------------------------------------------===//

// ===---------------------------------------------------------------------===//
// Cortex-M55 is a lot like the M4/M33 in terms of scheduling. It technically
// has an extra pipeline stage but that is unimportant for scheduling, just
// starting our model a stage later. The main points of interest over a
// Cortex-M4 are MVE instructions and the ability to dual issue thumb1
// instructions.
//
//
// MVE
//
// The EPU pipelines now include both MVE and FP instructions. It has four
// pipelines across 4 stages (E1-E4). These pipelines are "control",
// "load/store", "integer" and "float/mul". We start the schedule at E2 to line
// up with the rest of the pipeline we model, and take the latency as the time
// between reading registers (almost always in E2) and register write (or
// forward, if it allows it). This means that a lot of instructions
// (including loads) actually take 1 cycle (amazingly).
//
// Each MVE instruction needs to take 2 beats, each performing 64bits of the
// 128bit vector operation. So long as the beats are to different pipelines,
// the execution of the first-beat-of-the-second-instruction can overlap with
// the second-beat-of-the-first. For example a sequence of VLDR;VADD;VMUL;VSTR
// can look like this in a pipeline:
//          1    2    3    4    5
// LD/ST  : VLDR VLDR      VSTR VSTR
// INTEGER:      VADD VADD
// FP/MUL :           VMUL VMUL
//
// But a sequence of VLDR;VLDRB;VADD;VSTR because the loads cannot overlap,
// looks like:
//          1     2     3     4     5    6
// LD/ST  : VLDR  VLDR  VLDRB VLDRB VSTR VSTR
// INTEGER:                   VADD  VADD
//
// For this schedule, we currently model latencies and pipelines well for each
// instruction. MVE instructions take two beats, modelled using
// ResourceCycles=[2].
//
//
// Dual Issue
//
// Cortex-M55 can dual issue two 16-bit T1 instructions providing one is one of
// NOPs, ITs, Brs, ADDri/SUBri, UXTB/H, SXTB/H and MOVri's. NOPs and IT's are
// not relevant (they will not appear when scheduling), Brs are only at the end
// of the block. The others are more useful, and where the problems arise.
//
// The first problem comes from the fact that we will only be seeing Thumb2
// instructions at the point in the pipeline where we do the scheduling. The
// Thumb2SizeReductionPass has not been run yet. Especially pre-ra scheduling
// (where the scheduler has the most freedom) we can only really guess at which
// instructions will become thumb1 instructions. We are quite optimistic, and
// may get some things wrong as a result.
//
// The other problem is one of telling llvm what to do exactly. The way we
// attempt to model this is:
//  Set IssueWidth to 2 to allow 2 instructions per cycle.
//  All instructions we cannot dual issue are "SingleIssue=1" (MVE/FP and T2
//    instructions)
//  We guess at another set of instructions that will become T1 instructions.
//    These become the primary instruction in a dual issue pair (the normal
//    one). These use normal resources and latencies, but set SingleIssue = 0.
//  We guess at another set of instructions that will be shrunk down into T1 DI
//    instructions (add, sub, mov's, etc), which become the secondary. These
//    don't use a resource, and set SingleIssue = 0.
//
// So our guessing is a bit rough. It may be possible to improve this by moving
// T2SizeReduction pass earlier in the pipeline, for example, so that at least
// Post-RA scheduling sees what is T1/T2. It may also be possible to write a
// custom instruction matcher to more accurately guess at T1 instructions.


// Top-level machine model record for the Cortex-M55 scheduling description.
def CortexM55Model : SchedMachineModel {
  // M55 is in-order, so the micro-op buffer is explicitly set to zero.
  let MicroOpBufferSize = 0;
  // There is some dual-issue support in M55.
  let IssueWidth = 2;
  // Overrides the default of 10.
  let MispredictPenalty = 3;
  // Same as the default of 4, spelled out explicitly.
  let LoadLatency = 4;
  let PostRAScheduler = 1;
  let FullInstRWOverlapCheck = 1;

  // The model is declared incomplete, and the predicates below are listed as
  // unsupported by it.
  let CompleteModel = 0;
  let UnsupportedFeatures = [
    IsARM, HasNEON, HasDotProd, HasMatMulInt8, HasZCZ, IsNotMClass, HasV8,
    HasV8_3a, HasTrustZone, HasDFB, IsWindows
  ];
}


let SchedModel = CortexM55Model in {

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.

// Each pipeline is modelled as a single-unit ProcResource. Since M55 is
// in-order, every unit uses BufferSize = 0.
let BufferSize = 0 in {
  def M55UnitALU       : ProcResource<1>; // Int ALU
  def M55UnitVecALU    : ProcResource<1>; // MVE integer pipe
  def M55UnitVecFPALU  : ProcResource<1>; // MVE float pipe
  def M55UnitLoadStore : ProcResource<1>; // MVE load/store pipe
  def M55UnitVecSys    : ProcResource<1>; // MVE control/sys pipe
}

// Resource group spanning the MVE integer and float pipes, for the VMOV's
// that can go down either pipeline. FIXME: M55Write2IntFPE2 (which uses this
// group) is intended to model the VMOV taking either Int or FP for 2 cycles.
// It is not clear if the llvm scheduler is using it like we want though.
def M55UnitVecIntFP : ProcResGroup<[M55UnitVecALU, M55UnitVecFPALU]>;


//===----------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types which both map the ProcResources and
// set the latency.

//=====//
// ALU //
//=====//

// Generic resource-free, zero-micro-op writes used for flags, GPRs and other
// extra operands (eg post-inc, vadc flags, vaddlv etc). They differ only in
// the latency they report.
let NumMicroOps = 0 in {
  def M55WriteLat0 : SchedWriteRes<[]> { let Latency = 0; }
  def M55WriteLat1 : SchedWriteRes<[]> { let Latency = 1; }
  def M55WriteLat2 : SchedWriteRes<[]> { let Latency = 2; }
}

// DX instructions are ALU instructions that take a single cycle on
// M55UnitALU. They are split by whether they may be dual issued.
let Latency = 1 in {
  // Instructions that may be shrunk to T1 (and can be dual issued).
  let SingleIssue = 0 in {
    def : WriteRes<WriteALU,   [M55UnitALU]>;
    def : WriteRes<WriteCMP,   [M55UnitALU]>;
    def : WriteRes<WriteBr,    [M55UnitALU]>;
    def : WriteRes<WriteBrL,   [M55UnitALU]>;
    def : WriteRes<WriteBrTbl, [M55UnitALU]>;
    def : WriteRes<WriteST,    [M55UnitALU]>;
    def M55WriteDX_DI : SchedWriteRes<[M55UnitALU]>;
  }

  // Everything else is single issue.
  let SingleIssue = 1 in {
    def : WriteRes<WritePreLd, [M55UnitALU]>;
    def M55WriteDX_SI : SchedWriteRes<[M55UnitALU]>;
  }
}

// Instructions assigned the single-issue, one-cycle ALU write (M55WriteDX_SI).
def : InstRW<[M55WriteDX_SI], (instregex "t2BF[CI]", "t2CPS", "t2DBG",
          "t2MRS", "t2MSR", "t2SEL", "t2SG", "t2TT")>;
def : InstRW<[M55WriteDX_SI], (instregex "t2SUBS_PC_LR", "COPY")>;
def : InstRW<[M55WriteDX_SI], (instregex "t2CS(EL|INC|INV|NEG)")>;
// Thumb 2 instructions that could be reduced to a thumb 1 instruction and can
// be dual issued with one of the above. This list is optimistic.
// NOTE(review): "t2ADDrr$" is redundant here, since every name it matches is
// already matched by "t2ADDC?rr$".
def : InstRW<[M55WriteDX_DI], (instregex "t2ADDC?rr$", "t2ADDrr$",
           "t2ADDSrr$", "t2ANDrr$", "t2ASRr[ir]$", "t2BICrr$", "t2CMNzrr$",
           "t2CMPr[ir]$", "t2EORrr$", "t2LSLr[ir]$", "t2LSRr[ir]$", "t2MVNr$",
           "t2ORRrr$", "t2REV(16|SH)?$", "t2RORrr$", "t2RSBr[ir]$", "t2RSBSri$",
           "t2SBCrr$", "t2SUBS?rr$", "t2TEQrr$", "t2TSTrr$", "t2STRi12$",
           "t2STRs$", "t2STRBi12$", "t2STRBs$", "t2STRHi12$", "t2STRHs$",
           "t2STR_POST$", "t2STMIA$", "t2STMIA_UPD$", "t2STMDB$", "t2STMDB_UPD$")>;
// Thumb 1 instructions given the dual-issuable one-cycle ALU write.
def : InstRW<[M55WriteDX_DI], (instregex "t2SETPAN$", "tADC$", "tADDhirr$",
           "tADDrSP$", "tADDrSPi$", "tADDrr$", "tADDspi$", "tADDspr$", "tADR$",
           "tAND$", "tASRri$", "tASRrr$", "tBIC$", "tBKPT$", "tCBNZ$", "tCBZ$",
           "tCMNz$", "tCMPhir$", "tCMPi8$", "tCMPr$", "tCPS$", "tEOR$", "tHINT$",
           "tHLT$", "tLSLri$", "tLSLrr$", "tLSRri$", "tLSRrr$", "tMOVSr$",
           "tMUL$", "tMVN$", "tORR$", "tPICADD$", "tPOP$", "tPUSH$", "tREV$",
           "tREV16$", "tREVSH$", "tROR$", "tRSB$", "tSBC$", "tSETEND$",
           "tSTMIA_UPD$", "tSTRBi$", "tSTRBr$", "tSTRHi$", "tSTRHr$", "tSTRi$",
           "tSTRr$", "tSTRspi$", "tSUBrr$", "tSUBspi$", "tSVC$", "tTRAP$",
           "tTST$", "tUDF$")>;
// Thumb 1 branches, also given the dual-issuable one-cycle ALU write.
def : InstRW<[M55WriteDX_DI], (instregex "tB$", "tBLXNSr$", "tBLXr$", "tBX$",
           "tBXNS$", "tBcc$")>;


// CX instructions take 2 (or more) cycles and are modelled with a latency of
// 2 on M55UnitALU. As with DX, they are split by dual-issue capability.
let Latency = 2 in {
  // T1 instructions that may be dual issued.
  let SingleIssue = 0 in {
    def : WriteRes<WriteLd, [M55UnitALU]>;
    def M55WriteCX_DI : SchedWriteRes<[M55UnitALU]>;
  }

  // Single-issue instructions.
  let SingleIssue = 1 in {
    def : WriteRes<WriteALUsi,  [M55UnitALU]>;
    def : WriteRes<WriteALUsr,  [M55UnitALU]>;
    def : WriteRes<WriteALUSsr, [M55UnitALU]>;
    def : WriteRes<WriteCMPsi,  [M55UnitALU]>;
    def : WriteRes<WriteCMPsr,  [M55UnitALU]>;
    def : WriteRes<WriteDIV,    [M55UnitALU]>;
    def M55WriteCX_SI : SchedWriteRes<[M55UnitALU]>;
  }
}

// Multiply and multiply-accumulate writes all alias the single-issue
// two-cycle ALU write.
def : SchedAlias<WriteMUL16,   M55WriteCX_SI>;
def : SchedAlias<WriteMUL32,   M55WriteCX_SI>;
def : SchedAlias<WriteMUL64Lo, M55WriteCX_SI>;
def : SchedAlias<WriteMAC16,   M55WriteCX_SI>;
def : SchedAlias<WriteMAC32,   M55WriteCX_SI>;
def : SchedAlias<WriteMAC64Lo, M55WriteCX_SI>;

// The Hi halves of the 64-bit results consume no resource of their own and
// only carry the two-cycle latency.
let Latency = 2 in {
  def : WriteRes<WriteMUL64Hi, []>;
  def : WriteRes<WriteMAC64Hi, []>;
}

// Instructions assigned the single-issue, two-cycle ALU write (M55WriteCX_SI).
def : InstRW<[M55WriteCX_SI], (instregex "t2CDP", "t2CLREX", "t2[DI][MS]B",
           "t2MCR", "t2MOVSs[ir]", "t2MRC", "t2MUL", "t2STC")>;
def : InstRW<[M55WriteCX_SI], (instregex "t2Q", "t2[SU](ADD|ASX|BFX|DIV)",
           "t2[SU]H(ADD|ASX|SUB|SAX)", "t2SM[LM]", "t2S(SAT|SUB|SAX)", "t2UQ",
           "t2USA", "t2USUB", "t2UXTA[BH]")>;
def : InstRW<[M55WriteCX_SI], (instregex "t2LD[AC]", "t2STL", "t2STRD")>;
def : InstRW<[M55WriteCX_SI], (instregex "MVE_[SU]Q?R?SH[LR]$")>;
// The long shifts list a second write, M55WriteLat2 (latency 2, no resource),
// after M55WriteCX_SI.
def : InstRW<[M55WriteCX_SI, M55WriteLat2], (instregex "MVE_ASRL", "MVE_LSLL",
            "MVE_LSRL", "MVE_[SU]Q?R?SH[LR]L")>;
// This may be higher in practice, but that likely doesn't make a difference
// for scheduling
def : InstRW<[M55WriteCX_SI], (instregex "t2CLRM")>;

// Loads assigned the dual-issuable two-cycle write (M55WriteCX_DI): first the
// Thumb 2 forms, then the Thumb 1 forms.
def : InstRW<[M55WriteCX_DI], (instregex "t2LDR[BH]?i12$", "t2LDRS?[BH]?s$",
           "t2LDM")>;
def : InstRW<[M55WriteCX_DI], (instregex "tLDM", "tLDRBi$", "tLDRBr$",
           "tLDRHi$", "tLDRHr$", "tLDRSB$", "tLDRSH$", "tLDRi$", "tLDRpci$",
           "tLDRr$", "tLDRspi$")>;

// Dual Issue instructions: these writes use no processor resource, have a
// latency of 1 and are not marked single issue.
def : WriteRes<WriteNoop, []> { let Latency = 1; let SingleIssue = 0; }
def M55WriteDI : SchedWriteRes<[]> { let Latency = 1; let SingleIssue = 0; }

// Thumb 1 instructions assigned the resource-free dual-issue write
// (M55WriteDI).
def : InstRW<[M55WriteDI], (instregex "tADDi[38]$", "tSUBi[38]$", "tMOVi8$",
           "tMOVr$", "tUXT[BH]$", "tSXT[BH]$")>;
// Thumb 2 instructions that could be reduced to a dual issuable Thumb 1
// instruction above.
// NOTE(review): "t2MOVr$" is redundant here, since "t2MOV[ir]$" already
// matches it.
def : InstRW<[M55WriteDI], (instregex "t2ADDS?ri$", "t2MOV[ir]$", "t2MOVi16$",
           "t2MOVr$", "t2SUBS?ri$", "t2[US]XT[BH]$")>;
def : InstRW<[M55WriteDI], (instregex "t2IT", "IT")>;


// t2LoopDec gets the zero-latency, zero-micro-op, resource-free write.
def : InstRW<[M55WriteLat0], (instregex "t2LoopDec")>;

// Forwarding

// There is normally no forwarding in the ALU, so each of these reads is given
// a read advance of 0.
foreach Rd = [ReadALU, ReadALUsr, ReadMUL, ReadMAC] in
  def : ReadAdvance<Rd, 0>;

//=============//
// MVE and VFP //
//=============//

// All MVE and VFP writes are single issue. The Writes that take
// ResourceCycles=[2] (named M55Write2*) are MVE instructions, the others VFP.
let SingleIssue = 1 in {

  // VFP-style writes: the resource is held for the default number of cycles.
  let Latency = 1 in {
    def M55WriteLSE2    : SchedWriteRes<[M55UnitLoadStore]>;
    def M55WriteIntE2   : SchedWriteRes<[M55UnitVecALU]>;
    def M55WriteFloatE2 : SchedWriteRes<[M55UnitVecFPALU]>;
    def M55WriteSysE2   : SchedWriteRes<[M55UnitVecSys]>;
  }
  let Latency = 2 in {
    def M55WriteLSE3    : SchedWriteRes<[M55UnitLoadStore]>;
    def M55WriteIntE3   : SchedWriteRes<[M55UnitVecALU]>;
    def M55WriteFloatE3 : SchedWriteRes<[M55UnitVecFPALU]>;
  }
  let Latency = 4 in {
    def M55WriteFloatE3Plus2 : SchedWriteRes<[M55UnitVecFPALU]>;
  }

  // MVE writes: the resource is held for two cycles, one per beat.
  let ResourceCycles = [2] in {
    let Latency = 1 in {
      def M55Write2LSE2    : SchedWriteRes<[M55UnitLoadStore]>;
      def M55Write2IntE2   : SchedWriteRes<[M55UnitVecALU]>;
      def M55Write2FloatE2 : SchedWriteRes<[M55UnitVecFPALU]>;
      def M55Write2IntFPE2 : SchedWriteRes<[M55UnitVecIntFP]>;
    }
    let Latency = 2 in {
      def M55Write2LSE3    : SchedWriteRes<[M55UnitLoadStore]>;
      def M55Write2IntE3   : SchedWriteRes<[M55UnitVecALU]>;
      def M55Write2FloatE3 : SchedWriteRes<[M55UnitVecFPALU]>;
    }
    let Latency = 3 in {
      def M55Write2IntE3Plus1 : SchedWriteRes<[M55UnitVecALU]>;

      // Same as M55Write2IntE3/M55Write2FloatE3 above, but longer latency and
      // no forwarding into stores
      def M55Write2IntE4NoFwd   : SchedWriteRes<[M55UnitVecALU]>;
      def M55Write2FloatE4NoFwd : SchedWriteRes<[M55UnitVecFPALU]>;
    }
    let Latency = 4 in {
      def M55Write2IntE3Plus2 : SchedWriteRes<[M55UnitVecALU]>;
    }
  }
}
// Long-latency single-issue writes on the float pipe. In each name,
// "E3PlusN" corresponds to a Latency of N + 2.
let SingleIssue = 1 in {
  def M55WriteFloatE3Plus7  : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 9; }
  def M55WriteFloatE3Plus13 : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 15; }
  def M55WriteFloatE3Plus14 : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 16; }
  def M55WriteFloatE3Plus19 : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 21; }
  // VMUL (Double precision) + VADD (Double precision)
  def M55WriteFloatE3Plus22 : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 24; }
  def M55WriteFloatE3Plus28 : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 30; }
  def M55WriteFloatE3Plus34 : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 36; }
}

// Read types used as operand entries in the MVE InstRW lists.
// M55Read0: a read advance of 0 cycles.
def M55Read0 : SchedReadAdvance<0>;
// M55Read1: a read advance of 1 cycle, restricted to values produced by the
// three latency-2 MVE writes listed here.
def M55Read1 : SchedReadAdvance<1, [M55Write2LSE3, M55Write2IntE3, M55Write2FloatE3]>;
// M55GatherQRead: a negative read advance of 4 cycles, used on the gather and
// scatter entries.
// NOTE(review): a negative advance presumably increases the effective latency
// seen by this operand - confirm against the SchedReadAdvance documentation.
def M55GatherQRead : SchedReadAdvance<-4>;

// MVE instructions

// Loads and Stores of different kinds
//
// Every entry here uses a two-cycle M55UnitLoadStore write: M55Write2LSE2 has
// latency 1 and M55Write2LSE3 has latency 2. Where an entry lists an extra
// M55WriteLat1, that is a resource-free latency-1 write for an additional
// result (eg the post-inc/writeback value).

// Normal loads
def : InstRW<[M55Write2LSE2], (instregex "MVE_VLDR(B|H|W)(S|U)(8|16|32)$")>;
// Pre/post inc loads
def : InstRW<[M55WriteLat1, M55Write2LSE2], (instregex "MVE_VLDR(B|H|W)(S|U)(8|16|32)_(post|pre)$")>;
// Gather loads
def : InstRW<[M55Write2LSE3, M55Read0, M55GatherQRead], (instregex "MVE_VLDR(B|H|W|D)(S|U)(8|16|32|64)_rq")>;
def : InstRW<[M55Write2LSE3, M55GatherQRead], (instregex "MVE_VLDR(B|H|W|D)(S|U)(8|16|32|64)_qi$")>;
def : InstRW<[M55WriteLat1, M55Write2LSE3, M55GatherQRead], (instregex "MVE_VLDR(W|D)U(32|64)_qi_pre$")>;
// Interleaving loads
def : InstRW<[M55Write2LSE2], (instregex "MVE_VLD[24][0-3]_(8|16|32)$")>;
// Interleaving loads with wb
def : InstRW<[M55Write2LSE2, M55WriteLat1], (instregex "MVE_VLD[24][0-3]_(8|16|32)_wb$")>;

// Normal stores
def : InstRW<[M55Write2LSE2, M55Read1], (instregex "MVE_VSTR(B|H|W)U?(8|16|32)$")>;
// Pre/post inc stores
def : InstRW<[M55Write2LSE2, M55Read1], (instregex "MVE_VSTR(B|H|W)U?(8|16|32)_(post|pre)$")>;
// Scatter stores
def : InstRW<[M55Write2LSE2, M55Read0, M55Read0, M55GatherQRead], (instregex "MVE_VSTR(B|H|W|D)(8|16|32|64)_rq")>;
def : InstRW<[M55Write2LSE2, M55Read0, M55GatherQRead], (instregex "MVE_VSTR(B|H|W|D)(8|16|32|64)_qi")>;
// Interleaving stores
def : InstRW<[M55Write2LSE2], (instregex "MVE_VST(2|4)")>;

// Integer pipe operations
//
// Every write used here occupies M55UnitVecALU for two cycles. The write name
// selects the latency: M55Write2IntE2 = 1, M55Write2IntE3 = 2,
// M55Write2IntE3Plus1 and M55Write2IntE4NoFwd = 3, M55Write2IntE3Plus2 = 4.

def : InstRW<[M55Write2IntE3Plus1], (instregex "MVE_VABAV")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VABD(u|s)")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VABS(u|s)")>;
def : InstRW<[M55Write2IntE3], (instregex "MVE_VADC")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VADD(_qr_)?i")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VAND")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VBIC")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VBRSR")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VCADDi")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VCLS")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VCLZ")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_V(D|I)?W?DUP")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VEOR")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VHADD")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VHCADD")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VHSUB")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_V(MAX|MIN)A?(s|u)")>;
// The across-vector min/max forms get a longer latency as the element size
// suffix goes from 8 to 16 to 32.
def : InstRW<[M55Write2IntE3], (instregex "MVE_V(MAX|MIN)A?V(s|u)8")>;
def : InstRW<[M55Write2IntE3Plus1], (instregex "MVE_V(MAX|MIN)A?V(s|u)16")>;
def : InstRW<[M55Write2IntE3Plus2], (instregex "MVE_V(MAX|MIN)A?V(s|u)32")>;
def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VMOVN")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VMOVL")>;
def : InstRW<[M55Write2IntE3], (instregex "MVE_VMULL[BT]p")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VMVN")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VNEG(u|s)")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VORN")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VORR")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VPSEL")>;
def : InstRW<[M55Write2IntE2], (instregex "MQPRCopy")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VQABS")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VQADD")>;
def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VQMOV")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VQNEG")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VSHL")>;
def : InstRW<[M55Write2IntE3], (instregex "MVE_V[QR]SHL")>;
def : InstRW<[M55Write2IntE3], (instregex "MVE_VQRSHL")>;
def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VQ?R?SHRU?N")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VSHR_")>;
def : InstRW<[M55Write2IntE3], (instregex "MVE_VRSHR_")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VQSUB")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VREV")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VRHADD")>;
def : InstRW<[M55Write2IntE3], (instregex "MVE_VSBC")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VSLI")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VSRI")>;
def : InstRW<[M55Write2IntE2], (instregex "MVE_VSUB(_qr_)?i")>;

// FP/Mul pipe operations.
//
// Every M55Write2Float* write occupies M55UnitVecFPALU for two cycles. The
// write name selects the latency: M55Write2FloatE2 = 1, M55Write2FloatE3 = 2,
// M55Write2FloatE4NoFwd = 3. Note that the integer compares, multiplies and
// multiply-accumulates are also assigned to this pipe's writes.

def : InstRW<[M55Write2FloatE2], (instregex "MVE_VABDf")>;
def : InstRW<[M55Write2FloatE2], (instregex "MVE_VABSf")>;
def : InstRW<[M55Write2FloatE2], (instregex "MVE_VADDf")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VADD_qr_f")>;
// The extra M55WriteLat1 is a resource-free latency-1 write for a second
// result.
def : InstRW<[M55Write2FloatE3, M55WriteLat1], (instregex "MVE_VADDLV")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VADDV")>;
def : InstRW<[M55Write2FloatE2], (instregex "MVE_VCADDf")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCMLA")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCMUL")>;
def : InstRW<[M55Write2FloatE2], (instregex "MVE_VCMP(i|s|u)", "MVE_VPTv(4|8|16)(i|s|u)")>;
def : InstRW<[M55Write2FloatE2], (instregex "MVE_VCMPf", "MVE_VPTv(4|8)f")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVTf16(u|s)16")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVTf32(u|s)32")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVT(u|s)16f16")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVT(u|s)32f32")>;
def : InstRW<[M55Write2FloatE4NoFwd], (instregex "MVE_VCVTf16f32")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVTf32f16")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VFM(A|S)")>;
def : InstRW<[M55Write2FloatE2], (instregex "MVE_V(MIN|MAX)NM")>;
def : InstRW<[M55Write2FloatE2], (instregex "MVE_VMOV_from_lane")>;
def : InstRW<[M55Write2FloatE2], (instregex "MVE_VMOV_rr_q")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VMOVi")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VMUL(_qr_)?[if]")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VQ?R?D?MULH")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VQ?D?MULL[TB]?[su]")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VQDMULL_qr_")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VQ?R?D?ML(A|S)[^L]")>;
def : InstRW<[M55Write2FloatE3, M55WriteLat1], (instregex "MVE_VR?ML(A|S)L")>;
def : InstRW<[M55Write2FloatE2], (instregex "MVE_VNEGf")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VRINTf")>;
def : InstRW<[M55Write2FloatE2], (instregex "MVE_VSUBf")>;
def : InstRW<[M55Write2FloatE3], (instregex "MVE_VSUB_qr_f")>;

// Some VMOV's can go down either pipeline. M55Write2IntFPE2 uses the
// M55UnitVecIntFP resource group (integer and float pipes) for two cycles.
def : InstRW<[M55Write2IntFPE2], (instregex "MVE_VMOV_to_lane", "MVE_VMOV_q_rr")>;

// Predication instructions use the control/sys pipe (M55UnitVecSys) with a
// latency of 1.
def : InstRW<[M55WriteSysE2], (instregex "MVE_VCTP")>;
def : InstRW<[M55WriteSysE2], (instregex "MVE_VPNOT")>;
def : InstRW<[M55WriteSysE2], (instregex "MVE_VPST")>;


// VFP instructions

// Conversion, move and the 32-suffixed generic FP writes.
def : SchedAlias<WriteFPCVT,    M55WriteFloatE3>;
def : SchedAlias<WriteFPMOV,    M55WriteFloatE3>;
def : SchedAlias<WriteFPALU32,  M55WriteFloatE3>;
def : SchedAlias<WriteFPMUL32,  M55WriteFloatE3>;
def : SchedAlias<WriteFPMAC32,  M55WriteFloatE3Plus2>;
def : SchedAlias<WriteFPDIV32,  M55WriteFloatE3Plus14>;
def : SchedAlias<WriteFPSQRT32, M55WriteFloatE3Plus14>;

// The 64-suffixed generic FP writes map to the longer-latency float writes.
def : SchedAlias<WriteFPALU64,  M55WriteFloatE3Plus13>;
def : SchedAlias<WriteFPMUL64,  M55WriteFloatE3Plus19>;
def : SchedAlias<WriteFPMAC64,  M55WriteFloatE3Plus34>;
def : SchedAlias<WriteFPDIV64,  M55WriteFloatE3Plus28>;
def : SchedAlias<WriteFPSQRT64, M55WriteFloatE3Plus28>;

// The FP multiply and multiply-accumulate reads get a read advance of 0.
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 0>;

// VFP loads and stores use the load/store pipe: M55WriteLSE3 has latency 2,
// M55WriteLSE2 has latency 1.
def : InstRW<[M55WriteLSE3], (instregex "VLD")>;
def : InstRW<[M55WriteLSE2], (instregex "VST")>;
def : InstRW<[M55WriteLSE3], (instregex "VLLD", "VLST")>;

// The remaining VFP instructions use the float pipe (M55UnitVecFPALU):
// M55WriteFloatE2 has latency 1, M55WriteFloatE3 has latency 2, and
// M55WriteFloatE3PlusN has latency N + 2.
def : InstRW<[M55WriteFloatE3], (instregex "VABS(H|S|D)")>;
def : InstRW<[M55WriteFloatE3], (instregex "VCVT(A|M|N|P|R|X|Z)(S|U)(H|S|D)")>;
def : InstRW<[M55WriteFloatE3], (instregex "VCVT(B|T)(DH|HD)")>;
def : InstRW<[M55WriteFloatE2], (instregex "VCMPZ?(E|H|S|D)")>;
def : InstRW<[M55WriteFloatE3Plus7], (instregex "VDIVH")>;
def : InstRW<[M55WriteFloatE3], (instregex "VFN?M(A|S)(H|S)")>; // VFMA
def : InstRW<[M55WriteFloatE3Plus22], (instregex "VFN?M(A|S)D")>; // VFMA
def : InstRW<[M55WriteFloatE3], (instregex "VFP_V(MAX|MIN)NM")>;
def : InstRW<[M55WriteFloatE3], (instregex "VINSH$", "VMOVH$", "VMOVHR$", "VMOVSR$", "VMOVDRR$")>; // VINS, VMOVX, to-FP reg movs
def : InstRW<[M55WriteFloatE2], (instregex "VMOVD$", "VMOVS$", "VMOVR")>; // Other VMOV's
def : InstRW<[M55WriteFloatE2], (instregex "FCONSTH", "FCONSTS", "FCONSTD")>;
def : InstRW<[M55WriteFloatE2], (instregex "VGETLNi32", "VSETLNi32")>;
def : InstRW<[M55WriteFloatE2], (instregex "VMSR", "VMRS")>;
def : InstRW<[M55WriteFloatE3Plus2], (instregex "VN?ML(A|S)H")>; // VMLA
def : InstRW<[M55WriteFloatE3], (instregex "VNEG(H|S|D)")>;
def : InstRW<[M55WriteFloatE3], (instregex "VRINT(A|M|N|P|R|X|Z)(H|S|D)")>;
def : InstRW<[M55WriteFloatE3], (instregex "VSEL..(H|S|D)")>;
def : InstRW<[M55WriteFloatE3Plus7], (instregex "VSQRTH")>;

// The generic WriteVLD1-4 / WriteVST1-4 writes are given empty resource lists
// and otherwise default settings.
// NOTE(review): presumably placeholders, since HasNEON is listed in this
// model's UnsupportedFeatures - confirm.
foreach W = [WriteVLD1, WriteVLD2, WriteVLD3, WriteVLD4,
             WriteVST1, WriteVST2, WriteVST3, WriteVST4] in
  def : WriteRes<W, []>;

}