diff options
author | orivej <orivej@yandex-team.ru> | 2022-02-10 16:44:49 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:49 +0300 |
commit | 718c552901d703c502ccbefdfc3c9028d608b947 (patch) | |
tree | 46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/libs/llvm12/lib/Target/PowerPC/P9InstrResources.td | |
parent | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff) | |
download | ydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz |
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/llvm12/lib/Target/PowerPC/P9InstrResources.td')
-rw-r--r-- | contrib/libs/llvm12/lib/Target/PowerPC/P9InstrResources.td | 2860 |
1 files changed, 1430 insertions, 1430 deletions
diff --git a/contrib/libs/llvm12/lib/Target/PowerPC/P9InstrResources.td b/contrib/libs/llvm12/lib/Target/PowerPC/P9InstrResources.td index 63531f72ad..fdd9727eb6 100644 --- a/contrib/libs/llvm12/lib/Target/PowerPC/P9InstrResources.td +++ b/contrib/libs/llvm12/lib/Target/PowerPC/P9InstrResources.td @@ -1,1431 +1,1431 @@ -//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the resources required by P9 instructions. This is part of -// the P9 processor model used for instruction scheduling. This file should -// contain all the instructions that may be used on Power 9. This is not -// just instructions that are new on Power 9 but also instructions that were -// available on earlier architectures and are still used in Power 9. -// -// The makeup of the P9 CPU is modeled as follows: -// - Each CPU is made up of two superslices. -// - Each superslice is made up of two slices. Therefore, there are 4 slices -// for each CPU. -// - Up to 6 instructions can be dispatched to each CPU. Three per superslice. -// - Each CPU has: -// - One CY (Crypto) unit P9_CY_* -// - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_* -// - Two PM (Permute) units. One on each superslice. P9_PM_* -// - Two DIV (Fixed Point Divide) units. One on each superslize. P9_DIV_* -// - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_* -// - Four DP (Floating Point) units. One on each slice. P9_DP_* -// This also includes fixed point multiply add. -// - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_* -// - Four Load/Store Queues. P9_LS_* -// - Each set of instructions will require a number of these resources. -//===----------------------------------------------------------------------===// - -// Two cycle ALU vector operation that uses an entire superslice. -// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines -// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice. -def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], - (instrs - (instregex "VADDU(B|H|W|D)M$"), - (instregex "VAND(C)?$"), - (instregex "VEXTS(B|H|W)2(D|W)(s)?$"), - (instregex "V_SET0(B|H)?$"), - (instregex "VS(R|L)(B|H|W|D)$"), - (instregex "VSUBU(B|H|W|D)M$"), - (instregex "VPOPCNT(B|H)$"), - (instregex "VRL(B|H|W|D)$"), - (instregex "VSRA(B|H|W|D)$"), - (instregex "XV(N)?ABS(D|S)P$"), - (instregex "XVCPSGN(D|S)P$"), - (instregex "XV(I|X)EXP(D|S)P$"), - (instregex "VRL(D|W)(MI|NM)$"), - (instregex "VMRG(E|O)W$"), - MTVSRDD, - VEQV, - VNAND, - VNEGD, - VNEGW, - VNOR, - VOR, - VORC, - VSEL, - VXOR, - XVNEGDP, - XVNEGSP, - XXLAND, - XXLANDC, - XXLEQV, - XXLEQVOnes, - XXLNAND, - XXLNOR, - XXLOR, - XXLORf, - XXLORC, - XXLXOR, - XXLXORdpz, - XXLXORspz, - XXLXORz, - XXSEL, - XSABSQP, - XSCPSGNQP, - XSIEXPQP, - XSNABSQP, - XSNEGQP, - XSXEXPQP -)>; - -// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a -// single slice. However, since it is Restricted, it requires all 3 dispatches -// (DISP) for that superslice. -def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C], - (instrs - (instregex "TABORT(D|W)C(I)?$"), - (instregex "MTFSB(0|1)$"), - (instregex "MFFSC(D)?RN(I)?$"), - (instregex "CMPRB(8)?$"), - (instregex "TD(I)?$"), - (instregex "TW(I)?$"), +//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the resources required by P9 instructions. This is part of +// the P9 processor model used for instruction scheduling. This file should +// contain all the instructions that may be used on Power 9. This is not +// just instructions that are new on Power 9 but also instructions that were +// available on earlier architectures and are still used in Power 9. +// +// The makeup of the P9 CPU is modeled as follows: +// - Each CPU is made up of two superslices. +// - Each superslice is made up of two slices. Therefore, there are 4 slices +// for each CPU. +// - Up to 6 instructions can be dispatched to each CPU. Three per superslice. +// - Each CPU has: +// - One CY (Crypto) unit P9_CY_* +// - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_* +// - Two PM (Permute) units. One on each superslice. P9_PM_* +// - Two DIV (Fixed Point Divide) units. One on each superslize. P9_DIV_* +// - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_* +// - Four DP (Floating Point) units. One on each slice. P9_DP_* +// This also includes fixed point multiply add. +// - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_* +// - Four Load/Store Queues. P9_LS_* +// - Each set of instructions will require a number of these resources. +//===----------------------------------------------------------------------===// + +// Two cycle ALU vector operation that uses an entire superslice. +// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines +// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice. +def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], + (instrs + (instregex "VADDU(B|H|W|D)M$"), + (instregex "VAND(C)?$"), + (instregex "VEXTS(B|H|W)2(D|W)(s)?$"), + (instregex "V_SET0(B|H)?$"), + (instregex "VS(R|L)(B|H|W|D)$"), + (instregex "VSUBU(B|H|W|D)M$"), + (instregex "VPOPCNT(B|H)$"), + (instregex "VRL(B|H|W|D)$"), + (instregex "VSRA(B|H|W|D)$"), + (instregex "XV(N)?ABS(D|S)P$"), + (instregex "XVCPSGN(D|S)P$"), + (instregex "XV(I|X)EXP(D|S)P$"), + (instregex "VRL(D|W)(MI|NM)$"), + (instregex "VMRG(E|O)W$"), + MTVSRDD, + VEQV, + VNAND, + VNEGD, + VNEGW, + VNOR, + VOR, + VORC, + VSEL, + VXOR, + XVNEGDP, + XVNEGSP, + XXLAND, + XXLANDC, + XXLEQV, + XXLEQVOnes, + XXLNAND, + XXLNOR, + XXLOR, + XXLORf, + XXLORC, + XXLXOR, + XXLXORdpz, + XXLXORspz, + XXLXORz, + XXSEL, + XSABSQP, + XSCPSGNQP, + XSIEXPQP, + XSNABSQP, + XSNEGQP, + XSXEXPQP +)>; + +// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a +// single slice. However, since it is Restricted, it requires all 3 dispatches +// (DISP) for that superslice. +def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C], + (instrs + (instregex "TABORT(D|W)C(I)?$"), + (instregex "MTFSB(0|1)$"), + (instregex "MFFSC(D)?RN(I)?$"), + (instregex "CMPRB(8)?$"), + (instregex "TD(I)?$"), + (instregex "TW(I)?$"), (instregex "FCMP(O|U)(S|D)$"), - (instregex "XSTSTDC(S|D)P$"), - FTDIV, - FTSQRT, - CMPEQB -)>; - -// Standard Dispatch ALU operation for 3 cycles. Only one slice used. -def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C], - (instrs - (instregex "XSMAX(C|J)?DP$"), - (instregex "XSMIN(C|J)?DP$"), - (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"), - (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"), - (instregex "POPCNT(D|W)$"), - (instregex "CMPB(8)?$"), - (instregex "SETB(8)?$"), - XSTDIVDP, - XSTSQRTDP, - XSXSIGDP, - XSCVSPDPN, - BPERMD -)>; - -// Standard Dispatch ALU operation for 2 cycles. Only one slice used. -def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], - (instrs - (instregex "S(L|R)D$"), - (instregex "SRAD(I)?$"), - (instregex "EXTSWSLI_32_64$"), - (instregex "MFV(S)?RD$"), - (instregex "MTV(S)?RD$"), - (instregex "MTV(S)?RW(A|Z)$"), - (instregex "CMP(WI|LWI|W|LW)(8)?$"), - (instregex "CMP(L)?D(I)?$"), - (instregex "SUBF(I)?C(8)?(O)?$"), - (instregex "ANDI(S)?(8)?(_rec)?$"), - (instregex "ADDC(8)?(O)?$"), - (instregex "ADDIC(8)?(_rec)?$"), - (instregex "ADD(8|4)(O)?(_rec)?$"), - (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"), - (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"), - (instregex "NEG(8)?(O)?(_rec)?$"), - (instregex "POPCNTB$"), - (instregex "ADD(I|IS)?(8)?$"), - (instregex "LI(S)?(8)?$"), - (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"), - (instregex "NAND(8)?(_rec)?$"), - (instregex "AND(C)?(8)?(_rec)?$"), - (instregex "NOR(8)?(_rec)?$"), - (instregex "OR(C)?(8)?(_rec)?$"), - (instregex "EQV(8)?(_rec)?$"), - (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"), - (instregex "ADD(4|8)(TLS)?(_)?$"), - (instregex "NEG(8)?(O)?$"), - (instregex "ADDI(S)?toc(HA|L)(8)?$"), - COPY, - MCRF, - MCRXRX, - XSNABSDP, - XSXEXPDP, - XSABSDP, - XSNEGDP, - XSCPSGNDP, - MFVSRWZ, - MFVRWZ, - EXTSWSLI, - SRADI_32, - RLDIC, - RFEBB, - LA, - TBEGIN, - TRECHKPT, - NOP, - WAIT -)>; - -// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a -// single slice. However, since it is Restricted, it requires all 3 dispatches -// (DISP) for that superslice. -def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C], - (instrs - (instregex "RLDC(L|R)$"), - (instregex "RLWIMI(8)?$"), - (instregex "RLDIC(L|R)(_32)?(_64)?$"), - (instregex "M(F|T)OCRF(8)?$"), - (instregex "CR(6)?(UN)?SET$"), - (instregex "CR(N)?(OR|AND)(C)?$"), - (instregex "S(L|R)W(8)?$"), - (instregex "RLW(INM|NM)(8)?$"), - (instregex "F(N)?ABS(D|S)$"), - (instregex "FNEG(D|S)$"), - (instregex "FCPSGN(D|S)$"), - (instregex "SRAW(I)?$"), - (instregex "ISEL(8)?$"), - RLDIMI, - XSIEXPDP, - FMR, - CREQV, - CRXOR, - TRECLAIM, - TSR, - TABORT -)>; - -// Three cycle ALU vector operation that uses an entire superslice. -// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines -// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice. -def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], - (instrs - (instregex "M(T|F)VSCR$"), - (instregex "VCMPNEZ(B|H|W)$"), - (instregex "VCMPEQU(B|H|W|D)$"), - (instregex "VCMPNE(B|H|W)$"), - (instregex "VABSDU(B|H|W)$"), - (instregex "VADDU(B|H|W)S$"), - (instregex "VAVG(S|U)(B|H|W)$"), - (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"), - (instregex "VCMPBFP(_rec)?$"), - (instregex "VC(L|T)Z(B|H|W|D)$"), - (instregex "VADDS(B|H|W)S$"), - (instregex "V(MIN|MAX)FP$"), - (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"), - VBPERMD, - VADDCUW, - VPOPCNTW, - VPOPCNTD, - VPRTYBD, - VPRTYBW, - VSHASIGMAD, - VSHASIGMAW, - VSUBSBS, - VSUBSHS, - VSUBSWS, - VSUBUBS, - VSUBUHS, - VSUBUWS, - VSUBCUW, - VCMPGTSB, - VCMPGTSB_rec, - VCMPGTSD, - VCMPGTSD_rec, - VCMPGTSH, - VCMPGTSH_rec, - VCMPGTSW, - VCMPGTSW_rec, - VCMPGTUB, - VCMPGTUB_rec, - VCMPGTUD, - VCMPGTUD_rec, - VCMPGTUH, - VCMPGTUH_rec, - VCMPGTUW, - VCMPGTUW_rec, - VCMPNEB_rec, - VCMPNEH_rec, - VCMPNEW_rec, - VCMPNEZB_rec, - VCMPNEZH_rec, - VCMPNEZW_rec, - VCMPEQUB_rec, - VCMPEQUD_rec, - VCMPEQUH_rec, - VCMPEQUW_rec, - XVCMPEQDP, - XVCMPEQDP_rec, - XVCMPEQSP, - XVCMPEQSP_rec, - XVCMPGEDP, - XVCMPGEDP_rec, - XVCMPGESP, - XVCMPGESP_rec, - XVCMPGTDP, - XVCMPGTDP_rec, - XVCMPGTSP, - XVCMPGTSP_rec, - XVMAXDP, - XVMAXSP, - XVMINDP, - XVMINSP, - XVTDIVDP, - XVTDIVSP, - XVTSQRTDP, - XVTSQRTSP, - XVTSTDCDP, - XVTSTDCSP, - XVXSIGDP, - XVXSIGSP -)>; - -// 7 cycle DP vector operation that uses an entire superslice. -// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE, -// EXECO) and all three dispatches (DISP) to the given superslice. -def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], - (instrs - VADDFP, - VCTSXS, - VCTSXS_0, - VCTUXS, - VCTUXS_0, - VEXPTEFP, - VLOGEFP, - VMADDFP, - VMHADDSHS, - VNMSUBFP, - VREFP, - VRFIM, - VRFIN, - VRFIP, - VRFIZ, - VRSQRTEFP, - VSUBFP, - XVADDDP, - XVADDSP, - XVCVDPSP, - XVCVDPSXDS, - XVCVDPSXWS, - XVCVDPUXDS, - XVCVDPUXWS, - XVCVHPSP, - XVCVSPDP, - XVCVSPHP, - XVCVSPSXDS, - XVCVSPSXWS, - XVCVSPUXDS, - XVCVSPUXWS, - XVCVSXDDP, - XVCVSXDSP, - XVCVSXWDP, - XVCVSXWSP, - XVCVUXDDP, - XVCVUXDSP, - XVCVUXWDP, - XVCVUXWSP, - XVMADDADP, - XVMADDASP, - XVMADDMDP, - XVMADDMSP, - XVMSUBADP, - XVMSUBASP, - XVMSUBMDP, - XVMSUBMSP, - XVMULDP, - XVMULSP, - XVNMADDADP, - XVNMADDASP, - XVNMADDMDP, - XVNMADDMSP, - XVNMSUBADP, - XVNMSUBASP, - XVNMSUBMDP, - XVNMSUBMSP, - XVRDPI, - XVRDPIC, - XVRDPIM, - XVRDPIP, - XVRDPIZ, - XVREDP, - XVRESP, - XVRSPI, - XVRSPIC, - XVRSPIM, - XVRSPIP, - XVRSPIZ, - XVRSQRTEDP, - XVRSQRTESP, - XVSUBDP, - XVSUBSP, - VCFSX, - VCFSX_0, - VCFUX, - VCFUX_0, - VMHRADDSHS, - VMLADDUHM, - VMSUMMBM, - VMSUMSHM, - VMSUMSHS, - VMSUMUBM, - VMSUMUHM, - VMSUMUDM, - VMSUMUHS, - VMULESB, - VMULESH, - VMULESW, - VMULEUB, - VMULEUH, - VMULEUW, - VMULOSB, - VMULOSH, - VMULOSW, - VMULOUB, - VMULOUH, - VMULOUW, - VMULUWM, - VSUM2SWS, - VSUM4SBS, - VSUM4SHS, - VSUM4UBS, - VSUMSWS -)>; - -// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three -// dispatch units for the superslice. -def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C], - (instrs - (instregex "MADD(HD|HDU|LD|LD8)$"), - (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$") -)>; - -// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three -// dispatch units for the superslice. -def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C], - (instrs - FRSP, - (instregex "FRI(N|P|Z|M)(D|S)$"), - (instregex "FRE(S)?$"), - (instregex "FADD(S)?$"), - (instregex "FMSUB(S)?$"), - (instregex "FMADD(S)?$"), - (instregex "FSUB(S)?$"), - (instregex "FCFID(U)?(S)?$"), - (instregex "FCTID(U)?(Z)?$"), - (instregex "FCTIW(U)?(Z)?$"), - (instregex "FRSQRTE(S)?$"), - FNMADDS, - FNMADD, - FNMSUBS, - FNMSUB, - FSELD, - FSELS, - FMULS, - FMUL, - XSMADDADP, - XSMADDASP, - XSMADDMDP, - XSMADDMSP, - XSMSUBADP, - XSMSUBASP, - XSMSUBMDP, - XSMSUBMSP, - XSMULDP, - XSMULSP, - XSNMADDADP, - XSNMADDASP, - XSNMADDMDP, - XSNMADDMSP, - XSNMSUBADP, - XSNMSUBASP, - XSNMSUBMDP, - XSNMSUBMSP -)>; - -// 7 cycle Restricted DP operation and one 3 cycle ALU operation. -// These operations can be done in parallel. The DP is restricted so we need a -// full 4 dispatches. -def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_1C], - (instrs - (instregex "FSEL(D|S)_rec$") -)>; - -// 5 Cycle Restricted DP operation and one 2 cycle ALU operation. -def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_1C], - (instrs - (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$") -)>; - -// 7 cycle Restricted DP operation and one 3 cycle ALU operation. -// These operations must be done sequentially.The DP is restricted so we need a -// full 4 dispatches. -def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_1C], - (instrs - (instregex "FRI(N|P|Z|M)(D|S)_rec$"), - (instregex "FRE(S)?_rec$"), - (instregex "FADD(S)?_rec$"), - (instregex "FSUB(S)?_rec$"), - (instregex "F(N)?MSUB(S)?_rec$"), - (instregex "F(N)?MADD(S)?_rec$"), - (instregex "FCFID(U)?(S)?_rec$"), - (instregex "FCTID(U)?(Z)?_rec$"), - (instregex "FCTIW(U)?(Z)?_rec$"), - (instregex "FMUL(S)?_rec$"), - (instregex "FRSQRTE(S)?_rec$"), - FRSP_rec -)>; - -// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch units. -def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C], - (instrs - XSADDDP, - XSADDSP, - XSCVDPHP, - XSCVDPSP, - XSCVDPSXDS, - XSCVDPSXDSs, - XSCVDPSXWS, - XSCVDPUXDS, - XSCVDPUXDSs, - XSCVDPUXWS, - XSCVDPSXWSs, - XSCVDPUXWSs, - XSCVHPDP, - XSCVSPDP, - XSCVSXDDP, - XSCVSXDSP, - XSCVUXDDP, - XSCVUXDSP, - XSRDPI, - XSRDPIC, - XSRDPIM, - XSRDPIP, - XSRDPIZ, - XSREDP, - XSRESP, - XSRSQRTEDP, - XSRSQRTESP, - XSSUBDP, - XSSUBSP, - XSCVDPSPN, - XSRSP -)>; - -// Three Cycle PM operation. Only one PM unit per superslice so we use the whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and one -// dispatch. -def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C], - (instrs - (instregex "LVS(L|R)$"), - (instregex "VSPLTIS(W|H|B)$"), - (instregex "VSPLT(W|H|B)(s)?$"), - (instregex "V_SETALLONES(B|H)?$"), - (instregex "VEXTRACTU(B|H|W)$"), - (instregex "VINSERT(B|H|W|D)$"), - MFVSRLD, - MTVSRWS, - VBPERMQ, - VCLZLSBB, - VCTZLSBB, - VEXTRACTD, - VEXTUBLX, - VEXTUBRX, - VEXTUHLX, - VEXTUHRX, - VEXTUWLX, - VEXTUWRX, - VGBBD, - VMRGHB, - VMRGHH, - VMRGHW, - VMRGLB, - VMRGLH, - VMRGLW, - VPERM, - VPERMR, - VPERMXOR, - VPKPX, - VPKSDSS, - VPKSDUS, - VPKSHSS, - VPKSHUS, - VPKSWSS, - VPKSWUS, - VPKUDUM, - VPKUDUS, - VPKUHUM, - VPKUHUS, - VPKUWUM, - VPKUWUS, - VPRTYBQ, - VSL, - VSLDOI, - VSLO, - VSLV, - VSR, - VSRO, - VSRV, - VUPKHPX, - VUPKHSB, - VUPKHSH, - VUPKHSW, - VUPKLPX, - VUPKLSB, - VUPKLSH, - VUPKLSW, - XXBRD, - XXBRH, - XXBRQ, - XXBRW, - XXEXTRACTUW, - XXINSERTW, - XXMRGHW, - XXMRGLW, - XXPERM, - XXPERMR, - XXSLDWI, - XXSLDWIs, - XXSPLTIB, - XXSPLTW, - XXSPLTWs, - XXPERMDI, - XXPERMDIs, - VADDCUQ, - VADDECUQ, - VADDEUQM, - VADDUQM, - VMUL10CUQ, - VMUL10ECUQ, - VMUL10EUQ, - VMUL10UQ, - VSUBCUQ, - VSUBECUQ, - VSUBEUQM, - VSUBUQM, - XSCMPEXPQP, - XSCMPOQP, - XSCMPUQP, - XSTSTDCQP, - XSXSIGQP, - BCDCFN_rec, - BCDCFZ_rec, - BCDCPSGN_rec, - BCDCTN_rec, - BCDCTZ_rec, - BCDSETSGN_rec, - BCDS_rec, - BCDTRUNC_rec, - BCDUS_rec, - BCDUTRUNC_rec -)>; - -// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and one -// dispatch. -def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], - (instrs - BCDSR_rec, - XSADDQP, - XSADDQPO, - XSCVDPQP, - XSCVQPDP, - XSCVQPDPO, - XSCVQPSDZ, - XSCVQPSWZ, - XSCVQPUDZ, - XSCVQPUWZ, - XSCVSDQP, - XSCVUDQP, - XSRQPI, - XSRQPIX, - XSRQPXP, - XSSUBQP, - XSSUBQPO -)>; - -// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and one -// dispatch. -def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], - (instrs - BCDCTSQ_rec -)>; - -// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and one -// dispatch. -def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], - (instrs - XSMADDQP, - XSMADDQPO, - XSMSUBQP, - XSMSUBQPO, - XSMULQP, - XSMULQPO, - XSNMADDQP, - XSNMADDQPO, - XSNMSUBQP, - XSNMSUBQPO -)>; - -// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and one -// dispatch. -def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], - (instrs - BCDCFSQ_rec -)>; - -// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and one -// dispatch. -def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], - (instrs - XSDIVQP, - XSDIVQPO -)>; - -// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], - (instrs - XSSQRTQP, - XSSQRTQPO -)>; - -// 6 Cycle Load uses a single slice. -def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C], - (instrs - (instregex "LXVL(L)?") -)>; - -// 5 Cycle Load uses a single slice. -def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C], - (instrs - (instregex "LVE(B|H|W)X$"), - (instregex "LVX(L)?"), - (instregex "LXSI(B|H)ZX$"), - LXSDX, - LXVB16X, - LXVD2X, - LXVWSX, - LXSIWZX, - LXV, - LXVX, - LXSD, - DFLOADf64, - XFLOADf64, - LIWZX -)>; - -// 4 Cycle Load uses a single slice. -def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C], - (instrs - (instregex "DCB(F|T|ST)(EP)?$"), - (instregex "DCBZ(L)?(EP)?$"), - (instregex "DCBTST(EP)?$"), - (instregex "CP_COPY(8)?$"), - (instregex "CP_PASTE(8)?$"), - (instregex "ICBI(EP)?$"), - (instregex "ICBT(LS)?$"), - (instregex "LBARX(L)?$"), - (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"), - (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"), - (instregex "LH(A|B)RX(L)?(8)?$"), - (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"), - (instregex "LWARX(L)?$"), - (instregex "LWBRX(8)?$"), - (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"), - CP_ABORT, - DARN, - EnforceIEIO, - ISYNC, - MSGSYNC, - TLBSYNC, - SYNC, - LMW, - LSWI -)>; - -// 4 Cycle Restricted load uses a single slice but the dispatch for the whole -// superslice. -def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C], - (instrs - LFIWZX, - LFDX, - LFD -)>; - -// Cracked Load Instructions. -// Load instructions that can be done in parallel. -def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, - DISP_PAIR_1C], - (instrs - SLBIA, - SLBIE, - SLBMFEE, - SLBMFEV, - SLBMTE, - TLBIEL -)>; - -// Cracked Load Instruction. -// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU -// operations can be run in parallel. -def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, - DISP_PAIR_1C, DISP_PAIR_1C], - (instrs - (instregex "L(W|H)ZU(X)?(8)?$") -)>; - -// Cracked TEND Instruction. -// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU -// operations can be run in parallel. -def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, - DISP_1C, DISP_1C], - (instrs - TEND -)>; - - -// Cracked Store Instruction -// Consecutive Store and ALU instructions. The store is restricted and requires -// three dispatches. -def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, - DISP_3SLOTS_1C, DISP_1C], - (instrs - (instregex "ST(B|H|W|D)CX$") -)>; - -// Cracked Load Instruction. -// Two consecutive load operations for a total of 8 cycles. -def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C, - DISP_1C, DISP_1C], - (instrs - LDMX -)>; - -// Cracked Load instruction. -// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU -// operations cannot be done at the same time and so their latencies are added. -def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, - DISP_1C, DISP_1C], - (instrs - (instregex "LHA(X)?(8)?$"), - (instregex "CP_PASTE(8)?_rec$"), - (instregex "LWA(X)?(_32)?$"), - TCHECK -)>; - -// Cracked Restricted Load instruction. -// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU -// operations cannot be done at the same time and so their latencies are added. -// Full 6 dispatches are required as this is both cracked and restricted. -def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, - DISP_3SLOTS_1C, DISP_3SLOTS_1C], - (instrs - LFIWAX -)>; - -// Cracked Load instruction. -// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU -// operations cannot be done at the same time and so their latencies are added. -// Full 4 dispatches are required as this is a cracked instruction. -def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C], - (instrs - LXSIWAX, - LIWAX -)>; - -// Cracked Load instruction. -// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7 -// cycles. The Load and ALU operations cannot be done at the same time and so -// their latencies are added. -// Full 6 dispatches are required as this is a restricted instruction. -def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_3SLOTS_1C], - (instrs - LFSX, - LFS -)>; - -// Cracked Load instruction. -// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU -// operations cannot be done at the same time and so their latencies are added. -// Full 4 dispatches are required as this is a cracked instruction. -def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C], - (instrs - LXSSP, - LXSSPX, - XFLOADf32, - DFLOADf32 -)>; - -// Cracked 3-Way Load Instruction -// Load with two ALU operations that depend on each other -def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, - DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C], - (instrs - (instregex "LHAU(X)?(8)?$"), - LWAUX -)>; - -// Cracked Load that requires the PM resource. -// Since the Load and the PM cannot be done at the same time the latencies are -// added. Requires 8 cycles. Since the PM requires the full superslice we need -// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load -// requires the remaining 1 dispatch. -def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C, DISP_1C], - (instrs - LXVH8X, - LXVDSX, - LXVW4X -)>; - -// Single slice Restricted store operation. The restricted operation requires -// all three dispatches for the superslice. -def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C], - (instrs - (instregex "STF(S|D|IWX|SX|DX)$"), - (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"), - (instregex "STW(8)?$"), - (instregex "(D|X)FSTORE(f32|f64)$"), - (instregex "ST(W|H|D)BRX$"), - (instregex "ST(B|H|D)(8)?$"), - (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"), - STIWX, - SLBIEG, - STMW, - STSWI, - TLBIE -)>; - -// Vector Store Instruction -// Requires the whole superslice and therefore requires one dispatch -// as well as both the Even and Odd exec pipelines. -def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C], - (instrs - (instregex "STVE(B|H|W)X$"), - (instregex "STVX(L)?$"), - (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$") -)>; - -// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and two -// dispatches. -def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C], - (instrs - (instregex "MTCTR(8)?(loop)?$"), - (instregex "MTLR(8)?$") -)>; - -// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and two -// dispatches. -def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C], - (instrs - (instregex "M(T|F)VRSAVE(v)?$"), - (instregex "M(T|F)PMR$"), - (instregex "M(T|F)TB(8)?$"), - (instregex "MF(SPR|CTR|LR)(8)?$"), - (instregex "M(T|F)MSR(D)?$"), - (instregex "MTSPR(8)?$") -)>; - -// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and two -// dispatches. -def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], - (instrs - DIVW, - DIVWO, - DIVWU, - DIVWUO, - MODSW -)>; - -// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and two -// dispatches. -def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], - (instrs - DIVWE, - DIVWEO, - DIVD, - DIVDO, - DIVWEU, - DIVWEUO, - DIVDU, - DIVDUO, - MODSD, - MODUD, - MODUW -)>; - -// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and all three -// dispatches. -def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], - (instrs - DIVDE, - DIVDEO, - DIVDEU, - DIVDEUO -)>; - -// Cracked DIV and ALU operation. Requires one full slice for the ALU operation -// and one full superslice for the DIV operation since there is only one DIV per -// superslice. Latency of DIV plus ALU is 26. -def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, - DISP_EVEN_1C, DISP_1C], - (instrs - (instregex "DIVW(U)?(O)?_rec$") -)>; - -// Cracked DIV and ALU operation. Requires one full slice for the ALU operation -// and one full superslice for the DIV operation since there is only one DIV per -// superslice. Latency of DIV plus ALU is 26. -def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, - DISP_EVEN_1C, DISP_1C], - (instrs - DIVD_rec, - DIVDO_rec, - DIVDU_rec, - DIVDUO_rec, - DIVWE_rec, - DIVWEO_rec, - DIVWEU_rec, - DIVWEUO_rec -)>; - -// Cracked DIV and ALU operation. Requires one full slice for the ALU operation -// and one full superslice for the DIV operation since there is only one DIV per -// superslice. Latency of DIV plus ALU is 42. -def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, - DISP_EVEN_1C, DISP_1C], - (instrs - DIVDE_rec, - DIVDEO_rec, - DIVDEU_rec, - DIVDEUO_rec -)>; - -// CR access instructions in _BrMCR, IIC_BrMCRX. - -// Cracked, restricted, ALU operations. -// Here the two ALU ops can actually be done in parallel and therefore the -// latencies are not added together. Otherwise this is like having two -// instructions running together on two pipelines and 6 dispatches. ALU ops are -// 2 cycles each. -def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_3SLOTS_1C], - (instrs - MTCRF, - MTCRF8 -)>; - -// Cracked ALU operations. -// Here the two ALU ops can actually be done in parallel and therefore the -// latencies are not added together. Otherwise this is like having two -// instructions running together on two pipelines and 2 dispatches. ALU ops are -// 2 cycles each. -def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C], - (instrs - (instregex "ADDC(8)?(O)?_rec$"), - (instregex "SUBFC(8)?(O)?_rec$") -)>; - -// Cracked ALU operations. -// Two ALU ops can be done in parallel. -// One is three cycle ALU the ohter is a two cycle ALU. -// One of the ALU ops is restricted the other is not so we have a total of -// 5 dispatches. -def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_1C], - (instrs - (instregex "F(N)?ABS(D|S)_rec$"), - (instregex "FCPSGN(D|S)_rec$"), - (instregex "FNEG(D|S)_rec$"), - FMR_rec -)>; - -// Cracked ALU operations. -// Here the two ALU ops can actually be done in parallel and therefore the -// latencies are not added together. Otherwise this is like having two -// instructions running together on two pipelines and 2 dispatches. -// ALU ops are 3 cycles each. -def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, - DISP_1C, DISP_1C], - (instrs - MCRFS -)>; - -// Cracked Restricted ALU operations. -// Here the two ALU ops can actually be done in parallel and therefore the -// latencies are not added together. Otherwise this is like having two -// instructions running together on two pipelines and 6 dispatches. -// ALU ops are 3 cycles each. -def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_3SLOTS_1C], - (instrs - (instregex "MTFSF(b|_rec)?$"), - (instregex "MTFSFI(_rec)?$") -)>; - -// Cracked instruction made of two ALU ops. -// The two ops cannot be done in parallel. -// One of the ALU ops is restricted and takes 3 dispatches. -def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_1C], - (instrs - (instregex "RLD(I)?C(R|L)_rec$"), - (instregex "RLW(IMI|INM|NM)(8)?_rec$"), - (instregex "SLW(8)?_rec$"), - (instregex "SRAW(I)?_rec$"), - (instregex "SRW(8)?_rec$"), - RLDICL_32_rec, - RLDIMI_rec -)>; - -// Cracked instruction made of two ALU ops. -// The two ops cannot be done in parallel. -// Both of the ALU ops are restricted and take 3 dispatches. -def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_3SLOTS_1C], - (instrs - (instregex "MFFS(L|CE|_rec)?$") -)>; - -// Cracked ALU instruction composed of three consecutive 2 cycle loads for a -// total of 6 cycles. All of the ALU operations are also restricted so each -// takes 3 dispatches for a total of 9. -def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C], - (instrs - (instregex "MFCR(8)?$") -)>; - -// Cracked instruction made of two ALU ops. -// The two ops cannot be done in parallel. -def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C], - (instrs - (instregex "EXTSWSLI_32_64_rec$"), - (instregex "SRAD(I)?_rec$"), - EXTSWSLI_rec, - SLD_rec, - SRD_rec, - RLDIC_rec -)>; - -// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. -def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C], - (instrs - FDIV -)>; - -// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. -def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_1C], - (instrs - FDIV_rec -)>; - -// 36 Cycle DP Instruction. -// Instruction can be done on a single slice. -def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C], - (instrs - XSSQRTDP -)>; - -// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. -def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C], - (instrs - FSQRT -)>; - -// 36 Cycle DP Vector Instruction. -def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C], - (instrs - XVSQRTDP -)>; - -// 27 Cycle DP Vector Instruction. -def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C], - (instrs - XVSQRTSP -)>; - -// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. -def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_1C], - (instrs - FSQRT_rec -)>; - -// 26 Cycle DP Instruction. -def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C], - (instrs - XSSQRTSP -)>; - -// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. -def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C], - (instrs - FSQRTS -)>; - -// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. -def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_1C], - (instrs - FSQRTS_rec -)>; - -// 33 Cycle DP Instruction. Takes one slice and 1 dispatch. -def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C], - (instrs - XSDIVDP -)>; - -// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. -def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C], - (instrs - FDIVS -)>; - -// 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU. -def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_1C], - (instrs - FDIVS_rec -)>; - -// 22 Cycle DP Instruction. Takes one slice and 1 dispatch. -def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C], - (instrs - XSDIVSP -)>; - -// 24 Cycle DP Vector Instruction. Takes one full superslice. -// Includes both EXECE, EXECO pipelines and 1 dispatch for the given -// superslice. -def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C], - (instrs - XVDIVSP -)>; - -// 33 Cycle DP Vector Instruction. Takes one full superslice. -// Includes both EXECE, EXECO pipelines and 1 dispatch for the given -// superslice. -def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C, - DISP_1C], - (instrs - XVDIVDP -)>; - -// Instruction cracked into three pieces. One Load and two ALU operations. -// The Load and one of the ALU ops cannot be run at the same time and so the -// latencies are added together for 6 cycles. The remainaing ALU is 2 cycles. -// Both the load and the ALU that depends on it are restricted and so they take -// a total of 7 dispatches. The final 2 dispatches come from the second ALU op. -// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load. -def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C, - IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C], - (instrs - (instregex "LF(SU|SUX)$") -)>; - -// Cracked instruction made up of a Store and an ALU. The ALU does not depend on -// the store and so it can be run at the same time as the store. The store is -// also restricted. -def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_1C], - (instrs - (instregex "STF(S|D)U(X)?$"), - (instregex "ST(B|H|W|D)U(X)?(8)?$") -)>; - -// Cracked instruction made up of a Load and an ALU. The ALU does not depend on -// the load and so it can be run at the same time as the load. -def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, - DISP_PAIR_1C, DISP_PAIR_1C], - (instrs - (instregex "LBZU(X)?(8)?$"), - (instregex "LDU(X)?$") -)>; - -// Cracked instruction made up of a Load and an ALU. The ALU does not depend on -// the load and so it can be run at the same time as the load. The load is also -// restricted. 3 dispatches are from the restricted load while the other two -// are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline -// is required for the ALU. -def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, - DISP_3SLOTS_1C, DISP_1C], - (instrs - (instregex "LF(DU|DUX)$") -)>; - -// Crypto Instructions - -// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole -// superslice. That includes both exec pipelines (EXECO, EXECE) and one -// dispatch. -def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C], - (instrs - (instregex "VPMSUM(B|H|W|D)$"), - (instregex "V(N)?CIPHER(LAST)?$"), - VSBOX -)>; - -// Branch Instructions - -// Two Cycle Branch -def : InstRW<[P9_BR_2C, DISP_BR_1C], - (instrs - (instregex "BCCCTR(L)?(8)?$"), - (instregex "BCCL(A|R|RL)?$"), - (instregex "BCCTR(L)?(8)?(n)?$"), - (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"), - (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"), - (instregex "BL(_TLS|_NOP)?$"), - (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"), - (instregex "BLA(8|8_NOP)?$"), - (instregex "BLR(8|L)?$"), - (instregex "TAILB(A)?(8)?$"), - (instregex "TAILBCTR(8)?$"), - (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"), - (instregex "BCLR(L)?(n)?$"), - (instregex "BCTR(L)?(8)?$"), - B, - BA, - BC, - BCC, - BCCA, - BCL, - BCLalways, - BCLn, - BCTRL8_LDinto_toc, - BCTRL_LWZinto_toc, - BCn, - CTRL_DEP -)>; - -// Five Cycle Branch with a 2 Cycle ALU Op -// Operations must be done consecutively and not in parallel. -def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C], - (instrs - ADDPCIS -)>; - -// Special Extracted Instructions For Atomics - -// Atomic Load -def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C, - IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, - IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, - DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C], - (instrs - (instregex "L(D|W)AT$") -)>; - -// Atomic Store -def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, - IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C], - (instrs - (instregex "ST(D|W)AT$") -)>; - -// Signal Processing Engine (SPE) Instructions -// These instructions are not supported on Power 9 -def : InstRW<[], - (instrs - BRINC, - EVABS, - EVEQV, - EVMRA, - EVNAND, - EVNEG, - (instregex "EVADD(I)?W$"), - (instregex "EVADD(SM|SS|UM|US)IAAW$"), - (instregex "EVAND(C)?$"), - (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"), - (instregex "EVCNTL(S|Z)W$"), - (instregex "EVDIVW(S|U)$"), - (instregex "EVEXTS(B|H)$"), - (instregex "EVLD(H|W|D)(X)?$"), - (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"), - (instregex "EVLWHE(X)?$"), - (instregex "EVLWHO(S|U)(X)?$"), - (instregex "EVLW(H|W)SPLAT(X)?$"), - (instregex "EVMERGE(HI|LO|HILO|LOHI)$"), - (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"), - (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"), - (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"), - (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"), - (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"), - (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"), - (instregex "EVMWHS(M|S)(F|FA|I|IA)$"), - (instregex "EVMWHUMI(A)?$"), - (instregex "EVMWLS(M|S)IA(A|N)W$"), - (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"), - (instregex "EVMWSM(F|I)(A|AA|AN)?$"), - (instregex "EVMWSSF(A|AA|AN)?$"), - (instregex "EVMWUMI(A|AA|AN)?$"), - (instregex "EV(N|X)?OR(C)?$"), - (instregex "EVR(LW|LWI|NDW)$"), - (instregex "EVSLW(I)?$"), - (instregex "EVSPLAT(F)?I$"), - (instregex "EVSRW(I)?(S|U)$"), - (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"), - (instregex "EVSUBF(S|U)(M|S)IAAW$"), - (instregex "EVSUB(I)?FW$") -)> { let Unsupported = 1; } - -// General Instructions without scheduling support. -def : InstRW<[], - (instrs - (instregex "(H)?RFI(D)?$"), - (instregex "DSS(ALL)?$"), - (instregex "DST(ST)?(T)?(64)?$"), - (instregex "ICBL(C|Q)$"), - (instregex "L(W|H|B)EPX$"), - (instregex "ST(W|H|B)EPX$"), - (instregex "(L|ST)FDEPX$"), - (instregex "M(T|F)SR(IN)?$"), - (instregex "M(T|F)DCR$"), - (instregex "NOP_GT_PWR(6|7)$"), - (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"), - (instregex "WRTEE(I)?$"), - ATTN, - CLRBHRB, - MFBHRBE, - MBAR, - MSYNC, - SLBSYNC, - SLBFEE_rec, - NAP, - STOP, - TRAP, - RFCI, - RFDI, - RFMCI, - SC, - DCBA, - DCBI, - DCCCI, - ICCCI -)> { let Unsupported = 1; } + (instregex "XSTSTDC(S|D)P$"), + FTDIV, + FTSQRT, + CMPEQB +)>; + +// Standard Dispatch ALU operation for 3 cycles. Only one slice used. +def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C], + (instrs + (instregex "XSMAX(C|J)?DP$"), + (instregex "XSMIN(C|J)?DP$"), + (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"), + (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"), + (instregex "POPCNT(D|W)$"), + (instregex "CMPB(8)?$"), + (instregex "SETB(8)?$"), + XSTDIVDP, + XSTSQRTDP, + XSXSIGDP, + XSCVSPDPN, + BPERMD +)>; + +// Standard Dispatch ALU operation for 2 cycles. Only one slice used. +def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], + (instrs + (instregex "S(L|R)D$"), + (instregex "SRAD(I)?$"), + (instregex "EXTSWSLI_32_64$"), + (instregex "MFV(S)?RD$"), + (instregex "MTV(S)?RD$"), + (instregex "MTV(S)?RW(A|Z)$"), + (instregex "CMP(WI|LWI|W|LW)(8)?$"), + (instregex "CMP(L)?D(I)?$"), + (instregex "SUBF(I)?C(8)?(O)?$"), + (instregex "ANDI(S)?(8)?(_rec)?$"), + (instregex "ADDC(8)?(O)?$"), + (instregex "ADDIC(8)?(_rec)?$"), + (instregex "ADD(8|4)(O)?(_rec)?$"), + (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"), + (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"), + (instregex "NEG(8)?(O)?(_rec)?$"), + (instregex "POPCNTB$"), + (instregex "ADD(I|IS)?(8)?$"), + (instregex "LI(S)?(8)?$"), + (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"), + (instregex "NAND(8)?(_rec)?$"), + (instregex "AND(C)?(8)?(_rec)?$"), + (instregex "NOR(8)?(_rec)?$"), + (instregex "OR(C)?(8)?(_rec)?$"), + (instregex "EQV(8)?(_rec)?$"), + (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"), + (instregex "ADD(4|8)(TLS)?(_)?$"), + (instregex "NEG(8)?(O)?$"), + (instregex "ADDI(S)?toc(HA|L)(8)?$"), + COPY, + MCRF, + MCRXRX, + XSNABSDP, + XSXEXPDP, + XSABSDP, + XSNEGDP, + XSCPSGNDP, + MFVSRWZ, + MFVRWZ, + EXTSWSLI, + SRADI_32, + RLDIC, + RFEBB, + LA, + TBEGIN, + TRECHKPT, + NOP, + WAIT +)>; + +// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a +// single slice. However, since it is Restricted, it requires all 3 dispatches +// (DISP) for that superslice. +def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C], + (instrs + (instregex "RLDC(L|R)$"), + (instregex "RLWIMI(8)?$"), + (instregex "RLDIC(L|R)(_32)?(_64)?$"), + (instregex "M(F|T)OCRF(8)?$"), + (instregex "CR(6)?(UN)?SET$"), + (instregex "CR(N)?(OR|AND)(C)?$"), + (instregex "S(L|R)W(8)?$"), + (instregex "RLW(INM|NM)(8)?$"), + (instregex "F(N)?ABS(D|S)$"), + (instregex "FNEG(D|S)$"), + (instregex "FCPSGN(D|S)$"), + (instregex "SRAW(I)?$"), + (instregex "ISEL(8)?$"), + RLDIMI, + XSIEXPDP, + FMR, + CREQV, + CRXOR, + TRECLAIM, + TSR, + TABORT +)>; + +// Three cycle ALU vector operation that uses an entire superslice. +// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines +// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice. +def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], + (instrs + (instregex "M(T|F)VSCR$"), + (instregex "VCMPNEZ(B|H|W)$"), + (instregex "VCMPEQU(B|H|W|D)$"), + (instregex "VCMPNE(B|H|W)$"), + (instregex "VABSDU(B|H|W)$"), + (instregex "VADDU(B|H|W)S$"), + (instregex "VAVG(S|U)(B|H|W)$"), + (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"), + (instregex "VCMPBFP(_rec)?$"), + (instregex "VC(L|T)Z(B|H|W|D)$"), + (instregex "VADDS(B|H|W)S$"), + (instregex "V(MIN|MAX)FP$"), + (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"), + VBPERMD, + VADDCUW, + VPOPCNTW, + VPOPCNTD, + VPRTYBD, + VPRTYBW, + VSHASIGMAD, + VSHASIGMAW, + VSUBSBS, + VSUBSHS, + VSUBSWS, + VSUBUBS, + VSUBUHS, + VSUBUWS, + VSUBCUW, + VCMPGTSB, + VCMPGTSB_rec, + VCMPGTSD, + VCMPGTSD_rec, + VCMPGTSH, + VCMPGTSH_rec, + VCMPGTSW, + VCMPGTSW_rec, + VCMPGTUB, + VCMPGTUB_rec, + VCMPGTUD, + VCMPGTUD_rec, + VCMPGTUH, + VCMPGTUH_rec, + VCMPGTUW, + VCMPGTUW_rec, + VCMPNEB_rec, + VCMPNEH_rec, + VCMPNEW_rec, + VCMPNEZB_rec, + VCMPNEZH_rec, + VCMPNEZW_rec, + VCMPEQUB_rec, + VCMPEQUD_rec, + VCMPEQUH_rec, + VCMPEQUW_rec, + XVCMPEQDP, + XVCMPEQDP_rec, + XVCMPEQSP, + XVCMPEQSP_rec, + XVCMPGEDP, + XVCMPGEDP_rec, + XVCMPGESP, + XVCMPGESP_rec, + XVCMPGTDP, + XVCMPGTDP_rec, + XVCMPGTSP, + XVCMPGTSP_rec, + XVMAXDP, + XVMAXSP, + XVMINDP, + XVMINSP, + XVTDIVDP, + XVTDIVSP, + XVTSQRTDP, + XVTSQRTSP, + XVTSTDCDP, + XVTSTDCSP, + XVXSIGDP, + XVXSIGSP +)>; + +// 7 cycle DP vector operation that uses an entire superslice. +// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE, +// EXECO) and all three dispatches (DISP) to the given superslice. +def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], + (instrs + VADDFP, + VCTSXS, + VCTSXS_0, + VCTUXS, + VCTUXS_0, + VEXPTEFP, + VLOGEFP, + VMADDFP, + VMHADDSHS, + VNMSUBFP, + VREFP, + VRFIM, + VRFIN, + VRFIP, + VRFIZ, + VRSQRTEFP, + VSUBFP, + XVADDDP, + XVADDSP, + XVCVDPSP, + XVCVDPSXDS, + XVCVDPSXWS, + XVCVDPUXDS, + XVCVDPUXWS, + XVCVHPSP, + XVCVSPDP, + XVCVSPHP, + XVCVSPSXDS, + XVCVSPSXWS, + XVCVSPUXDS, + XVCVSPUXWS, + XVCVSXDDP, + XVCVSXDSP, + XVCVSXWDP, + XVCVSXWSP, + XVCVUXDDP, + XVCVUXDSP, + XVCVUXWDP, + XVCVUXWSP, + XVMADDADP, + XVMADDASP, + XVMADDMDP, + XVMADDMSP, + XVMSUBADP, + XVMSUBASP, + XVMSUBMDP, + XVMSUBMSP, + XVMULDP, + XVMULSP, + XVNMADDADP, + XVNMADDASP, + XVNMADDMDP, + XVNMADDMSP, + XVNMSUBADP, + XVNMSUBASP, + XVNMSUBMDP, + XVNMSUBMSP, + XVRDPI, + XVRDPIC, + XVRDPIM, + XVRDPIP, + XVRDPIZ, + XVREDP, + XVRESP, + XVRSPI, + XVRSPIC, + XVRSPIM, + XVRSPIP, + XVRSPIZ, + XVRSQRTEDP, + XVRSQRTESP, + XVSUBDP, + XVSUBSP, + VCFSX, + VCFSX_0, + VCFUX, + VCFUX_0, + VMHRADDSHS, + VMLADDUHM, + VMSUMMBM, + VMSUMSHM, + VMSUMSHS, + VMSUMUBM, + VMSUMUHM, + VMSUMUDM, + VMSUMUHS, + VMULESB, + VMULESH, + VMULESW, + VMULEUB, + VMULEUH, + VMULEUW, + VMULOSB, + VMULOSH, + VMULOSW, + VMULOUB, + VMULOUH, + VMULOUW, + VMULUWM, + VSUM2SWS, + VSUM4SBS, + VSUM4SHS, + VSUM4UBS, + VSUMSWS +)>; + +// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three +// dispatch units for the superslice. +def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C], + (instrs + (instregex "MADD(HD|HDU|LD|LD8)$"), + (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$") +)>; + +// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three +// dispatch units for the superslice. +def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C], + (instrs + FRSP, + (instregex "FRI(N|P|Z|M)(D|S)$"), + (instregex "FRE(S)?$"), + (instregex "FADD(S)?$"), + (instregex "FMSUB(S)?$"), + (instregex "FMADD(S)?$"), + (instregex "FSUB(S)?$"), + (instregex "FCFID(U)?(S)?$"), + (instregex "FCTID(U)?(Z)?$"), + (instregex "FCTIW(U)?(Z)?$"), + (instregex "FRSQRTE(S)?$"), + FNMADDS, + FNMADD, + FNMSUBS, + FNMSUB, + FSELD, + FSELS, + FMULS, + FMUL, + XSMADDADP, + XSMADDASP, + XSMADDMDP, + XSMADDMSP, + XSMSUBADP, + XSMSUBASP, + XSMSUBMDP, + XSMSUBMSP, + XSMULDP, + XSMULSP, + XSNMADDADP, + XSNMADDASP, + XSNMADDMDP, + XSNMADDMSP, + XSNMSUBADP, + XSNMSUBASP, + XSNMSUBMDP, + XSNMSUBMSP +)>; + +// 7 cycle Restricted DP operation and one 3 cycle ALU operation. +// These operations can be done in parallel. The DP is restricted so we need a +// full 4 dispatches. +def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_1C], + (instrs + (instregex "FSEL(D|S)_rec$") +)>; + +// 5 Cycle Restricted DP operation and one 2 cycle ALU operation. +def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_1C], + (instrs + (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$") +)>; + +// 7 cycle Restricted DP operation and one 3 cycle ALU operation. +// These operations must be done sequentially.The DP is restricted so we need a +// full 4 dispatches. +def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_1C], + (instrs + (instregex "FRI(N|P|Z|M)(D|S)_rec$"), + (instregex "FRE(S)?_rec$"), + (instregex "FADD(S)?_rec$"), + (instregex "FSUB(S)?_rec$"), + (instregex "F(N)?MSUB(S)?_rec$"), + (instregex "F(N)?MADD(S)?_rec$"), + (instregex "FCFID(U)?(S)?_rec$"), + (instregex "FCTID(U)?(Z)?_rec$"), + (instregex "FCTIW(U)?(Z)?_rec$"), + (instregex "FMUL(S)?_rec$"), + (instregex "FRSQRTE(S)?_rec$"), + FRSP_rec +)>; + +// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch units. +def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C], + (instrs + XSADDDP, + XSADDSP, + XSCVDPHP, + XSCVDPSP, + XSCVDPSXDS, + XSCVDPSXDSs, + XSCVDPSXWS, + XSCVDPUXDS, + XSCVDPUXDSs, + XSCVDPUXWS, + XSCVDPSXWSs, + XSCVDPUXWSs, + XSCVHPDP, + XSCVSPDP, + XSCVSXDDP, + XSCVSXDSP, + XSCVUXDDP, + XSCVUXDSP, + XSRDPI, + XSRDPIC, + XSRDPIM, + XSRDPIP, + XSRDPIZ, + XSREDP, + XSRESP, + XSRSQRTEDP, + XSRSQRTESP, + XSSUBDP, + XSSUBSP, + XSCVDPSPN, + XSRSP +)>; + +// Three Cycle PM operation. Only one PM unit per superslice so we use the whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C], + (instrs + (instregex "LVS(L|R)$"), + (instregex "VSPLTIS(W|H|B)$"), + (instregex "VSPLT(W|H|B)(s)?$"), + (instregex "V_SETALLONES(B|H)?$"), + (instregex "VEXTRACTU(B|H|W)$"), + (instregex "VINSERT(B|H|W|D)$"), + MFVSRLD, + MTVSRWS, + VBPERMQ, + VCLZLSBB, + VCTZLSBB, + VEXTRACTD, + VEXTUBLX, + VEXTUBRX, + VEXTUHLX, + VEXTUHRX, + VEXTUWLX, + VEXTUWRX, + VGBBD, + VMRGHB, + VMRGHH, + VMRGHW, + VMRGLB, + VMRGLH, + VMRGLW, + VPERM, + VPERMR, + VPERMXOR, + VPKPX, + VPKSDSS, + VPKSDUS, + VPKSHSS, + VPKSHUS, + VPKSWSS, + VPKSWUS, + VPKUDUM, + VPKUDUS, + VPKUHUM, + VPKUHUS, + VPKUWUM, + VPKUWUS, + VPRTYBQ, + VSL, + VSLDOI, + VSLO, + VSLV, + VSR, + VSRO, + VSRV, + VUPKHPX, + VUPKHSB, + VUPKHSH, + VUPKHSW, + VUPKLPX, + VUPKLSB, + VUPKLSH, + VUPKLSW, + XXBRD, + XXBRH, + XXBRQ, + XXBRW, + XXEXTRACTUW, + XXINSERTW, + XXMRGHW, + XXMRGLW, + XXPERM, + XXPERMR, + XXSLDWI, + XXSLDWIs, + XXSPLTIB, + XXSPLTW, + XXSPLTWs, + XXPERMDI, + XXPERMDIs, + VADDCUQ, + VADDECUQ, + VADDEUQM, + VADDUQM, + VMUL10CUQ, + VMUL10ECUQ, + VMUL10EUQ, + VMUL10UQ, + VSUBCUQ, + VSUBECUQ, + VSUBEUQM, + VSUBUQM, + XSCMPEXPQP, + XSCMPOQP, + XSCMPUQP, + XSTSTDCQP, + XSXSIGQP, + BCDCFN_rec, + BCDCFZ_rec, + BCDCPSGN_rec, + BCDCTN_rec, + BCDCTZ_rec, + BCDSETSGN_rec, + BCDS_rec, + BCDTRUNC_rec, + BCDUS_rec, + BCDUTRUNC_rec +)>; + +// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], + (instrs + BCDSR_rec, + XSADDQP, + XSADDQPO, + XSCVDPQP, + XSCVQPDP, + XSCVQPDPO, + XSCVQPSDZ, + XSCVQPSWZ, + XSCVQPUDZ, + XSCVQPUWZ, + XSCVSDQP, + XSCVUDQP, + XSRQPI, + XSRQPIX, + XSRQPXP, + XSSUBQP, + XSSUBQPO +)>; + +// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], + (instrs + BCDCTSQ_rec +)>; + +// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], + (instrs + XSMADDQP, + XSMADDQPO, + XSMSUBQP, + XSMSUBQPO, + XSMULQP, + XSMULQPO, + XSNMADDQP, + XSNMADDQPO, + XSNMSUBQP, + XSNMSUBQPO +)>; + +// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], + (instrs + BCDCFSQ_rec +)>; + +// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], + (instrs + XSDIVQP, + XSDIVQPO +)>; + +// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], + (instrs + XSSQRTQP, + XSSQRTQPO +)>; + +// 6 Cycle Load uses a single slice. +def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C], + (instrs + (instregex "LXVL(L)?") +)>; + +// 5 Cycle Load uses a single slice. +def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C], + (instrs + (instregex "LVE(B|H|W)X$"), + (instregex "LVX(L)?"), + (instregex "LXSI(B|H)ZX$"), + LXSDX, + LXVB16X, + LXVD2X, + LXVWSX, + LXSIWZX, + LXV, + LXVX, + LXSD, + DFLOADf64, + XFLOADf64, + LIWZX +)>; + +// 4 Cycle Load uses a single slice. +def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C], + (instrs + (instregex "DCB(F|T|ST)(EP)?$"), + (instregex "DCBZ(L)?(EP)?$"), + (instregex "DCBTST(EP)?$"), + (instregex "CP_COPY(8)?$"), + (instregex "CP_PASTE(8)?$"), + (instregex "ICBI(EP)?$"), + (instregex "ICBT(LS)?$"), + (instregex "LBARX(L)?$"), + (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"), + (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"), + (instregex "LH(A|B)RX(L)?(8)?$"), + (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"), + (instregex "LWARX(L)?$"), + (instregex "LWBRX(8)?$"), + (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"), + CP_ABORT, + DARN, + EnforceIEIO, + ISYNC, + MSGSYNC, + TLBSYNC, + SYNC, + LMW, + LSWI +)>; + +// 4 Cycle Restricted load uses a single slice but the dispatch for the whole +// superslice. +def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C], + (instrs + LFIWZX, + LFDX, + LFD +)>; + +// Cracked Load Instructions. +// Load instructions that can be done in parallel. +def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, + DISP_PAIR_1C], + (instrs + SLBIA, + SLBIE, + SLBMFEE, + SLBMFEV, + SLBMTE, + TLBIEL +)>; + +// Cracked Load Instruction. +// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU +// operations can be run in parallel. +def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, + DISP_PAIR_1C, DISP_PAIR_1C], + (instrs + (instregex "L(W|H)ZU(X)?(8)?$") +)>; + +// Cracked TEND Instruction. +// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU +// operations can be run in parallel. +def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, + DISP_1C, DISP_1C], + (instrs + TEND +)>; + + +// Cracked Store Instruction +// Consecutive Store and ALU instructions. The store is restricted and requires +// three dispatches. +def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, + DISP_3SLOTS_1C, DISP_1C], + (instrs + (instregex "ST(B|H|W|D)CX$") +)>; + +// Cracked Load Instruction. +// Two consecutive load operations for a total of 8 cycles. +def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C, + DISP_1C, DISP_1C], + (instrs + LDMX +)>; + +// Cracked Load instruction. +// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU +// operations cannot be done at the same time and so their latencies are added. +def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, + DISP_1C, DISP_1C], + (instrs + (instregex "LHA(X)?(8)?$"), + (instregex "CP_PASTE(8)?_rec$"), + (instregex "LWA(X)?(_32)?$"), + TCHECK +)>; + +// Cracked Restricted Load instruction. +// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU +// operations cannot be done at the same time and so their latencies are added. +// Full 6 dispatches are required as this is both cracked and restricted. +def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, + DISP_3SLOTS_1C, DISP_3SLOTS_1C], + (instrs + LFIWAX +)>; + +// Cracked Load instruction. +// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU +// operations cannot be done at the same time and so their latencies are added. +// Full 4 dispatches are required as this is a cracked instruction. +def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C], + (instrs + LXSIWAX, + LIWAX +)>; + +// Cracked Load instruction. +// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7 +// cycles. The Load and ALU operations cannot be done at the same time and so +// their latencies are added. +// Full 6 dispatches are required as this is a restricted instruction. +def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_3SLOTS_1C], + (instrs + LFSX, + LFS +)>; + +// Cracked Load instruction. +// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU +// operations cannot be done at the same time and so their latencies are added. +// Full 4 dispatches are required as this is a cracked instruction. +def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C], + (instrs + LXSSP, + LXSSPX, + XFLOADf32, + DFLOADf32 +)>; + +// Cracked 3-Way Load Instruction +// Load with two ALU operations that depend on each other +def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, + DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C], + (instrs + (instregex "LHAU(X)?(8)?$"), + LWAUX +)>; + +// Cracked Load that requires the PM resource. +// Since the Load and the PM cannot be done at the same time the latencies are +// added. Requires 8 cycles. Since the PM requires the full superslice we need +// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load +// requires the remaining 1 dispatch. +def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C, + DISP_1C, DISP_1C], + (instrs + LXVH8X, + LXVDSX, + LXVW4X +)>; + +// Single slice Restricted store operation. The restricted operation requires +// all three dispatches for the superslice. +def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C], + (instrs + (instregex "STF(S|D|IWX|SX|DX)$"), + (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"), + (instregex "STW(8)?$"), + (instregex "(D|X)FSTORE(f32|f64)$"), + (instregex "ST(W|H|D)BRX$"), + (instregex "ST(B|H|D)(8)?$"), + (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"), + STIWX, + SLBIEG, + STMW, + STSWI, + TLBIE +)>; + +// Vector Store Instruction +// Requires the whole superslice and therefore requires one dispatch +// as well as both the Even and Odd exec pipelines. +def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C], + (instrs + (instregex "STVE(B|H|W)X$"), + (instregex "STVX(L)?$"), + (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$") +)>; + +// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and two +// dispatches. +def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C], + (instrs + (instregex "MTCTR(8)?(loop)?$"), + (instregex "MTLR(8)?$") +)>; + +// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and two +// dispatches. +def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C], + (instrs + (instregex "M(T|F)VRSAVE(v)?$"), + (instregex "M(T|F)PMR$"), + (instregex "M(T|F)TB(8)?$"), + (instregex "MF(SPR|CTR|LR)(8)?$"), + (instregex "M(T|F)MSR(D)?$"), + (instregex "MTSPR(8)?$") +)>; + +// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and two +// dispatches. +def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], + (instrs + DIVW, + DIVWO, + DIVWU, + DIVWUO, + MODSW +)>; + +// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and two +// dispatches. +def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], + (instrs + DIVWE, + DIVWEO, + DIVD, + DIVDO, + DIVWEU, + DIVWEUO, + DIVDU, + DIVDUO, + MODSD, + MODUD, + MODUW +)>; + +// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C], + (instrs + DIVDE, + DIVDEO, + DIVDEU, + DIVDEUO +)>; + +// Cracked DIV and ALU operation. Requires one full slice for the ALU operation +// and one full superslice for the DIV operation since there is only one DIV per +// superslice. Latency of DIV plus ALU is 26. +def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, + DISP_EVEN_1C, DISP_1C], + (instrs + (instregex "DIVW(U)?(O)?_rec$") +)>; + +// Cracked DIV and ALU operation. Requires one full slice for the ALU operation +// and one full superslice for the DIV operation since there is only one DIV per +// superslice. Latency of DIV plus ALU is 26. +def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, + DISP_EVEN_1C, DISP_1C], + (instrs + DIVD_rec, + DIVDO_rec, + DIVDU_rec, + DIVDUO_rec, + DIVWE_rec, + DIVWEO_rec, + DIVWEU_rec, + DIVWEUO_rec +)>; + +// Cracked DIV and ALU operation. Requires one full slice for the ALU operation +// and one full superslice for the DIV operation since there is only one DIV per +// superslice. Latency of DIV plus ALU is 42. +def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C, + DISP_EVEN_1C, DISP_1C], + (instrs + DIVDE_rec, + DIVDEO_rec, + DIVDEU_rec, + DIVDEUO_rec +)>; + +// CR access instructions in _BrMCR, IIC_BrMCRX. + +// Cracked, restricted, ALU operations. +// Here the two ALU ops can actually be done in parallel and therefore the +// latencies are not added together. Otherwise this is like having two +// instructions running together on two pipelines and 6 dispatches. ALU ops are +// 2 cycles each. +def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_3SLOTS_1C], + (instrs + MTCRF, + MTCRF8 +)>; + +// Cracked ALU operations. +// Here the two ALU ops can actually be done in parallel and therefore the +// latencies are not added together. Otherwise this is like having two +// instructions running together on two pipelines and 2 dispatches. ALU ops are +// 2 cycles each. +def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, + DISP_1C, DISP_1C], + (instrs + (instregex "ADDC(8)?(O)?_rec$"), + (instregex "SUBFC(8)?(O)?_rec$") +)>; + +// Cracked ALU operations. +// Two ALU ops can be done in parallel. +// One is three cycle ALU the ohter is a two cycle ALU. +// One of the ALU ops is restricted the other is not so we have a total of +// 5 dispatches. +def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_1C], + (instrs + (instregex "F(N)?ABS(D|S)_rec$"), + (instregex "FCPSGN(D|S)_rec$"), + (instregex "FNEG(D|S)_rec$"), + FMR_rec +)>; + +// Cracked ALU operations. +// Here the two ALU ops can actually be done in parallel and therefore the +// latencies are not added together. Otherwise this is like having two +// instructions running together on two pipelines and 2 dispatches. +// ALU ops are 3 cycles each. +def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, + DISP_1C, DISP_1C], + (instrs + MCRFS +)>; + +// Cracked Restricted ALU operations. +// Here the two ALU ops can actually be done in parallel and therefore the +// latencies are not added together. Otherwise this is like having two +// instructions running together on two pipelines and 6 dispatches. +// ALU ops are 3 cycles each. +def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_3SLOTS_1C], + (instrs + (instregex "MTFSF(b|_rec)?$"), + (instregex "MTFSFI(_rec)?$") +)>; + +// Cracked instruction made of two ALU ops. +// The two ops cannot be done in parallel. +// One of the ALU ops is restricted and takes 3 dispatches. +def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_1C], + (instrs + (instregex "RLD(I)?C(R|L)_rec$"), + (instregex "RLW(IMI|INM|NM)(8)?_rec$"), + (instregex "SLW(8)?_rec$"), + (instregex "SRAW(I)?_rec$"), + (instregex "SRW(8)?_rec$"), + RLDICL_32_rec, + RLDIMI_rec +)>; + +// Cracked instruction made of two ALU ops. +// The two ops cannot be done in parallel. +// Both of the ALU ops are restricted and take 3 dispatches. +def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_3SLOTS_1C], + (instrs + (instregex "MFFS(L|CE|_rec)?$") +)>; + +// Cracked ALU instruction composed of three consecutive 2 cycle loads for a +// total of 6 cycles. All of the ALU operations are also restricted so each +// takes 3 dispatches for a total of 9. +def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C], + (instrs + (instregex "MFCR(8)?$") +)>; + +// Cracked instruction made of two ALU ops. +// The two ops cannot be done in parallel. +def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C], + (instrs + (instregex "EXTSWSLI_32_64_rec$"), + (instregex "SRAD(I)?_rec$"), + EXTSWSLI_rec, + SLD_rec, + SRD_rec, + RLDIC_rec +)>; + +// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. +def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C], + (instrs + FDIV +)>; + +// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. +def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_1C], + (instrs + FDIV_rec +)>; + +// 36 Cycle DP Instruction. +// Instruction can be done on a single slice. +def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C], + (instrs + XSSQRTDP +)>; + +// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. +def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C], + (instrs + FSQRT +)>; + +// 36 Cycle DP Vector Instruction. +def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C, + DISP_1C], + (instrs + XVSQRTDP +)>; + +// 27 Cycle DP Vector Instruction. +def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C, + DISP_1C], + (instrs + XVSQRTSP +)>; + +// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. +def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_1C], + (instrs + FSQRT_rec +)>; + +// 26 Cycle DP Instruction. +def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C], + (instrs + XSSQRTSP +)>; + +// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. +def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C], + (instrs + FSQRTS +)>; + +// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. +def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_1C], + (instrs + FSQRTS_rec +)>; + +// 33 Cycle DP Instruction. Takes one slice and 1 dispatch. +def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C], + (instrs + XSDIVDP +)>; + +// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. +def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C], + (instrs + FDIVS +)>; + +// 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU. +def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_1C], + (instrs + FDIVS_rec +)>; + +// 22 Cycle DP Instruction. Takes one slice and 1 dispatch. +def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C], + (instrs + XSDIVSP +)>; + +// 24 Cycle DP Vector Instruction. Takes one full superslice. +// Includes both EXECE, EXECO pipelines and 1 dispatch for the given +// superslice. +def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C, + DISP_1C], + (instrs + XVDIVSP +)>; + +// 33 Cycle DP Vector Instruction. Takes one full superslice. +// Includes both EXECE, EXECO pipelines and 1 dispatch for the given +// superslice. +def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C, + DISP_1C], + (instrs + XVDIVDP +)>; + +// Instruction cracked into three pieces. One Load and two ALU operations. +// The Load and one of the ALU ops cannot be run at the same time and so the +// latencies are added together for 6 cycles. The remainaing ALU is 2 cycles. +// Both the load and the ALU that depends on it are restricted and so they take +// a total of 7 dispatches. The final 2 dispatches come from the second ALU op. +// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load. +def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C, + IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C], + (instrs + (instregex "LF(SU|SUX)$") +)>; + +// Cracked instruction made up of a Store and an ALU. The ALU does not depend on +// the store and so it can be run at the same time as the store. The store is +// also restricted. +def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_1C], + (instrs + (instregex "STF(S|D)U(X)?$"), + (instregex "ST(B|H|W|D)U(X)?(8)?$") +)>; + +// Cracked instruction made up of a Load and an ALU. The ALU does not depend on +// the load and so it can be run at the same time as the load. +def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, + DISP_PAIR_1C, DISP_PAIR_1C], + (instrs + (instregex "LBZU(X)?(8)?$"), + (instregex "LDU(X)?$") +)>; + +// Cracked instruction made up of a Load and an ALU. The ALU does not depend on +// the load and so it can be run at the same time as the load. The load is also +// restricted. 3 dispatches are from the restricted load while the other two +// are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline +// is required for the ALU. +def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, + DISP_3SLOTS_1C, DISP_1C], + (instrs + (instregex "LF(DU|DUX)$") +)>; + +// Crypto Instructions + +// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and one +// dispatch. +def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C], + (instrs + (instregex "VPMSUM(B|H|W|D)$"), + (instregex "V(N)?CIPHER(LAST)?$"), + VSBOX +)>; + +// Branch Instructions + +// Two Cycle Branch +def : InstRW<[P9_BR_2C, DISP_BR_1C], + (instrs + (instregex "BCCCTR(L)?(8)?$"), + (instregex "BCCL(A|R|RL)?$"), + (instregex "BCCTR(L)?(8)?(n)?$"), + (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"), + (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"), + (instregex "BL(_TLS|_NOP)?$"), + (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"), + (instregex "BLA(8|8_NOP)?$"), + (instregex "BLR(8|L)?$"), + (instregex "TAILB(A)?(8)?$"), + (instregex "TAILBCTR(8)?$"), + (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"), + (instregex "BCLR(L)?(n)?$"), + (instregex "BCTR(L)?(8)?$"), + B, + BA, + BC, + BCC, + BCCA, + BCL, + BCLalways, + BCLn, + BCTRL8_LDinto_toc, + BCTRL_LWZinto_toc, + BCn, + CTRL_DEP +)>; + +// Five Cycle Branch with a 2 Cycle ALU Op +// Operations must be done consecutively and not in parallel. +def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C], + (instrs + ADDPCIS +)>; + +// Special Extracted Instructions For Atomics + +// Atomic Load +def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C, + IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, + IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, + DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "L(D|W)AT$") +)>; + +// Atomic Store +def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, + IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C], + (instrs + (instregex "ST(D|W)AT$") +)>; + +// Signal Processing Engine (SPE) Instructions +// These instructions are not supported on Power 9 +def : InstRW<[], + (instrs + BRINC, + EVABS, + EVEQV, + EVMRA, + EVNAND, + EVNEG, + (instregex "EVADD(I)?W$"), + (instregex "EVADD(SM|SS|UM|US)IAAW$"), + (instregex "EVAND(C)?$"), + (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"), + (instregex "EVCNTL(S|Z)W$"), + (instregex "EVDIVW(S|U)$"), + (instregex "EVEXTS(B|H)$"), + (instregex "EVLD(H|W|D)(X)?$"), + (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"), + (instregex "EVLWHE(X)?$"), + (instregex "EVLWHO(S|U)(X)?$"), + (instregex "EVLW(H|W)SPLAT(X)?$"), + (instregex "EVMERGE(HI|LO|HILO|LOHI)$"), + (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"), + (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"), + (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"), + (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"), + (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"), + (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"), + (instregex "EVMWHS(M|S)(F|FA|I|IA)$"), + (instregex "EVMWHUMI(A)?$"), + (instregex "EVMWLS(M|S)IA(A|N)W$"), + (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"), + (instregex "EVMWSM(F|I)(A|AA|AN)?$"), + (instregex "EVMWSSF(A|AA|AN)?$"), + (instregex "EVMWUMI(A|AA|AN)?$"), + (instregex "EV(N|X)?OR(C)?$"), + (instregex "EVR(LW|LWI|NDW)$"), + (instregex "EVSLW(I)?$"), + (instregex "EVSPLAT(F)?I$"), + (instregex "EVSRW(I)?(S|U)$"), + (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"), + (instregex "EVSUBF(S|U)(M|S)IAAW$"), + (instregex "EVSUB(I)?FW$") +)> { let Unsupported = 1; } + +// General Instructions without scheduling support. +def : InstRW<[], + (instrs + (instregex "(H)?RFI(D)?$"), + (instregex "DSS(ALL)?$"), + (instregex "DST(ST)?(T)?(64)?$"), + (instregex "ICBL(C|Q)$"), + (instregex "L(W|H|B)EPX$"), + (instregex "ST(W|H|B)EPX$"), + (instregex "(L|ST)FDEPX$"), + (instregex "M(T|F)SR(IN)?$"), + (instregex "M(T|F)DCR$"), + (instregex "NOP_GT_PWR(6|7)$"), + (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"), + (instregex "WRTEE(I)?$"), + ATTN, + CLRBHRB, + MFBHRBE, + MBAR, + MSYNC, + SLBSYNC, + SLBFEE_rec, + NAP, + STOP, + TRAP, + RFCI, + RFDI, + RFMCI, + SC, + DCBA, + DCBI, + DCCCI, + ICCCI +)> { let Unsupported = 1; } |