1//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// This file defines the resources required by P9 instructions. This is part
// of the P9 processor model used for instruction scheduling. This file should
// contain all of the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
15//
16// The makeup of the P9 CPU is modeled as follows:
17//   - Each CPU is made up of two superslices.
18//   - Each superslice is made up of two slices. Therefore, there are 4 slices
19//      for each CPU.
20//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
21//   - Each CPU has:
22//     - One CY (Crypto) unit P9_CY_*
23//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
24//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
26//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
27//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
28//       This also includes fixed point multiply add.
29//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
30//     - Four Load/Store Queues. P9_LS_*
31//   - Each set of instructions will require a number of these resources.
32//===----------------------------------------------------------------------===//
33
// Superslice-wide ALU vector operation with a two cycle latency.
// Occupies the even and odd ALU units (ALUE, ALUO), the even and odd
// execution pipelines (EXECE, EXECO) and all three dispatch slots (DISP) of
// the superslice it is issued to.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
  (instrs
    // Opcode families selected by pattern.
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    // Individually named opcodes.
    MTVSRDD, VEQV, VNAND, VNEGD, VNEGW, VNOR, VOR, VORC, VSEL, VXOR,
    XVNEGDP, XVNEGSP,
    XXLAND, XXLANDC, XXLEQV, XXLNAND, XXLNOR, XXLOR, XXLORf, XXLORC,
    XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz, XXSEL,
    XSABSQP, XSCPSGNQP, XSIEXPQP, XSNABSQP, XSNEGQP, XSXEXPQP)>;
86
// Three cycle ALU operation with Restricted dispatch. Execution happens on a
// single slice (one ALU unit, one EXEC pipeline), but because the operation
// is Restricted it consumes all three dispatch slots (DISP) of the
// superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMPU(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV, FTSQRT, CMPEQB)>;
104
// Three cycle ALU operation with Standard dispatch. Confined to one slice:
// one ALU unit, one EXEC pipeline and two dispatch slots (DISP).
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    XSTDIVDP, XSTSQRTDP, XSXSIGDP, XSCVSPDPN, SETB, BPERMD)>;
121
// Two cycle ALU operation with Standard dispatch. Confined to one slice:
// one ALU unit, one EXEC pipeline and two dispatch slots (DISP).
//
// All entries below are merged into a single set of opcodes, so a pattern
// whose matches are entirely covered by another pattern in this list adds
// nothing. NEG and NEG8 are selected by "NEG(8)?(o)?$"; no separate
// "NEG(8)?$" entry is required.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTVSRD$"),
    (instregex "MTVSRW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?$"),
    (instregex "ANDI(S)?o(8)?$"),
    (instregex "ADDC(8)?$"),
    (instregex "ADDIC(8)?(o)?$"),
    (instregex "ADD(8|4)(o)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(o)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(o)?$"),
    (instregex "NEG(8)?(o)?$"),
    (instregex "POPCNTB$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(o)?$"),
    (instregex "NAND(8)?(o)?$"),
    (instregex "AND(C)?(8)?(o)?$"),
    (instregex "NOR(8)?(o)?$"),
    (instregex "OR(C)?(8)?(o)?$"),
    (instregex "EQV(8)?(o)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    (instregex "ADDI(S)?toc(HA|L)$"),
    COPY,
    MCRF,
    MCRXRX,
    XSNABSDP,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP,
    MFVSRWZ,
    SRADI_32,
    RLDIC,
    RFEBB,
    LA,
    TBEGIN,
    TRECHKPT,
    NOP,
    WAIT
)>;
172
// Two cycle ALU operation with Restricted dispatch. Execution happens on a
// single slice (one ALU unit, one EXEC pipeline), but because the operation
// is Restricted it consumes all three dispatch slots (DISP) of the
// superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI, XSIEXPDP, FMR, CREQV, CRXOR, TRECLAIM, TSR, TABORT)>;
200
// Superslice-wide ALU vector operation with a three cycle latency.
// Occupies the even and odd ALU units (ALUE, ALUO), the even and odd
// execution pipelines (EXECE, EXECO) and all three dispatch slots (DISP) of
// the superslice it is issued to.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
  (instrs
    // Opcode families selected by pattern.
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(o)?$"),
    (instregex "VCMPBFP(o)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    // Individually named opcodes.
    VBPERMD, VADDCUW, VPOPCNTW, VPOPCNTD, VPRTYBD, VPRTYBW,
    VSHASIGMAD, VSHASIGMAW,
    VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS, VSUBCUW,
    VCMPGTSB, VCMPGTSBo, VCMPGTSD, VCMPGTSDo,
    VCMPGTSH, VCMPGTSHo, VCMPGTSW, VCMPGTSWo,
    VCMPGTUB, VCMPGTUBo, VCMPGTUD, VCMPGTUDo,
    VCMPGTUH, VCMPGTUHo, VCMPGTUW, VCMPGTUWo,
    VCMPNEBo, VCMPNEHo, VCMPNEWo,
    VCMPNEZBo, VCMPNEZHo, VCMPNEZWo,
    VCMPEQUBo, VCMPEQUDo, VCMPEQUHo, VCMPEQUWo,
    XVCMPEQDP, XVCMPEQDPo, XVCMPEQSP, XVCMPEQSPo,
    XVCMPGEDP, XVCMPGEDPo, XVCMPGESP, XVCMPGESPo,
    XVCMPGTDP, XVCMPGTDPo, XVCMPGTSP, XVCMPGTSPo,
    XVMAXDP, XVMAXSP, XVMINDP, XVMINSP,
    XVTDIVDP, XVTDIVSP, XVTSQRTDP, XVTSQRTSP,
    XVTSTDCDP, XVTSTDCSP, XVXSIGDP, XVXSIGSP)>;
286
// Superslice-wide DP vector operation with a seven cycle latency.
// Occupies the even and odd DP units (DPE, DPO), the even and odd execution
// pipelines (EXECE, EXECO) and all three dispatch slots (DISP) of the
// superslice it is issued to.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C],
  (instrs
    VADDFP, VCTSXS, VCTSXS_0, VCTUXS, VCTUXS_0, VEXPTEFP, VLOGEFP,
    VMADDFP, VMHADDSHS, VNMSUBFP, VREFP,
    VRFIM, VRFIN, VRFIP, VRFIZ, VRSQRTEFP, VSUBFP,
    XVADDDP, XVADDSP,
    XVCVDPSP, XVCVDPSXDS, XVCVDPSXWS, XVCVDPUXDS, XVCVDPUXWS,
    XVCVHPSP, XVCVSPDP, XVCVSPHP,
    XVCVSPSXDS, XVCVSPSXWS, XVCVSPUXDS, XVCVSPUXWS,
    XVCVSXDDP, XVCVSXDSP, XVCVSXWDP, XVCVSXWSP,
    XVCVUXDDP, XVCVUXDSP, XVCVUXWDP, XVCVUXWSP,
    XVMADDADP, XVMADDASP, XVMADDMDP, XVMADDMSP,
    XVMSUBADP, XVMSUBASP, XVMSUBMDP, XVMSUBMSP,
    XVMULDP, XVMULSP,
    XVNMADDADP, XVNMADDASP, XVNMADDMDP, XVNMADDMSP,
    XVNMSUBADP, XVNMSUBASP, XVNMSUBMDP, XVNMSUBMSP,
    XVRDPI, XVRDPIC, XVRDPIM, XVRDPIP, XVRDPIZ,
    XVREDP, XVRESP,
    XVRSPI, XVRSPIC, XVRSPIM, XVRSPIP, XVRSPIZ,
    XVRSQRTEDP, XVRSQRTESP, XVSUBDP, XVSUBSP,
    VCFSX, VCFSX_0, VCFUX, VCFUX_0,
    VMHRADDSHS, VMLADDUHM,
    VMSUMMBM, VMSUMSHM, VMSUMSHS, VMSUMUBM, VMSUMUHM, VMSUMUHS,
    VMULESB, VMULESH, VMULESW, VMULEUB, VMULEUH, VMULEUW,
    VMULOSB, VMULOSH, VMULOSW, VMULOUB, VMULOUH, VMULOUW,
    VMULUWM,
    VSUM2SWS, VSUM4SBS, VSUM4SHS, VSUM4UBS, VSUMSWS)>;
397
398
// Restricted DP operation with a five cycle latency. Uses one DP unit and one
// EXEC pipeline, plus all three dispatch slots (DISP) of the superslice.
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "MADD(HD|HDU|LD)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$"))>;
406
// Restricted DP operation with a seven cycle latency. Uses one DP unit and
// one EXEC pipeline, plus all three dispatch slots (DISP) of the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS, FNMADD, FNMSUBS, FNMSUB, FSELD, FSELS, FMULS, FMUL,
    XSMADDADP, XSMADDASP, XSMADDMDP, XSMADDMSP,
    XSMSUBADP, XSMSUBASP, XSMSUBMDP, XSMSUBMSP,
    XSMULDP, XSMULSP,
    XSNMADDADP, XSNMADDASP, XSNMADDMDP, XSNMADDMSP,
    XSNMSUBADP, XSNMSUBASP, XSNMSUBMDP, XSNMSUBMSP)>;
449
// Pairing of a seven cycle Restricted DP operation with a three cycle ALU
// operation; the two may execute in parallel.
// Because the DP piece is Restricted, five dispatch slots (DISP) are needed
// in total.
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs (instregex "FSEL(D|S)o$"))>;
458
// Pairing of a five cycle Restricted DP operation with a two cycle ALU
// operation. Uses two EXEC pipelines and five dispatch slots (DISP).
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs (instregex "MUL(H|L)(D|W)(U)?o$"))>;
465
// Pairing of a seven cycle Restricted DP operation with a three cycle ALU
// operation; the two have to execute one after the other.
// Because the DP piece is Restricted, five dispatch slots (DISP) are needed
// in total.
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "FRI(N|P|Z|M)(D|S)o$"),
    (instregex "FRE(S)?o$"),
    (instregex "FADD(S)?o$"),
    (instregex "FSUB(S)?o$"),
    (instregex "F(N)?MSUB(S)?o$"),
    (instregex "F(N)?MADD(S)?o$"),
    (instregex "FCFID(U)?(S)?o$"),
    (instregex "FCTID(U)?(Z)?o$"),
    (instregex "FCTIW(U)?(Z)?o$"),
    (instregex "FMUL(S)?o$"),
    (instregex "FRSQRTE(S)?o$"),
    FRSPo)>;
485
// DP operation with a seven cycle latency and Standard dispatch: one DP unit,
// one EXEC pipeline and two dispatch slots (DISP).
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    XSADDDP, XSADDSP,
    XSCVDPHP, XSCVDPSP,
    XSCVDPSXDS, XSCVDPSXDSs, XSCVDPSXWS,
    XSCVDPUXDS, XSCVDPUXDSs, XSCVDPUXWS,
    XSCVDPSXWSs, XSCVDPUXWSs,
    XSCVHPDP, XSCVSPDP,
    XSCVSXDDP, XSCVSXDSP, XSCVUXDDP, XSCVUXDSP,
    XSRDPI, XSRDPIC, XSRDPIM, XSRDPIP, XSRDPIZ,
    XSREDP, XSRESP, XSRSQRTEDP, XSRSQRTESP,
    XSSUBDP, XSSUBSP,
    XSCVDPSPN, XSRSP)>;
521
// PM (Permute) operation with a three cycle latency. A superslice has a
// single PM unit, so the whole superslice is claimed: both execution
// pipelines (EXECO, EXECE) and all three dispatch slots (DISP).
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    // Opcode families selected by pattern.
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    // Individually named opcodes.
    MFVSRLD, MTVSRWS, VBPERMQ, VCLZLSBB, VCTZLSBB, VEXTRACTD,
    VEXTUBLX, VEXTUBRX, VEXTUHLX, VEXTUHRX, VEXTUWLX, VEXTUWRX,
    VGBBD,
    VMRGHB, VMRGHH, VMRGHW, VMRGLB, VMRGLH, VMRGLW,
    VPERM, VPERMR, VPERMXOR,
    VPKPX, VPKSDSS, VPKSDUS, VPKSHSS, VPKSHUS, VPKSWSS, VPKSWUS,
    VPKUDUM, VPKUDUS, VPKUHUM, VPKUHUS, VPKUWUM, VPKUWUS,
    VPRTYBQ,
    VSL, VSLDOI, VSLO, VSLV, VSR, VSRO, VSRV,
    VUPKHPX, VUPKHSB, VUPKHSH, VUPKHSW,
    VUPKLPX, VUPKLSB, VUPKLSH, VUPKLSW,
    XXBRD, XXBRH, XXBRQ, XXBRW,
    XXEXTRACTUW, XXINSERTW, XXMRGHW, XXMRGLW, XXPERM, XXPERMR,
    XXSLDWI, XXSLDWIs, XXSPLTIB, XXSPLTW, XXSPLTWs,
    XXPERMDI, XXPERMDIs,
    VADDCUQ, VADDECUQ, VADDEUQM, VADDUQM,
    VMUL10CUQ, VMUL10ECUQ, VMUL10EUQ, VMUL10UQ,
    VSUBCUQ, VSUBECUQ, VSUBEUQM, VSUBUQM,
    XSCMPEXPQP, XSCMPOQP, XSCMPUQP, XSTSTDCQP, XSXSIGQP,
    BCDCFNo, BCDCFZo, BCDCPSGNo, BCDCTNo, BCDCTZo, BCDSETSGNo,
    BCDSo, BCDTRUNCo, BCDUSo, BCDUTRUNCo)>;
629
// DFU operation with a 12 cycle latency. The CPU has a single DFU unit, so a
// whole superslice is claimed: both execution pipelines (EXECE, EXECO) and
// all three dispatch slots (DISP).
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    BCDSRo,
    XSADDQP, XSADDQPO,
    XSCVDPQP, XSCVQPDP, XSCVQPDPO,
    XSCVQPSDZ, XSCVQPSWZ, XSCVQPUDZ, XSCVQPUWZ,
    XSCVSDQP, XSCVUDQP,
    XSRQPI, XSRQPIX, XSRQPXP,
    XSSUBQP, XSSUBQPO)>;
653
// DFU operation with a 23 cycle latency. The CPU has a single DFU unit, so a
// whole superslice is claimed: both execution pipelines (EXECE, EXECO) and
// all three dispatch slots (DISP).
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs BCDCTSQo)>;
661
// DFU operation with a 24 cycle latency. The CPU has a single DFU unit, so a
// whole superslice is claimed: both execution pipelines (EXECE, EXECO) and
// all three dispatch slots (DISP).
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    XSMADDQP, XSMADDQPO, XSMSUBQP, XSMSUBQPO,
    XSMULQP, XSMULQPO,
    XSNMADDQP, XSNMADDQPO, XSNMSUBQP, XSNMSUBQPO)>;
678
// DFU operation with a 37 cycle latency. The CPU has a single DFU unit, so a
// whole superslice is claimed: both execution pipelines (EXECE, EXECO) and
// all three dispatch slots (DISP).
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs BCDCFSQo)>;
686
// DFU operation with a 58 cycle latency. The CPU has a single DFU unit, so a
// whole superslice is claimed: both execution pipelines (EXECE, EXECO) and
// all three dispatch slots (DISP).
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs XSDIVQP, XSDIVQPO)>;
695
// DFU operation with a 76 cycle latency. The CPU has a single DFU unit, so a
// whole superslice is claimed: both execution pipelines (EXECE, EXECO) and
// all three dispatch slots (DISP).
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs XSSQRTQP, XSSQRTQPO)>;
704
// 6 Cycle Load uses a single slice: one load/store resource, one AGEN
// pipeline and two dispatch slots (DISP).
// The pattern is terminated with '$', like the other patterns in this file,
// so that it selects exactly LXVL and LXVLL instead of every opcode whose
// name merely begins with "LXVL".
def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "LXVL(L)?$")
)>;
710
// 5 Cycle Load uses a single slice: one load/store resource, one AGEN
// pipeline and two dispatch slots (DISP).
// Every pattern is terminated with '$', like the other patterns in this file,
// so that "LVX(L)?$" selects exactly LVX and LVXL instead of every opcode
// whose name merely begins with "LVX".
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "LVE(B|H|W)X$"),
    (instregex "LVX(L)?$"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX,
    LXVB16X,
    LXVD2X,
    LXVWSX,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64,
    LIWZX
)>;
729
// Load with a four cycle latency and Standard dispatch. Confined to one
// slice: one load/store resource, one AGEN pipeline and two dispatch slots
// (DISP).
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "CP_PASTE(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT, DARN, EnforceIEIO, ISYNC, MSGSYNC, TLBSYNC, SYNC, LMW, LSWI)>;
758
// Restricted load with a four cycle latency. Executes on one slice but takes
// all three dispatch slots (DISP) of the superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs LFIWZX, LFDX, LFD)>;
767
// Cracked load instructions: two four cycle load pieces that may execute in
// parallel. Two AGEN pipelines and four dispatch slots (DISP) are used.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs SLBIA, SLBIE, SLBMFEE, SLBMFEV, SLBMTE, TLBIEL)>;
780
// Cracked load instruction made of a four cycle load piece (P9_LS_4C) and a
// two cycle ALU piece (P9_ALU_2C), six cycles of work in total. The two
// pieces may execute in parallel. One EXEC pipeline, one AGEN pipeline and
// four dispatch slots (DISP) are used.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "L(W|H)ZU(X)?(8)?$"),
    TEND)>;
790
// Cracked store instruction: a store followed by an ALU operation. The store
// piece is Restricted and therefore takes three dispatch slots, giving five
// dispatch slots (DISP) overall.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs (instregex "ST(B|H|W|D)CX$"))>;
799
// Cracked load instruction: two load operations executed back to back, eight
// cycles in total. Two AGEN pipelines and four dispatch slots (DISP) are
// used.
def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs LDMX)>;
807
// Cracked load instruction: a load piece followed by an ALU piece. The pieces
// cannot overlap, so their latencies add up to six cycles. One EXEC pipeline,
// one AGEN pipeline and four dispatch slots (DISP) are used.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?o$"),
    (instregex "LWA(X)?(_32)?$"),
    TCHECK)>;
819
// Cracked and Restricted load instruction: a load piece followed by an ALU
// piece. The pieces cannot overlap, so their latencies add up to six cycles.
// Being both cracked and Restricted, it takes a full six dispatch slots
// (DISP).
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs LFIWAX)>;
829
// Cracked load instruction: a load piece followed by an ALU piece. The pieces
// cannot overlap, so their latencies add up to seven cycles.
// As a cracked instruction it takes four dispatch slots (DISP).
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs LXSIWAX, LIWAX)>;
840
// Cracked load instruction: a four cycle load piece followed by a three cycle
// ALU piece. The pieces cannot overlap, so their latencies add up to seven
// cycles.
// As a Restricted instruction it takes a full six dispatch slots (DISP).
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs LFSX, LFS)>;
852
// Cracked load instruction: a load piece followed by an ALU piece. The pieces
// cannot overlap, so their latencies add up to eight cycles.
// As a cracked instruction it takes four dispatch slots (DISP).
def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs LXSSP, LXSSPX, XFLOADf32, DFLOADf32)>;
865
// Three-way cracked load instruction: a load plus two ALU operations, where
// the ALU operations depend on each other. One AGEN pipeline, two EXEC
// pipelines and six dispatch slots (DISP) are used.
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX)>;
874
// Cracked load that also needs the PM resource. The load and the PM piece
// cannot overlap, so their latencies add up to eight cycles.
// The PM piece claims the whole superslice: both the EXECE and EXECO
// pipelines and three dispatch slots. The load piece takes the other two
// dispatch slots, for five (DISP) in total.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs LXVH8X, LXVDSX, LXVW4X)>;
888
// Restricted store operation on a single slice. Because it is Restricted it
// takes all three dispatch slots (DISP) of the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX, SLBIEG, STMW, STSWI, TLBIE)>;
906
// Vector store instruction. It claims the whole superslice: the Even and Odd
// execution pipelines (EXECE, EXECO), one AGEN pipeline and all three
// dispatch slots (DISP).
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$"))>;
917
// DIV unit operation with a five cycle latency. A superslice has a single DIV
// unit, so the whole superslice is claimed: both execution pipelines (EXECE,
// EXECO) and all three dispatch slots (DISP).
def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$"))>;
926
// DIV unit operation with a 12 cycle latency. A superslice has a single DIV
// unit, so the whole superslice is claimed: both execution pipelines (EXECE,
// EXECO) and all three dispatch slots (DISP).
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "MTSPR(8)?$"))>;
939
// DIV unit operation with a 16 cycle latency. A superslice has a single DIV
// unit, so the whole superslice is claimed: both execution pipelines (EXECO,
// EXECE) and all three dispatch slots (DISP).
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
  (instrs DIVW, DIVWU, MODSW)>;
950
// DIV unit operation with a 24 cycle latency. A superslice has a single DIV
// unit, so the whole superslice is claimed: both execution pipelines (EXECO,
// EXECE) and all three dispatch slots (DISP).
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
  (instrs DIVWE, DIVD, DIVWEU, DIVDU, MODSD, MODUD, MODUW)>;
965
// DIV unit operation with a 40 cycle latency. A superslice has a single DIV
// unit, so the whole superslice is claimed: both execution pipelines (EXECO,
// EXECE) and all three dispatch slots (DISP).
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
  (instrs DIVDE, DIVDEU)>;
975
// Cracked DIV plus ALU operation. The ALU piece needs one full slice; the DIV
// piece needs one full superslice, since a superslice has only one DIV unit.
// The combined DIV plus ALU latency is 18, matching the P9_IntDivAndALUOp_18C_8
// resource used here.
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs (instregex "DIVW(U)?(O)?o$"))>;
984
// Cracked DIV plus ALU operation. The ALU piece needs one full slice; the DIV
// piece needs one full superslice, since a superslice has only one DIV unit.
// The combined DIV plus ALU latency is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs DIVDo, DIVDUo, DIVWEo, DIVWEUo)>;
996
// Cracked DIV plus ALU operation. The ALU piece needs one full slice; the DIV
// piece needs one full superslice, since a superslice has only one DIV unit.
// The combined DIV plus ALU latency is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs DIVDEo, DIVDEUo)>;
1006
1007// CR access instructions in _BrMCR, IIC_BrMCRX.
1008
// Cracked and Restricted ALU operations.
// The two ALU pieces may execute in parallel, so their latencies are not
// summed. In effect this is two instructions running side by side on two
// pipelines, taking six dispatch slots (DISP).
// Each ALU piece takes two cycles.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs MTCRF, MTCRF8)>;
1020
// Cracked ALU operations.
// The two ALU pieces may execute in parallel, so their latencies are not
// summed. In effect this is two instructions running side by side on two
// pipelines, taking four dispatch slots (DISP).
// Each ALU piece takes two cycles.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "ADDC(8)?o$"),
    (instregex "SUBFC(8)?o$"))>;
1032
// Cracked ALU operations.
// The two ALU pieces may execute in parallel.
// One piece is a three cycle ALU operation, the other a two cycle ALU
// operation.
// One of the pieces is Restricted and the other is not, which gives five
// dispatch slots (DISP) in total.
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "F(N)?ABS(D|S)o$"),
    (instregex "FCPSGN(D|S)o$"),
    (instregex "FNEG(D|S)o$"),
    FMRo)>;
1046
// Cracked ALU operations.
// The two ALU pieces may execute in parallel, so their latencies are not
// summed. In effect this is two instructions running side by side on two
// pipelines, taking four dispatch slots (DISP).
// Each ALU piece takes three cycles.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs MCRFS)>;
1057
// Cracked and Restricted ALU operations.
// The two ALU pieces may execute in parallel, so their latencies are not
// summed. In effect this is two instructions running side by side on two
// pipelines, taking six dispatch slots (DISP).
// Each ALU piece takes three cycles.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "MTFSF(b|o)?$"),
    (instregex "MTFSFI(o)?$"))>;
1069
// Cracked instruction built from two ALU pieces that cannot execute in
// parallel.
// One of the ALU pieces is Restricted and takes three dispatch slots, giving
// five dispatch slots (DISP) in total.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "RLD(I)?C(R|L)o$"),
    (instregex "RLW(IMI|INM|NM)(8)?o$"),
    (instregex "SLW(8)?o$"),
    (instregex "SRAW(I)?o$"),
    (instregex "SRW(8)?o$"),
    RLDICL_32o, RLDIMIo)>;
1084
// Cracked instruction built from two ALU pieces that cannot execute in
// parallel.
// Both ALU pieces are Restricted and take three dispatch slots each, giving
// six dispatch slots (DISP) in total.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs (instregex "MFFS(L|CE|o)?$"))>;
1093
// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
// total of 6 cycles. All of the ALU operations are also restricted so each
// takes 3 dispatches for a total of 9.
def : InstRW<
  [P9_ALUOpAndALUOpAndALUOp_6C,          // three sequential ALU ops, 6 cycles
   IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,   // three pipelines, one per ALU op
   DISP_1C, DISP_1C, DISP_1C,            // 9 dispatches: 3 per restricted op
   DISP_1C, DISP_1C, DISP_1C,
   DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "MFCR(8)?$")
  )
>;
1103
// Cracked instruction built from two ALU ops that must run one after the
// other (they cannot be done in parallel).
def : InstRW<
  [P9_ALUOpAndALUOp_4C,                  // two sequential ALU ops, 4 cycles
   IP_EXEC_1C, IP_EXEC_1C,               // two pipelines
   DISP_1C, DISP_1C, DISP_1C, DISP_1C],  // 4 dispatches
  (instrs
    (instregex "EXTSWSLIo$"),
    (instregex "SRAD(I)?o$"),
    SLDo,
    SRDo,
    RLDICo
  )
>;
1115
// Restricted 33 cycle DP instruction: occupies one slice and 3 dispatches.
def : InstRW<
  [P9_DP_33C_8,                 // 33 cycle DP op
   IP_EXEC_1C,                  // one pipeline
   DISP_1C, DISP_1C, DISP_1C],  // 3 dispatches
  (instrs
    FDIV
  )
>;
1121
// Restricted 33 cycle DP instruction cracked with a 3 cycle ALU op
// (36 cycles for the sequence).
def : InstRW<
  [P9_DPOpAndALU2Op_36C_8,                        // DP op followed by ALU op
   IP_EXEC_1C, IP_EXEC_1C,                        // two pipelines
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],  // 5 dispatches
  (instrs
    FDIVo
  )
>;
1128
// 36 cycle DP instruction.
// A single slice is enough to execute it.
def : InstRW<
  [P9_DP_36C_10,       // 36 cycle DP op
   IP_EXEC_1C,         // one pipeline
   DISP_1C, DISP_1C],  // 2 dispatches
  (instrs
    XSSQRTDP
  )
>;
1135
// Restricted 36 cycle DP instruction: occupies one slice and 3 dispatches.
def : InstRW<
  [P9_DP_36C_10,                // 36 cycle DP op
   IP_EXEC_1C,                  // one pipeline
   DISP_1C, DISP_1C, DISP_1C],  // 3 dispatches
  (instrs
    FSQRT
  )
>;
1141
// 36 cycle DP vector instruction.
def : InstRW<
  [P9_DPE_36C_10, P9_DPO_36C_10,  // DPE and DPO writes, 36 cycles each
   IP_EXECE_1C, IP_EXECO_1C,      // EXECE and EXECO pipelines
   DISP_1C, DISP_1C, DISP_1C],    // 3 dispatches
  (instrs
    XVSQRTDP
  )
>;
1148
// 27 cycle DP vector instruction.
def : InstRW<
  [P9_DPE_27C_10, P9_DPO_27C_10,  // DPE and DPO writes, 27 cycles each
   IP_EXECE_1C, IP_EXECO_1C,      // EXECE and EXECO pipelines
   DISP_1C, DISP_1C, DISP_1C],    // 3 dispatches
  (instrs
    XVSQRTSP
  )
>;
1155
// Restricted 36 cycle DP instruction cracked with a 3 cycle ALU op
// (39 cycles for the sequence).
def : InstRW<
  [P9_DPOpAndALU2Op_39C_10,                       // DP op followed by ALU op
   IP_EXEC_1C, IP_EXEC_1C,                        // two pipelines
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],  // 5 dispatches
  (instrs
    FSQRTo
  )
>;
1162
// 26 cycle DP instruction.
def : InstRW<
  [P9_DP_26C_5,        // 26 cycle DP op
   IP_EXEC_1C,         // one pipeline
   DISP_1C, DISP_1C],  // 2 dispatches
  (instrs
    XSSQRTSP
  )
>;
1168
// Restricted 26 cycle DP instruction: occupies one slice and 3 dispatches.
def : InstRW<
  [P9_DP_26C_5,                 // 26 cycle DP op
   IP_EXEC_1C,                  // one pipeline
   DISP_1C, DISP_1C, DISP_1C],  // 3 dispatches
  (instrs
    FSQRTS
  )
>;
1174
// Restricted 26 cycle DP instruction cracked with a 3 cycle ALU op
// (29 cycles for the sequence).
def : InstRW<
  [P9_DPOpAndALU2Op_29C_5,                        // DP op followed by ALU op
   IP_EXEC_1C, IP_EXEC_1C,                        // two pipelines
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],  // 5 dispatches
  (instrs
    FSQRTSo
  )
>;
1181
// 33 cycle DP instruction: occupies one slice and 2 dispatches.
def : InstRW<
  [P9_DP_33C_8,        // 33 cycle DP op
   IP_EXEC_1C,         // one pipeline
   DISP_1C, DISP_1C],  // 2 dispatches
  (instrs
    XSDIVDP
  )
>;
1187
// Restricted 22 cycle DP instruction: occupies one slice and 3 dispatches.
def : InstRW<
  [P9_DP_22C_5,                 // 22 cycle DP op
   IP_EXEC_1C,                  // one pipeline
   DISP_1C, DISP_1C, DISP_1C],  // 3 dispatches
  (instrs
    FDIVS
  )
>;
1193
// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The combined write is 25 cycles (22 + 3), matching the other DP-plus-ALU
// cracked record forms in this file (33 -> 36, 36 -> 39, 26 -> 29).
def : InstRW<
  [P9_DPOpAndALU2Op_25C_5,                        // DP op followed by ALU op
   IP_EXEC_1C, IP_EXEC_1C,                        // two pipelines
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],  // 5 dispatches
  (instrs
    FDIVSo
  )
>;
1200
// 22 cycle DP instruction: occupies one slice and 2 dispatches.
def : InstRW<
  [P9_DP_22C_5,        // 22 cycle DP op
   IP_EXEC_1C,         // one pipeline
   DISP_1C, DISP_1C],  // 2 dispatches
  (instrs
    XSDIVSP
  )
>;
1206
// 24 cycle DP vector instruction that consumes one full superslice:
//  both the EXECE and EXECO pipelines plus all 3 dispatches of that
//  superslice.
def : InstRW<
  [P9_DPE_24C_8, P9_DPO_24C_8,  // DPE and DPO writes, 24 cycles each
   IP_EXECE_1C, IP_EXECO_1C,    // EXECE and EXECO pipelines
   DISP_1C, DISP_1C, DISP_1C],  // 3 dispatches
  (instrs
    XVDIVSP
  )
>;
1215
// 33 cycle DP vector instruction that consumes one full superslice:
//  both the EXECE and EXECO pipelines plus all 3 dispatches of that
//  superslice.
def : InstRW<
  [P9_DPE_33C_8, P9_DPO_33C_8,  // DPE and DPO writes, 33 cycles each
   IP_EXECE_1C, IP_EXECO_1C,    // EXECE and EXECO pipelines
   DISP_1C, DISP_1C, DISP_1C],  // 3 dispatches
  (instrs
    XVDIVDP
  )
>;
1224
// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
//  latencies are added together for 7 cycles (P9_LoadAndALU2Op_7C). The
//  remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
//  a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<
  [P9_LoadAndALU2Op_7C,                  // load followed by dependent ALU op
   P9_ALU_2C,                            // independent 2 cycle ALU op
   IP_AGEN_1C,                           // AGEN pipeline for the load
   IP_EXEC_1C, IP_EXEC_1C,               // EXEC pipelines for the two ALU ops
   DISP_1C, DISP_1C, DISP_1C,            // 8 dispatches: 6 for the restricted
   DISP_1C, DISP_1C, DISP_1C,            //  load + ALU pair ...
   DISP_1C, DISP_1C],                    //  ... and 2 for the second ALU op
  (instrs
    (instregex "LF(SU|SUX)$")
  )
>;
1238
// Cracked instruction consisting of a Store plus an ALU op. The ALU op has no
// dependency on the store, so both can run at the same time. The store is
// restricted as well.
def : InstRW<
  [P9_LS_1C, P9_ALU_2C,                           // store and 2 cycle ALU op
   IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,            // one AGEN and two EXEC pipelines
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],  // 5 dispatches
  (instrs
    (instregex "STF(S|D)U(X)?$"),
    (instregex "ST(B|H|W|D)U(X)?(8)?$")
  )
>;
1248
// Cracked instruction consisting of a Load plus an ALU op. The ALU op has no
// dependency on the load, so both can run at the same time.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C,                  // 4 cycle load and 2 cycle ALU op
   IP_AGEN_1C, IP_EXEC_1C,               // AGEN for the load, EXEC for the ALU
   DISP_1C, DISP_1C, DISP_1C, DISP_1C],  // 4 dispatches
  (instrs
    (instregex "LBZU(X)?(8)?$"),
    (instregex "LDU(X)?$")
  )
>;
1257
1258
// Cracked instruction consisting of a Load plus an ALU op. The ALU op has no
//  dependency on the load, so both can run at the same time. The load is
//  restricted as well: 3 of the dispatches belong to the restricted load and
//  the remaining two to the ALU. The load needs the AGEN pipeline and the ALU
//  needs the EXEC pipeline.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C,                           // 4 cycle load and 2 cycle ALU op
   IP_AGEN_1C, IP_EXEC_1C,                        // AGEN for the load, EXEC for the ALU
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],  // 3 (load) + 2 (ALU) dispatches
  (instrs
    (instregex "LF(DU|DUX)$")
  )
>;
1269
1270// Crypto Instructions
1271
// 6 cycle CY operation. There is only one CY unit per CPU, so a whole
//  superslice is used: both exec pipelines (EXECO, EXECE) and all three
//  dispatches.
def : InstRW<
  [P9_CY_6C,                    // 6 cycle CY op
   IP_EXECO_1C, IP_EXECE_1C,    // EXECO and EXECE pipelines
   DISP_1C, DISP_1C, DISP_1C],  // 3 dispatches
  (instrs
    (instregex "VPMSUM(B|H|W|D)$"),
    (instregex "V(N)?CIPHER(LAST)?$"),
    VSBOX
  )
>;
1281
1282// Branch Instructions
1283
// Two cycle branch, using 2 dispatches.
def : InstRW<
  [P9_BR_2C,           // 2 cycle branch op
   DISP_1C, DISP_1C],  // 2 dispatches
  (instrs
    // Branches selected by name pattern.
    (instregex "BCCCTR(L)?(8)?$"),
    (instregex "BCCL(A|R|RL)?$"),
    (instregex "BCCTR(L)?(8)?(n)?$"),
    (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
    (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
    (instregex "BL(_TLS)?$"),
    (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
    (instregex "BLA(8|8_NOP)?$"),
    (instregex "BLR(8|L)?$"),
    (instregex "TAILB(A)?(8)?$"),
    (instregex "TAILBCTR(8)?$"),
    (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
    (instregex "BCLR(L)?(n)?$"),
    (instregex "BCTR(L)?(8)?$"),
    // Branches listed individually.
    B,
    BA,
    BC,
    BCC,
    BCCA,
    BCL,
    BCLalways,
    BCLn,
    BCTRL8_LDinto_toc,
    BCn,
    CTRL_DEP
  )
>;
1313
// Five cycle branch op followed by a 2 cycle ALU op.
// The two operations run consecutively, never in parallel.
def : InstRW<
  [P9_BROpAndALUOp_7C,                   // branch op then ALU op, 7 cycles
   IP_EXEC_1C,                           // one pipeline
   DISP_1C, DISP_1C, DISP_1C, DISP_1C],  // 4 dispatches
  (instrs
    ADDPCIS
  )
>;
1321
1322// Special Extracted Instructions For Atomics
1323
// Atomic Load.
// Modeled with five LS writes, two EXEC pipelines, five AGEN pipelines and
// 12 dispatches.
def : InstRW<
  [P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
   IP_EXEC_1C, IP_EXEC_1C,
   IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "L(D|W)AT$")
  )
>;
1333
// Atomic Store.
// Modeled with three LS writes, one EXEC pipeline, three AGEN pipelines and
// 7 dispatches.
def : InstRW<
  [P9_LS_1C, P9_LS_4C, P9_LS_4C,
   IP_EXEC_1C,
   IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "ST(D|W)AT$")
  )
>;
1341
// Signal Processing Engine (SPE) Instructions.
// Power 9 does not support these instructions, so they get no resources and
// the entry is flagged as Unsupported.
def : InstRW<
  [],
  (instrs
    BRINC,
    EVABS,
    EVEQV,
    EVMRA,
    EVNAND,
    EVNEG,
    (instregex "EVADD(I)?W$"),
    (instregex "EVADD(SM|SS|UM|US)IAAW$"),
    (instregex "EVAND(C)?$"),
    (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
    (instregex "EVCNTL(S|Z)W$"),
    (instregex "EVDIVW(S|U)$"),
    (instregex "EVEXTS(B|H)$"),
    (instregex "EVLD(H|W|D)(X)?$"),
    (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
    (instregex "EVLWHE(X)?$"),
    (instregex "EVLWHO(S|U)(X)?$"),
    (instregex "EVLW(H|W)SPLAT(X)?$"),
    (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
    (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
    (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
    (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
    (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
    (instregex "EVMWHUMI(A)?$"),
    (instregex "EVMWLS(M|S)IA(A|N)W$"),
    (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
    (instregex "EVMWSSF(A|AA|AN)?$"),
    (instregex "EVMWUMI(A|AA|AN)?$"),
    (instregex "EV(N|X)?OR(C)?$"),
    (instregex "EVR(LW|LWI|NDW)$"),
    (instregex "EVSLW(I)?$"),
    (instregex "EVSPLAT(F)?I$"),
    (instregex "EVSRW(I)?(S|U)$"),
    (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
    (instregex "EVSUBF(S|U)(M|S)IAAW$"),
    (instregex "EVSUB(I)?FW$")
  )
> {
  let Unsupported = 1;
}
1387
// General instructions that have no scheduling support.
// They get no resources and the entry is flagged as Unsupported.
def : InstRW<
  [],
  (instrs
    (instregex "(H)?RFI(D)?$"),
    (instregex "DSS(ALL)?$"),
    (instregex "DST(ST)?(T)?(64)?$"),
    (instregex "ICBL(C|Q)$"),
    (instregex "L(W|H|B)EPX$"),
    (instregex "ST(W|H|B)EPX$"),
    (instregex "(L|ST)FDEPX$"),
    (instregex "M(T|F)SR(IN)?$"),
    (instregex "M(T|F)DCR$"),
    (instregex "NOP_GT_PWR(6|7)$"),
    (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
    (instregex "WRTEE(I)?$"),
    ATTN,
    CLRBHRB,
    MFBHRBE,
    MBAR,
    MSYNC,
    SLBSYNC,
    NAP,
    STOP,
    TRAP,
    RFCI,
    RFDI,
    RFMCI,
    SC,
    DCBA,
    DCBI,
    DCCCI,
    ICCCI
  )
> {
  let Unsupported = 1;
}
1421