//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part
// of the P9 processor model used for instruction scheduling. This file should
// contain all of the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//     for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
def : InstRW<
  [P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    MTVSRDD,
    VEQV,
    VNAND,
    VNEGD,
    VNEGW,
    VNOR,
    VOR,
    VORC,
    VSEL,
    VXOR,
    XVNEGDP,
    XVNEGSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
    XXLNAND,
    XXLNOR,
    XXLOR,
    XXLORf,
    XXLORC,
    XXLXOR,
    XXLXORdpz,
    XXLXORspz,
    XXLXORz,
    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
    XSXEXPQP
  )>;

// Restricted-dispatch ALU operation with a 3 cycle latency. It executes on a
// single slice, but because it is restricted it consumes all 3 dispatches
// (DISP) of that superslice.
def : InstRW<
  [P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMPU(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV,
    FTSQRT,
    CMPEQB
  )>;

// Standard-dispatch ALU operation with a 3 cycle latency. A single slice is
// used.
def : InstRW<
  [P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    XSTDIVDP,
    XSTSQRTDP,
    XSXSIGDP,
    XSCVSPDPN,
    SETB,
    BPERMD
  )>;

// Standard-dispatch ALU operation with a 2 cycle latency. A single slice is
// used.
def : InstRW<
  [P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTVSRD$"),
    (instregex "MTVSRW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?$"),
    (instregex "ANDI(S)?o(8)?$"),
    (instregex "ADDC(8)?$"),
    (instregex "ADDIC(8)?(o)?$"),
    (instregex "ADD(8|4)(o)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(o)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(o)?$"),
    (instregex "NEG(8)?(o)?$"),
    (instregex "POPCNTB$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(o)?$"),
    (instregex "NAND(8)?(o)?$"),
    (instregex "AND(C)?(8)?(o)?$"),
    (instregex "NOR(8)?(o)?$"),
    (instregex "OR(C)?(8)?(o)?$"),
    (instregex "EQV(8)?(o)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    // NOTE(review): every name this pattern can match is already matched by
    // "NEG(8)?(o)?$" above; kept as-is.
    (instregex "NEG(8)?$"),
    (instregex "ADDI(S)?toc(HA|L)$"),
    COPY,
    MCRF,
    MCRXRX,
    XSNABSDP,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP,
    MFVSRWZ,
    SRADI_32,
    RLDIC,
    RFEBB,
    LA,
    TBEGIN,
    TRECHKPT,
    NOP,
    WAIT
  )>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<
  [P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI,
    XSIEXPDP,
    FMR,
    CREQV,
    CRXOR,
    TRECLAIM,
    TSR,
    TABORT
  )>;

// ALU vector operation with a 3 cycle latency that occupies a whole
// superslice: both ALU units (even ALUE and odd ALUO), both pipelines
// (EXECE, EXECO) and all three dispatches (DISP) of that superslice.
def : InstRW<
  [P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(o)?$"),
    (instregex "VCMPBFP(o)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    VBPERMD,
    VADDCUW,
    VPOPCNTW,
    VPOPCNTD,
    VPRTYBD,
    VPRTYBW,
    VSHASIGMAD,
    VSHASIGMAW,
    VSUBSBS,
    VSUBSHS,
    VSUBSWS,
    VSUBUBS,
    VSUBUHS,
    VSUBUWS,
    VSUBCUW,
    VCMPGTSB,
    VCMPGTSBo,
    VCMPGTSD,
    VCMPGTSDo,
    VCMPGTSH,
    VCMPGTSHo,
    VCMPGTSW,
    VCMPGTSWo,
    VCMPGTUB,
    VCMPGTUBo,
    VCMPGTUD,
    VCMPGTUDo,
    VCMPGTUH,
    VCMPGTUHo,
    VCMPGTUW,
    VCMPGTUWo,
    VCMPNEBo,
    VCMPNEHo,
    VCMPNEWo,
    VCMPNEZBo,
    VCMPNEZHo,
    VCMPNEZWo,
    VCMPEQUBo,
    VCMPEQUDo,
    VCMPEQUHo,
    VCMPEQUWo,
    XVCMPEQDP,
    XVCMPEQDPo,
    XVCMPEQSP,
    XVCMPEQSPo,
    XVCMPGEDP,
    XVCMPGEDPo,
    XVCMPGESP,
    XVCMPGESPo,
    XVCMPGTDP,
    XVCMPGTDPo,
    XVCMPGTSP,
    XVCMPGTSPo,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
    XVMINSP,
    XVTDIVDP,
    XVTDIVSP,
    XVTSQRTDP,
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
    XVXSIGDP,
    XVXSIGSP
  )>;

// DP vector operation with a 7 cycle latency that occupies a whole
// superslice: both DP units (even DPE and odd DPO), both pipelines
// (EXECE, EXECO) and all three dispatches (DISP) of that superslice.
def : InstRW<
  [P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C, DISP_1C, DISP_1C],
  (instrs
    VADDFP,
    VCTSXS,
    VCTSXS_0,
    VCTUXS,
    VCTUXS_0,
    VEXPTEFP,
    VLOGEFP,
    VMADDFP,
    VMHADDSHS,
    VNMSUBFP,
    VREFP,
    VRFIM,
    VRFIN,
    VRFIP,
    VRFIZ,
    VRSQRTEFP,
    VSUBFP,
    XVADDDP,
    XVADDSP,
    XVCVDPSP,
    XVCVDPSXDS,
    XVCVDPSXWS,
    XVCVDPUXDS,
    XVCVDPUXWS,
    XVCVHPSP,
    XVCVSPDP,
    XVCVSPHP,
    XVCVSPSXDS,
    XVCVSPSXWS,
    XVCVSPUXDS,
    XVCVSPUXWS,
    XVCVSXDDP,
    XVCVSXDSP,
    XVCVSXWDP,
    XVCVSXWSP,
    XVCVUXDDP,
    XVCVUXDSP,
    XVCVUXWDP,
    XVCVUXWSP,
    XVMADDADP,
    XVMADDASP,
    XVMADDMDP,
    XVMADDMSP,
    XVMSUBADP,
    XVMSUBASP,
    XVMSUBMDP,
    XVMSUBMSP,
    XVMULDP,
    XVMULSP,
    XVNMADDADP,
    XVNMADDASP,
    XVNMADDMDP,
    XVNMADDMSP,
    XVNMSUBADP,
    XVNMSUBASP,
    XVNMSUBMDP,
    XVNMSUBMSP,
    XVRDPI,
    XVRDPIC,
    XVRDPIM,
    XVRDPIP,
    XVRDPIZ,
    XVREDP,
    XVRESP,
    XVRSPI,
    XVRSPIC,
    XVRSPIM,
    XVRSPIP,
    XVRSPIZ,
    XVRSQRTEDP,
    XVRSQRTESP,
    XVSUBDP,
    XVSUBSP,
    VCFSX,
    VCFSX_0,
    VCFUX,
    VCFUX_0,
    VMHRADDSHS,
    VMLADDUHM,
    VMSUMMBM,
    VMSUMSHM,
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
    VMULESW,
    VMULEUB,
    VMULEUH,
    VMULEUW,
    VMULOSB,
    VMULOSH,
    VMULOSW,
    VMULOUB,
    VMULOUH,
    VMULOUW,
    VMULUWM,
    VSUM2SWS,
    VSUM4SBS,
    VSUM4SHS,
    VSUM4UBS,
    VSUMSWS
  )>;


// Restricted DP operation with a 5 cycle latency: one DP unit, one EXEC
// pipeline and all three dispatch units of the superslice.
def : InstRW<
  [P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "MADD(HD|HDU|LD)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$")
  )>;

// Restricted DP operation with a 7 cycle latency: one DP unit, one EXEC
// pipeline and all three dispatch units of the superslice.
def : InstRW<
  [P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS,
    FNMADD,
    FNMSUBS,
    FNMSUB,
    FSELD,
    FSELS,
    FMULS,
    FMUL,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
    XSMADDMSP,
    XSMSUBADP,
    XSMSUBASP,
    XSMSUBMDP,
    XSMSUBMSP,
    XSMULDP,
    XSMULSP,
    XSNMADDADP,
    XSNMADDASP,
    XSNMADDMDP,
    XSNMADDMSP,
    XSNMSUBADP,
    XSNMSUBASP,
    XSNMSUBMDP,
    XSNMSUBMSP
  )>;

// A 7 cycle Restricted DP operation paired with a 3 cycle ALU operation.
// The two pieces may execute in parallel.
// Because the DP piece is restricted, 5 dispatches are needed in total.
def : InstRW<
  [P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "FSEL(D|S)o$")
  )>;

// A 5 cycle Restricted DP operation paired with a 2 cycle ALU operation.
def : InstRW<
  [P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "MUL(H|L)(D|W)(U)?o$")
  )>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations must be done sequentially.
// The DP is restricted so we need a full 5 dispatches.
def : InstRW<
  [P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "FRI(N|P|Z|M)(D|S)o$"),
    (instregex "FRE(S)?o$"),
    (instregex "FADD(S)?o$"),
    (instregex "FSUB(S)?o$"),
    (instregex "F(N)?MSUB(S)?o$"),
    (instregex "F(N)?MADD(S)?o$"),
    (instregex "FCFID(U)?(S)?o$"),
    (instregex "FCTID(U)?(Z)?o$"),
    (instregex "FCTIW(U)?(Z)?o$"),
    (instregex "FMUL(S)?o$"),
    (instregex "FRSQRTE(S)?o$"),
    FRSPo
  )>;

// DP operation with a 7 cycle latency. Needs one DP unit, one EXEC pipeline
// and two dispatch units.
def : InstRW<
  [P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    XSADDDP,
    XSADDSP,
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVDPSXWSs,
    XSCVDPUXWSs,
    XSCVHPDP,
    XSCVSPDP,
    XSCVSXDDP,
    XSCVSXDSP,
    XSCVUXDDP,
    XSCVUXDSP,
    XSRDPI,
    XSRDPIC,
    XSRDPIM,
    XSRDPIP,
    XSRDPIZ,
    XSREDP,
    XSRESP,
    XSRSQRTEDP,
    XSRSQRTESP,
    XSSUBDP,
    XSSUBSP,
    XSCVDPSPN,
    XSRSP
  )>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<
  [P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    MFVSRLD,
    MTVSRWS,
    VBPERMQ,
    VCLZLSBB,
    VCTZLSBB,
    VEXTRACTD,
    VEXTUBLX,
    VEXTUBRX,
    VEXTUHLX,
    VEXTUHRX,
    VEXTUWLX,
    VEXTUWRX,
    VGBBD,
    VMRGHB,
    VMRGHH,
    VMRGHW,
    VMRGLB,
    VMRGLH,
    VMRGLW,
    VPERM,
    VPERMR,
    VPERMXOR,
    VPKPX,
    VPKSDSS,
    VPKSDUS,
    VPKSHSS,
    VPKSHUS,
    VPKSWSS,
    VPKSWUS,
    VPKUDUM,
    VPKUDUS,
    VPKUHUM,
    VPKUHUS,
    VPKUWUM,
    VPKUWUS,
    VPRTYBQ,
    VSL,
    VSLDOI,
    VSLO,
    VSLV,
    VSR,
    VSRO,
    VSRV,
    VUPKHPX,
    VUPKHSB,
    VUPKHSH,
    VUPKHSW,
    VUPKLPX,
    VUPKLSB,
    VUPKLSH,
    VUPKLSW,
    XXBRD,
    XXBRH,
    XXBRQ,
    XXBRW,
    XXEXTRACTUW,
    XXINSERTW,
    XXMRGHW,
    XXMRGLW,
    XXPERM,
    XXPERMR,
    XXSLDWI,
    XXSLDWIs,
    XXSPLTIB,
    XXSPLTW,
    XXSPLTWs,
    XXPERMDI,
    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
    VADDUQM,
    VMUL10CUQ,
    VMUL10ECUQ,
    VMUL10EUQ,
    VMUL10UQ,
    VSUBCUQ,
    VSUBECUQ,
    VSUBEUQM,
    VSUBUQM,
    XSCMPEXPQP,
    XSCMPOQP,
    XSCMPUQP,
    XSTSTDCQP,
    XSXSIGQP,
    BCDCFNo,
    BCDCFZo,
    BCDCPSGNo,
    BCDCTNo,
    BCDCTZo,
    BCDSETSGNo,
    BCDSo,
    BCDTRUNCo,
    BCDUSo,
    BCDUTRUNCo
  )>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<
  [P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    BCDSRo,
    XSADDQP,
    XSADDQPO,
    XSCVDPQP,
    XSCVQPDP,
    XSCVQPDPO,
    XSCVQPSDZ,
    XSCVQPSWZ,
    XSCVQPUDZ,
    XSCVQPUWZ,
    XSCVSDQP,
    XSCVUDQP,
    XSRQPI,
    XSRQPIX,
    XSRQPXP,
    XSSUBQP,
    XSSUBQPO
  )>;

// DFU operation with a 23 cycle latency. There is a single DFU unit per CPU,
// so a whole superslice is taken: both exec pipelines (EXECO, EXECE) and all
// three dispatches.
def : InstRW<
  [P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    BCDCTSQo
  )>;

// DFU operation with a 24 cycle latency. There is a single DFU unit per CPU,
// so a whole superslice is taken: both exec pipelines (EXECO, EXECE) and all
// three dispatches.
def : InstRW<
  [P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    XSMADDQP,
    XSMADDQPO,
    XSMSUBQP,
    XSMSUBQPO,
    XSMULQP,
    XSMULQPO,
    XSNMADDQP,
    XSNMADDQPO,
    XSNMSUBQP,
    XSNMSUBQPO
  )>;

// DFU operation with a 37 cycle latency. There is a single DFU unit per CPU,
// so a whole superslice is taken: both exec pipelines (EXECO, EXECE) and all
// three dispatches.
def : InstRW<
  [P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    BCDCFSQo
  )>;

// DFU operation with a 58 cycle latency. There is a single DFU unit per CPU,
// so a whole superslice is taken: both exec pipelines (EXECO, EXECE) and all
// three dispatches.
def : InstRW<
  [P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    XSDIVQP,
    XSDIVQPO
  )>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<
  [P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    XSSQRTQP,
    XSSQRTQPO
  )>;

// Load with a 6 cycle latency; occupies a single slice.
def : InstRW<
  [P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs
    // NOTE(review): unlike most patterns in this file this one has no
    // trailing '$' - confirm the open-ended match is intended.
    (instregex "LXVL(L)?")
  )>;

// Load with a 5 cycle latency; occupies a single slice.
def : InstRW<
  [P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "LVE(B|H|W)X$"),
    // NOTE(review): no trailing '$' on this pattern either - confirm.
    (instregex "LVX(L)?"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX,
    LXVB16X,
    LXVD2X,
    LXVWSX,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64,
    LIWZX
  )>;

// Load with a 4 cycle latency; occupies a single slice.
def : InstRW<
  [P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "CP_PASTE(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT,
    DARN,
    EnforceIEIO,
    ISYNC,
    MSGSYNC,
    TLBSYNC,
    SYNC,
    LMW,
    LSWI
  )>;

// Restricted load with a 4 cycle latency. It runs on a single slice but takes
// the dispatch of the whole superslice.
def : InstRW<
  [P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    LFIWZX,
    LFDX,
    LFD
  )>;

// Cracked load instructions whose load pieces can be done in parallel.
def : InstRW<
  [P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    SLBIA,
    SLBIE,
    SLBMFEE,
    SLBMFEV,
    SLBMTE,
    TLBIEL
  )>;

// Cracked Load Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "L(W|H)ZU(X)?(8)?$"),
    TEND
  )>;

// Cracked store instruction: a Store followed by an ALU operation. The store
// piece is restricted and therefore needs three dispatches.
def : InstRW<
  [P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "ST(B|H|W|D)CX$")
  )>;

// Cracked load instruction: two load operations back to back, 8 cycles in
// total.
def : InstRW<
  [P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    LDMX
  )>;

// Cracked load instruction: a Load followed by an ALU piece, 6 cycles in
// total. The two pieces cannot overlap, so their latencies add up.
def : InstRW<
  [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?o$"),
    (instregex "LWA(X)?(_32)?$"),
    TCHECK
  )>;

// Cracked and restricted load instruction: a Load followed by an ALU piece,
// 6 cycles in total. The two pieces cannot overlap, so their latencies add
// up. Being both cracked and restricted, it needs the full 6 dispatches.
def : InstRW<
  [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    LFIWAX
  )>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 4 dispatches are required as this is a cracked instruction.
def : InstRW<
  [P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    LXSIWAX,
    LIWAX
  )>;

// Cracked load instruction: a Load (4 cycles) followed by an ALU piece
// (3 cycles), 7 cycles in total. The two pieces cannot overlap, so their
// latencies add up. As a restricted instruction it needs the full 6
// dispatches.
def : InstRW<
  [P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    LFSX,
    LFS
  )>;

// Cracked load instruction: a Load followed by an ALU piece, 8 cycles in
// total. The two pieces cannot overlap, so their latencies add up. As a
// cracked instruction it needs the full 4 dispatches.
def : InstRW<
  [P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    LXSSP,
    LXSSPX,
    XFLOADf32,
    DFLOADf32
  )>;

// Cracked 3-way load instruction: a Load plus two ALU operations that depend
// on each other.
def : InstRW<
  [P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
  )>;

// Cracked load that also needs the PM resource. The Load and the PM piece
// cannot overlap, so the latencies add up to 8 cycles. The PM piece takes the
// full superslice, i.e. both the EXECE and EXECO pipelines plus 3 dispatches;
// the Load takes the remaining 2 dispatches.
def : InstRW<
  [P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    LXVH8X,
    LXVDSX,
    LXVW4X
  )>;

// Restricted store operation on a single slice. Being restricted, it takes
// all three dispatches of the superslice.
def : InstRW<
  [P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX,
    SLBIEG,
    STMW,
    STSWI,
    TLBIE
  )>;

// Vector store instruction. It takes the whole superslice: all three
// dispatches plus both the Even and Odd exec pipelines.
def : InstRW<
  [P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
   DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
  )>;

// DIV operation with a 5 cycle latency. There is a single DIV unit per
// superslice, so the whole superslice is taken: both exec pipelines
// (EXECO, EXECE) and all three dispatches.
def : InstRW<
  [P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
  )>;

// DIV operation with a 12 cycle latency. There is a single DIV unit per
// superslice, so the whole superslice is taken: both exec pipelines
// (EXECO, EXECE) and all three dispatches.
def : InstRW<
  [P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "MTSPR(8)?$")
  )>;

// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<
  [P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
   DISP_1C, DISP_1C, DISP_1C],
  (instrs
    DIVW,
    DIVWU,
    MODSW
  )>;

// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<
  [P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
   DISP_1C, DISP_1C, DISP_1C],
  (instrs
    DIVWE,
    DIVD,
    DIVWEU,
    DIVDU,
    MODSD,
    MODUD,
    MODUW
  )>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<
  [P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
   DISP_1C, DISP_1C, DISP_1C],
  (instrs
    DIVDE,
    DIVDEU
  )>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV
// per superslice. Latency of DIV plus ALU is 18.
def : InstRW<
  [P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "DIVW(U)?(O)?o$")
  )>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV
// per superslice. Latency of DIV plus ALU is 26.
def : InstRW<
  [P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    DIVDo,
    DIVDUo,
    DIVWEo,
    DIVWEUo
  )>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV
// per superslice. Latency of DIV plus ALU is 42.
def : InstRW<
  [P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    DIVDEo,
    DIVDEUo
  )>;

// CR access instructions in _BrMCR, IIC_BrMCRX.

// Cracked and restricted ALU operations.
// The two ALU pieces can run in parallel, so their latencies are not summed.
// Apart from that it behaves like two instructions running together on two
// pipelines with 6 dispatches. Each ALU piece takes 2 cycles.
def : InstRW<
  [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    MTCRF,
    MTCRF8
  )>;

// Cracked ALU operations.
// The two ALU pieces can run in parallel, so their latencies are not summed.
// Apart from that it behaves like two instructions running together on two
// pipelines with 4 dispatches. Each ALU piece takes 2 cycles.
def : InstRW<
  [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "ADDC(8)?o$"),
    (instregex "SUBFC(8)?o$")
  )>;

// Cracked ALU operations.
// The two ALU pieces can run in parallel. One is a three cycle ALU op, the
// other is a two cycle ALU op. One of the pieces is restricted and the other
// is not, giving a total of 5 dispatches.
def : InstRW<
  [P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "F(N)?ABS(D|S)o$"),
    (instregex "FCPSGN(D|S)o$"),
    (instregex "FNEG(D|S)o$"),
    FMRo
  )>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 4 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<
  [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    MCRFS
  )>;

// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<
  [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "MTFSF(b|o)?$"),
    (instregex "MTFSFI(o)?$")
  )>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<
  [P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "RLD(I)?C(R|L)o$"),
    (instregex "RLW(IMI|INM|NM)(8)?o$"),
    (instregex "SLW(8)?o$"),
    (instregex "SRAW(I)?o$"),
    (instregex "SRW(8)?o$"),
    RLDICL_32o,
    RLDIMIo
  )>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches.
def : InstRW<
  [P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "MFFS(L|CE|o)?$")
  )>;

// Cracked ALU instruction composed of three consecutive 2 cycle ALU
// operations for a total of 6 cycles. All of the ALU operations are also
// restricted so each takes 3 dispatches for a total of 9.
def : InstRW<
  [P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
   DISP_1C, DISP_1C],
  (instrs
    (instregex "MFCR(8)?$")
  )>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
def : InstRW<
  [P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "EXTSWSLIo$"),
    (instregex "SRAD(I)?o$"),
    SLDo,
    SRDo,
    RLDICo
  )>;

// Restricted DP instruction with a 33 cycle latency. Uses one slice and
// 3 dispatches.
def : InstRW<
  [P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    FDIV
  )>;

// Restricted DP instruction with a 33 cycle latency, cracked with a 3 cycle
// ALU op.
def : InstRW<
  [P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    FDIVo
  )>;

// DP instruction with a 36 cycle latency.
// It can be executed on a single slice.
def : InstRW<
  [P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    XSSQRTDP
  )>;

// Restricted DP instruction with a 36 cycle latency. Uses one slice and
// 3 dispatches.
def : InstRW<
  [P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    FSQRT
  )>;

// DP vector instruction with a 36 cycle latency.
def : InstRW<
  [P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C, DISP_1C, DISP_1C],
  (instrs
    XVSQRTDP
  )>;

// DP vector instruction with a 27 cycle latency.
def : InstRW<
  [P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C, DISP_1C, DISP_1C],
  (instrs
    XVSQRTSP
  )>;

// Restricted DP instruction with a 36 cycle latency, cracked with a 3 cycle
// ALU op.
def : InstRW<
  [P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    FSQRTo
  )>;

// DP instruction with a 26 cycle latency.
def : InstRW<
  [P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    XSSQRTSP
  )>;

// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<
  [P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    FSQRTS
  )>;

// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<
  [P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    FSQRTSo
  )>;

// 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
def : InstRW<
  [P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    XSDIVDP
  )>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<
  [P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    FDIVS
  )>;

// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// (22 + 3 = 25 cycles, matching the P9_DPOpAndALU2Op_25C_5 resource.)
def : InstRW<
  [P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    FDIVSo
  )>;

// 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
def : InstRW<
  [P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    XSDIVSP
  )>;

// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
// superslice.
def : InstRW<
  [P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C, DISP_1C, DISP_1C],
  (instrs
    XVDIVSP
  )>;

// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
// superslice.
def : InstRW<
  [P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C, DISP_1C, DISP_1C],
  (instrs
    XVDIVDP
  )>;

// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
             (instrs (instregex "LF(SU|SUX)$"))>;

// Store + ALU cracked pair. The ALU op is independent of the store, so the
// two can run at the same time. The store itself is restricted.
def : InstRW<[P9_LS_1C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
             (instrs
               (instregex "STF(S|D)U(X)?$"),
               (instregex "ST(B|H|W|D)U(X)?(8)?$")
             )>;

// Load + ALU cracked pair. The ALU op is independent of the load, so the
// two can run at the same time.
def : InstRW<[P9_LS_4C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
             (instrs
               (instregex "LBZU(X)?(8)?$"),
               (instregex "LDU(X)?$")
             )>;

// Load + ALU cracked pair with a restricted load. The ALU op is independent
// of the load, so the two can run at the same time. Of the 5 dispatches, 3
// belong to the restricted load and 2 to the ALU. The AGEN pipeline serves
// the load and the EXEC pipeline serves the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
             (instrs (instregex "LF(DU|DUX)$"))>;

// Crypto Instructions

// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
// dispatches.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C, DISP_1C, DISP_1C],
             (instrs
               (instregex "VPMSUM(B|H|W|D)$"),
               (instregex "V(N)?CIPHER(LAST)?$"),
               VSBOX
             )>;

// Branch Instructions

// Branches with a two cycle latency.
def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
             (instrs
               (instregex "BCCCTR(L)?(8)?$"),
               (instregex "BCCL(A|R|RL)?$"),
               (instregex "BCCTR(L)?(8)?(n)?$"),
               (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
               (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
               (instregex "BL(_TLS)?$"),
               (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
               (instregex "BLA(8|8_NOP)?$"),
               (instregex "BLR(8|L)?$"),
               (instregex "TAILB(A)?(8)?$"),
               (instregex "TAILBCTR(8)?$"),
               (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
               (instregex "BCLR(L)?(n)?$"),
               (instregex "BCTR(L)?(8)?$"),
               B, BA, BC, BCC, BCCA,
               BCL, BCLalways, BCLn,
               BCTRL8_LDinto_toc,
               BCn,
               CTRL_DEP
             )>;

// Five Cycle Branch with a 2 Cycle ALU Op
// Operations must be done consecutively and not in parallel.
def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
             (instrs ADDPCIS)>;

// Special Extracted Instructions For Atomics

// Atomic load (LDAT / LWAT).
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C, IP_EXEC_1C,
              IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
             (instrs (instregex "L(D|W)AT$"))>;

// Atomic store (STDAT / STWAT).
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C,
              IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
             (instrs (instregex "ST(D|W)AT$"))>;

// Signal Processing Engine (SPE) Instructions
// Power 9 does not support these, so they get no resources and are flagged
// as Unsupported.
def : InstRW<[],
             (instrs
               BRINC, EVABS, EVEQV, EVMRA, EVNAND, EVNEG,
               (instregex "EVADD(I)?W$"),
               (instregex "EVADD(SM|SS|UM|US)IAAW$"),
               (instregex "EVAND(C)?$"),
               (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
               (instregex "EVCNTL(S|Z)W$"),
               (instregex "EVDIVW(S|U)$"),
               (instregex "EVEXTS(B|H)$"),
               (instregex "EVLD(H|W|D)(X)?$"),
               (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
               (instregex "EVLWHE(X)?$"),
               (instregex "EVLWHO(S|U)(X)?$"),
               (instregex "EVLW(H|W)SPLAT(X)?$"),
               (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
               (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
               (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
               (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
               (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
               (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
               (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
               (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
               (instregex "EVMWHUMI(A)?$"),
               (instregex "EVMWLS(M|S)IA(A|N)W$"),
               (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
               (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
               (instregex "EVMWSSF(A|AA|AN)?$"),
               (instregex "EVMWUMI(A|AA|AN)?$"),
               (instregex "EV(N|X)?OR(C)?$"),
               (instregex "EVR(LW|LWI|NDW)$"),
               (instregex "EVSLW(I)?$"),
               (instregex "EVSPLAT(F)?I$"),
               (instregex "EVSRW(I)?(S|U)$"),
               (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
               (instregex "EVSUBF(S|U)(M|S)IAAW$"),
               (instregex "EVSUB(I)?FW$")
             )> {
  let Unsupported = 1;
}

// General instructions that have no scheduling support; they get no
// resources and are flagged as Unsupported.
def : InstRW<[],
             (instrs
               (instregex "(H)?RFI(D)?$"),
               (instregex "DSS(ALL)?$"),
               (instregex "DST(ST)?(T)?(64)?$"),
               (instregex "ICBL(C|Q)$"),
               (instregex "L(W|H|B)EPX$"),
               (instregex "ST(W|H|B)EPX$"),
               (instregex "(L|ST)FDEPX$"),
               (instregex "M(T|F)SR(IN)?$"),
               (instregex "M(T|F)DCR$"),
               (instregex "NOP_GT_PWR(6|7)$"),
               (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
               (instregex "WRTEE(I)?$"),
               ATTN, CLRBHRB, MFBHRBE, MBAR, MSYNC, SLBSYNC,
               NAP, STOP, TRAP,
               RFCI, RFDI, RFMCI,
               SC,
               DCBA, DCBI, DCCCI, ICCCI
             )> {
  let Unsupported = 1;
}