//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Instruction scheduling annotations for in-order and out-of-order CPUs.
// These annotations are independent of the itinerary class defined below.
// Here we define the subtarget independent read/write per-operand resources.
// The subtarget schedule definitions will then map these to the subtarget's
// resource usages.
// For example:
// The instruction cycle timings table might contain an entry for an operation
// like the following:
//   Rd <- ADD Rn, Rm, <shift> Rs
//   Uops | Latency from register | Uops - resource requirements - latency
//   2    | Rn: 1 Rm: 4 Rs: 4     | uop T0, Rm, Rs  - P01 - 3
//        |                       | uopc Rd, Rn, T0 - P01 - 1
// This is telling us that the result will be available in destination register
// Rd after a minimum of four cycles after the result in Rm and Rs is available
// (three cycles for the first micro-op plus one for the second) and one cycle
// after the result in Rn is available. The micro-ops can execute on resource
// P01.
// To model this, we need to express that we need to dispatch two micro-ops,
// that the resource P01 is needed and that the latency to Rn is different from
// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by
// three.
// We will do this by assigning (abstract) resources to register defs/uses.
//   ARMSchedule.td:
//     def WriteALUsr : SchedWrite;
//     def ReadAdvanceALUsr : SchedRead;
//
//   ARMInstrInfo.td:
//     def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
//                             ReadDefault]> { ...}
// ReadAdvance read resources allow us to define "pipeline by-passes" or
// shorter latencies to certain registers as needed in the example above.
// The "ReadDefault" can be omitted.
// Next, the subtarget td file assigns resources to the abstract resources
// defined here.
//   ARMScheduleSubtarget.td:
//     // Resources.
//     def P01 : ProcResource<3>; // ALU unit (3 of it).
//     ...
//     // Resource usages.
//     def : WriteRes<WriteALUsr, [P01, P01]> {
//       Latency = 4; // Latency of 4.
//       NumMicroOps = 2; // Dispatch 2 micro-ops.
//       // The two instances of resource P01 are occupied for one cycle. It is
//       // one cycle because these resources happen to be pipelined.
//       ResourceCycles = [1, 1];
//     }
//     def : ReadAdvance<ReadAdvanceALUsr, 3>;

//===----------------------------------------------------------------------===//
// Sched definitions for integer pipeline instructions
//
// Every record in this section is declared without a body: it only introduces
// a named SchedWrite or SchedRead. No latency or resource data is attached to
// the records here.
//
// Basic ALU operation.
def WriteALU : SchedWrite;
def ReadALU : SchedRead;

// Basic ALU with shifts.
def WriteALUsi : SchedWrite; // Shift by immediate.
def WriteALUsr : SchedWrite; // Shift by register.
def WriteALUSsr : SchedWrite; // Shift by register (flag setting).
def ReadALUsr : SchedRead; // Some operands are read later.

// Compares.
// NOTE(review): the "si"/"sr" suffixes presumably mean shift by immediate /
// shift by register, by analogy with WriteALUsi/WriteALUsr above - confirm.
def WriteCMP : SchedWrite;
def WriteCMPsi : SchedWrite;
def WriteCMPsr : SchedWrite;

// Multiplies.
def WriteMUL16 : SchedWrite; // 16-bit multiply.
def WriteMUL32 : SchedWrite; // 32-bit multiply.
def WriteMUL64Lo : SchedWrite; // 64-bit result. Low reg.
def WriteMUL64Hi : SchedWrite; // 64-bit result. High reg.
def ReadMUL : SchedRead;

// Multiply-accumulates.
def WriteMAC16 : SchedWrite; // 16-bit mac.
def WriteMAC32 : SchedWrite; // 32-bit mac.
def WriteMAC64Lo : SchedWrite; // 64-bit mac. Low reg.
def WriteMAC64Hi : SchedWrite; // 64-bit mac. High reg.
def ReadMAC : SchedRead;

// Divisions.
def WriteDIV : SchedWrite;

// Loads/Stores.
def WriteLd : SchedWrite;
def WritePreLd : SchedWrite;
def WriteST : SchedWrite;

// Branches.
def WriteBr : SchedWrite;
def WriteBrL : SchedWrite;
def WriteBrTbl : SchedWrite;

// Noop.
def WriteNoop : SchedWrite;

//===----------------------------------------------------------------------===//
// Sched definitions for floating-point and neon instructions
//
// As in the integer section, each record is a body-less SchedWrite or
// SchedRead declaration.
//
// Floating point conversions
def WriteFPCVT : SchedWrite;
def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa

// ALU operations (32/64-bit)
def WriteFPALU32 : SchedWrite;
def WriteFPALU64 : SchedWrite;

// Multiplication
def WriteFPMUL32 : SchedWrite;
def WriteFPMUL64 : SchedWrite;
def ReadFPMUL : SchedRead; // multiplier read
def ReadFPMAC : SchedRead; // accumulator read

// Multiply-accumulate
def WriteFPMAC32 : SchedWrite;
def WriteFPMAC64 : SchedWrite;

// Division
def WriteFPDIV32 : SchedWrite;
def WriteFPDIV64 : SchedWrite;

// Square-root
def WriteFPSQRT32 : SchedWrite;
def WriteFPSQRT64 : SchedWrite;

// Vector load and stores
def WriteVLD1 : SchedWrite;
def WriteVLD2 : SchedWrite;
def WriteVLD3 : SchedWrite;
def WriteVLD4 : SchedWrite;
def WriteVST1 : SchedWrite;
def WriteVST2 : SchedWrite;
def WriteVST3 : SchedWrite;
def WriteVST4 : SchedWrite;


// Define TII and STI for use in SchedVariant Predicates.
// The prolog code declares two locals: TII, the result of
// SchedModel->getInstrInfo() cast to const ARMBaseInstrInfo*, and STI, the
// result of SchedModel->getSubtargetInfo() cast to const ARMSubtarget*.
// Each declaration is followed by a (void) expression on the variable,
// presumably so that a predicate which does not use it triggers no
// unused-variable warning.
def : PredicateProlog<[{
  const ARMBaseInstrInfo *TII =
    static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
  (void)TII;
  const ARMSubtarget *STI =
    static_cast<const ARMSubtarget*>(SchedModel->getSubtargetInfo());
  (void)STI;
}]>;

// Predicate whose code is the expression TII->isPredicated(*MI). TII is the
// local declared by the PredicateProlog above.
// NOTE(review): MI is not declared in this file; it is presumably supplied by
// the code that TableGen generates around the predicate - confirm.
def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>;

//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
//
// Each record is a body-less InstrItinClass declaration; nothing but the name
// is defined here.
//
def IIC_iALUx : InstrItinClass;
def IIC_iALUi : InstrItinClass;
def IIC_iALUr : InstrItinClass;
def IIC_iALUsi : InstrItinClass;
def IIC_iALUsir : InstrItinClass;
def IIC_iALUsr : InstrItinClass;
def IIC_iBITi : InstrItinClass;
def IIC_iBITr : InstrItinClass;
def IIC_iBITsi : InstrItinClass;
def IIC_iBITsr : InstrItinClass;
def IIC_iUNAr : InstrItinClass;
def IIC_iUNAsi : InstrItinClass;
def IIC_iEXTr : InstrItinClass;
def IIC_iEXTAr : InstrItinClass;
def IIC_iEXTAsr : InstrItinClass;
def IIC_iCMPi : InstrItinClass;
def IIC_iCMPr : InstrItinClass;
def IIC_iCMPsi : InstrItinClass;
def IIC_iCMPsr : InstrItinClass;
def IIC_iTSTi : InstrItinClass;
def IIC_iTSTr : InstrItinClass;
def IIC_iTSTsi : InstrItinClass;
def IIC_iTSTsr : InstrItinClass;
def IIC_iMOVi : InstrItinClass;
def IIC_iMOVr : InstrItinClass;
def IIC_iMOVsi : InstrItinClass;
def IIC_iMOVsr : InstrItinClass;
def IIC_iMOVix2 : InstrItinClass;
def IIC_iMOVix2addpc : InstrItinClass;
def IIC_iMOVix2ld : InstrItinClass;
def IIC_iMVNi : InstrItinClass;
def IIC_iMVNr : InstrItinClass;
def IIC_iMVNsi : InstrItinClass;
def IIC_iMVNsr : InstrItinClass;
def IIC_iCMOVi : InstrItinClass;
def IIC_iCMOVr : InstrItinClass;
def IIC_iCMOVsi : InstrItinClass;
def IIC_iCMOVsr : InstrItinClass;
def IIC_iCMOVix2 : InstrItinClass;
def IIC_iMUL16 : InstrItinClass;
def IIC_iMAC16 : InstrItinClass;
def IIC_iMUL32 : InstrItinClass;
// Further InstrItinClass records. Like the ones before them, each is declared
// without a body, so only the name is defined here.
// NOTE(review): the names fall into three prefix groups (IIC_i*, IIC_fp*,
// IIC_V*) plus IIC_Preload and IIC_Br; the meaning of each prefix is not
// stated in this file.
def IIC_iMAC32 : InstrItinClass;
def IIC_iMUL64 : InstrItinClass;
def IIC_iMAC64 : InstrItinClass;
def IIC_iDIV : InstrItinClass;
def IIC_iLoad_i : InstrItinClass;
def IIC_iLoad_r : InstrItinClass;
def IIC_iLoad_si : InstrItinClass;
def IIC_iLoad_iu : InstrItinClass;
def IIC_iLoad_ru : InstrItinClass;
def IIC_iLoad_siu : InstrItinClass;
def IIC_iLoad_bh_i : InstrItinClass;
def IIC_iLoad_bh_r : InstrItinClass;
def IIC_iLoad_bh_si : InstrItinClass;
def IIC_iLoad_bh_iu : InstrItinClass;
def IIC_iLoad_bh_ru : InstrItinClass;
def IIC_iLoad_bh_siu : InstrItinClass;
def IIC_iLoad_d_i : InstrItinClass;
def IIC_iLoad_d_r : InstrItinClass;
def IIC_iLoad_d_ru : InstrItinClass;
def IIC_iLoad_m : InstrItinClass;
def IIC_iLoad_mu : InstrItinClass;
def IIC_iLoad_mBr : InstrItinClass;
def IIC_iPop : InstrItinClass;
def IIC_iPop_Br : InstrItinClass;
def IIC_iLoadiALU : InstrItinClass;
def IIC_iStore_i : InstrItinClass;
def IIC_iStore_r : InstrItinClass;
def IIC_iStore_si : InstrItinClass;
def IIC_iStore_iu : InstrItinClass;
def IIC_iStore_ru : InstrItinClass;
def IIC_iStore_siu : InstrItinClass;
def IIC_iStore_bh_i : InstrItinClass;
def IIC_iStore_bh_r : InstrItinClass;
def IIC_iStore_bh_si : InstrItinClass;
def IIC_iStore_bh_iu : InstrItinClass;
def IIC_iStore_bh_ru : InstrItinClass;
def IIC_iStore_bh_siu : InstrItinClass;
def IIC_iStore_d_i : InstrItinClass;
def IIC_iStore_d_r : InstrItinClass;
def IIC_iStore_d_ru : InstrItinClass;
def IIC_iStore_m : InstrItinClass;
def IIC_iStore_mu : InstrItinClass;
def IIC_Preload : InstrItinClass;
def IIC_Br : InstrItinClass;
def IIC_fpSTAT : InstrItinClass;
def IIC_fpUNA16 : InstrItinClass;
def IIC_fpUNA32 : InstrItinClass;
def IIC_fpUNA64 : InstrItinClass;
def IIC_fpCMP16 : InstrItinClass;
def IIC_fpCMP32 : InstrItinClass;
def IIC_fpCMP64 : InstrItinClass;
def IIC_fpCVTSD : InstrItinClass;
def IIC_fpCVTDS : InstrItinClass;
def IIC_fpCVTSH : InstrItinClass;
def IIC_fpCVTHS : InstrItinClass;
def IIC_fpCVTIH : InstrItinClass;
def IIC_fpCVTIS : InstrItinClass;
def IIC_fpCVTID : InstrItinClass;
def IIC_fpCVTHI : InstrItinClass;
def IIC_fpCVTSI : InstrItinClass;
def IIC_fpCVTDI : InstrItinClass;
def IIC_fpMOVIS : InstrItinClass;
def IIC_fpMOVID : InstrItinClass;
def IIC_fpMOVSI : InstrItinClass;
def IIC_fpMOVDI : InstrItinClass;
def IIC_fpALU16 : InstrItinClass;
def IIC_fpALU32 : InstrItinClass;
def IIC_fpALU64 : InstrItinClass;
def IIC_fpMUL16 : InstrItinClass;
def IIC_fpMUL32 : InstrItinClass;
def IIC_fpMUL64 : InstrItinClass;
def IIC_fpMAC16 : InstrItinClass;
def IIC_fpMAC32 : InstrItinClass;
def IIC_fpMAC64 : InstrItinClass;
def IIC_fpFMAC16 : InstrItinClass;
def IIC_fpFMAC32 : InstrItinClass;
def IIC_fpFMAC64 : InstrItinClass;
def IIC_fpDIV16 : InstrItinClass;
def IIC_fpDIV32 : InstrItinClass;
def IIC_fpDIV64 : InstrItinClass;
def IIC_fpSQRT16 : InstrItinClass;
def IIC_fpSQRT32 : InstrItinClass;
def IIC_fpSQRT64 : InstrItinClass;
def IIC_fpLoad16 : InstrItinClass;
def IIC_fpLoad32 : InstrItinClass;
def IIC_fpLoad64 : InstrItinClass;
def IIC_fpLoad_m : InstrItinClass;
def IIC_fpLoad_mu : InstrItinClass;
def IIC_fpStore16 : InstrItinClass;
def IIC_fpStore32 : InstrItinClass;
def IIC_fpStore64 : InstrItinClass;
def IIC_fpStore_m : InstrItinClass;
def IIC_fpStore_mu : InstrItinClass;
def IIC_VLD1 : InstrItinClass;
def IIC_VLD1x2 : InstrItinClass;
def IIC_VLD1x3 : InstrItinClass;
def IIC_VLD1x4 : InstrItinClass;
def IIC_VLD1u : InstrItinClass;
def IIC_VLD1x2u : InstrItinClass;
def IIC_VLD1x3u : InstrItinClass;
def IIC_VLD1x4u : InstrItinClass;
def IIC_VLD1ln : InstrItinClass;
def IIC_VLD1lnu : InstrItinClass;
def IIC_VLD1dup : InstrItinClass;
def IIC_VLD1dupu : InstrItinClass;
def IIC_VLD2 : InstrItinClass;
def IIC_VLD2x2 : InstrItinClass;
def IIC_VLD2u : InstrItinClass;
def IIC_VLD2x2u : InstrItinClass;
def IIC_VLD2ln : InstrItinClass;
def IIC_VLD2lnu : InstrItinClass;
def IIC_VLD2dup : InstrItinClass;
def IIC_VLD2dupu : InstrItinClass;
def IIC_VLD3 : InstrItinClass;
def IIC_VLD3ln : InstrItinClass;
def IIC_VLD3u : InstrItinClass;
def IIC_VLD3lnu : InstrItinClass;
def IIC_VLD3dup : InstrItinClass;
def IIC_VLD3dupu : InstrItinClass;
def IIC_VLD4 : InstrItinClass;
def IIC_VLD4ln : InstrItinClass;
def IIC_VLD4u : InstrItinClass;
def IIC_VLD4lnu : InstrItinClass;
def IIC_VLD4dup : InstrItinClass;
def IIC_VLD4dupu : InstrItinClass;
def IIC_VST1 : InstrItinClass;
def IIC_VST1x2 : InstrItinClass;
def IIC_VST1x3 : InstrItinClass;
def IIC_VST1x4 : InstrItinClass;
def IIC_VST1u : InstrItinClass;
def IIC_VST1x2u : InstrItinClass;
def IIC_VST1x3u : InstrItinClass;
def IIC_VST1x4u : InstrItinClass;
def IIC_VST1ln : InstrItinClass;
def IIC_VST1lnu : InstrItinClass;
def IIC_VST2 : InstrItinClass;
def IIC_VST2x2 : InstrItinClass;
def IIC_VST2u : InstrItinClass;
def IIC_VST2x2u : InstrItinClass;
def IIC_VST2ln : InstrItinClass;
def IIC_VST2lnu : InstrItinClass;
def IIC_VST3 : InstrItinClass;
def IIC_VST3u : InstrItinClass;
def IIC_VST3ln : InstrItinClass;
def IIC_VST3lnu : InstrItinClass;
def IIC_VST4 : InstrItinClass;
def IIC_VST4u : InstrItinClass;
def IIC_VST4ln : InstrItinClass;
def IIC_VST4lnu : InstrItinClass;
def IIC_VUNAD : InstrItinClass;
def IIC_VUNAQ : InstrItinClass;
def IIC_VBIND : InstrItinClass;
def IIC_VBINQ : InstrItinClass;
def IIC_VPBIND : InstrItinClass;
def IIC_VFMULD : InstrItinClass;
def IIC_VFMULQ : InstrItinClass;
def IIC_VMOV : InstrItinClass;
def IIC_VMOVImm : InstrItinClass;
def IIC_VMOVD : InstrItinClass;
def IIC_VMOVQ : InstrItinClass;
def IIC_VMOVIS : InstrItinClass;
def IIC_VMOVID : InstrItinClass;
def IIC_VMOVISL : InstrItinClass;
def IIC_VMOVSI : InstrItinClass;
def IIC_VMOVDI : InstrItinClass;
def IIC_VMOVN : InstrItinClass;
def IIC_VPERMD : InstrItinClass;
def IIC_VPERMQ : InstrItinClass;
def IIC_VPERMQ3 : InstrItinClass;
def IIC_VMACD : InstrItinClass;
def IIC_VMACQ : InstrItinClass;
def IIC_VFMACD : InstrItinClass;
def IIC_VFMACQ : InstrItinClass;
def IIC_VRECSD : InstrItinClass;
def IIC_VRECSQ : InstrItinClass;
def IIC_VCNTiD : InstrItinClass;
def IIC_VCNTiQ : InstrItinClass;
def IIC_VUNAiD : InstrItinClass;
def IIC_VUNAiQ : InstrItinClass;
def IIC_VQUNAiD : InstrItinClass;
def IIC_VQUNAiQ : InstrItinClass;
def IIC_VBINiD : InstrItinClass;
def IIC_VBINiQ : InstrItinClass;
def IIC_VSUBiD : InstrItinClass;
def IIC_VSUBiQ : InstrItinClass;
def IIC_VBINi4D : InstrItinClass;
def IIC_VBINi4Q : InstrItinClass;
def IIC_VSUBi4D : InstrItinClass;
def IIC_VSUBi4Q : InstrItinClass;
def IIC_VABAD : InstrItinClass;
def IIC_VABAQ : InstrItinClass;
def IIC_VSHLiD : InstrItinClass;
def IIC_VSHLiQ : InstrItinClass;
def IIC_VSHLi4D : InstrItinClass;
def IIC_VSHLi4Q : InstrItinClass;
def IIC_VPALiD : InstrItinClass;
def IIC_VPALiQ : InstrItinClass;
def IIC_VMULi16D : InstrItinClass;
def IIC_VMULi32D : InstrItinClass;
def IIC_VMULi16Q : InstrItinClass;
def IIC_VMULi32Q : InstrItinClass;
def IIC_VMACi16D : InstrItinClass;
def IIC_VMACi32D : InstrItinClass;
def IIC_VMACi16Q : InstrItinClass;
def IIC_VMACi32Q : InstrItinClass;
def IIC_VEXTD : InstrItinClass;
def IIC_VEXTQ : InstrItinClass;
def IIC_VTB1 : InstrItinClass;
def IIC_VTB2 : InstrItinClass;
def IIC_VTB3 : InstrItinClass;
def IIC_VTB4 : InstrItinClass;
def IIC_VTBX1 : InstrItinClass;
def IIC_VTBX2 : InstrItinClass;
def IIC_VTBX3 : InstrItinClass;
def IIC_VTBX4 : InstrItinClass;
def IIC_VDOTPROD : InstrItinClass;

//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
//
// NOTE(review): these includes come after every definition above, presumably
// because the included files refer to those records - confirm before
// reordering or moving them.

include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
include "ARMScheduleSwift.td"
include "ARMScheduleR52.td"
include "ARMScheduleA57.td"
include "ARMScheduleM3.td"