1//===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the itinerary class data for the POWER7 processor.
11//
12//===----------------------------------------------------------------------===//
13
14// Primary reference:
15// IBM POWER7 multicore server processor
16// B. Sinharoy, et al.
17// IBM J. Res. & Dev. (55) 3. May/June 2011.
18
19// Scheduling for the P7 involves tracking two types of resources:
20//  1. The dispatch bundle slots
21//  2. The functional unit resources
22
23// Dispatch units:
// Functional units referenced by the POWER7 itineraries.  P7_DU1..P7_DU6
// model the slots of a dispatch bundle; the remaining units model the
// execution pipelines.
def P7_DU1    : FuncUnit;
def P7_DU2    : FuncUnit;
def P7_DU3    : FuncUnit;
def P7_DU4    : FuncUnit;
def P7_DU5    : FuncUnit;
def P7_DU6    : FuncUnit;

def P7_LS1    : FuncUnit; // Load/Store pipeline 1
def P7_LS2    : FuncUnit; // Load/Store pipeline 2

def P7_FX1    : FuncUnit; // FX pipeline 1
def P7_FX2    : FuncUnit; // FX pipeline 2

// VS pipeline 1 (vector integer ops. always here)
def P7_VS1    : FuncUnit; // VS pipeline 1
// VS pipeline 2 (128-bit stores and perms. here)
def P7_VS2    : FuncUnit; // VS pipeline 2

def P7_CRU    : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
def P7_BRU    : FuncUnit; // BR unit
44
45// Notes:
46// Each LSU pipeline can also execute FX add and logical instructions.
47// Each LSU pipeline can complete a load or store in one cycle.
48//
49// Each store is broken into two parts, AGEN goes to the LSU while a
50// "data steering" op. goes to the FXU or VSU.
51//
// FX loads have a two cycle load-to-use latency (so one "bubble" cycle).
// VSU loads have a three cycle load-to-use latency (so two "bubble" cycles).
54//
55// Frequent FX ops. take only one cycle and results can be used again in the
56// next cycle (there is a self-bypass). Getting results from the other FX
57// pipeline takes an additional cycle.
58//
59// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles
60// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops.
61// Dispatch of an instruction to VS1 that uses four single prec. inputs
62// (either to a float or XC op). prevents dispatch in that cycle to VS2 of any
63// floating point instruction.
64//
65// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles
66// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline
67// (unlike on the POWER6).
68//
// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP
// share the same write-back, and have a 5-cycle latency difference, so the
// IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP
// op. has been dispatched to VS1.
73//
74// Three cycles after an L1 cache hit, a dependent VSU instruction can issue.
75//
76// Instruction dispatch groups have (at most) four non-branch instructions, and
77// two branches. Unlike on the POWER4/5, a branch does not automatically
78// end the dispatch group, but a second branch must be the last in the group.
79
// Itinerary data for the POWER7.
//
// The three arguments are the functional units declared above, an empty
// bypass list, and one InstrItinData record per itinerary class.  Each record
// lists the stages the instruction occupies, followed by its operand cycles.
// A stage written with a trailing ", 0" has a zero time increment, so the
// following stage starts in the same cycle; this is how a dispatch slot is
// reserved together with the pipeline that executes the operation.  Records
// that name P7_DU1, P7_DU2, ... as separate stages reserve each of those
// dispatch slots rather than any one of them.
def P7Itineraries : ProcessorItineraries<
  [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6,
   P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [
  InstrItinData<IIC_IntSimple   , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2,
                                                  P7_LS1, P7_LS2]>],
                                  [1, 1, 1]>,
  InstrItinData<IIC_IntGeneral  , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [1, 1, 1]>,
  InstrItinData<IIC_IntISEL,      [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2], 0>,
                                   InstrStage<1, [P7_BRU]>],
                                  [1, 1, 1, 1]>,
  InstrItinData<IIC_IntCompare  , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [1, 1, 1]>,
  // FIXME: Add record-form itinerary data.
  InstrItinData<IIC_IntDivW     , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<36, [P7_FX1, P7_FX2]>],
                                  [36, 1, 1]>,
  InstrItinData<IIC_IntDivD     , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<68, [P7_FX1, P7_FX2]>],
                                  [68, 1, 1]>,
  InstrItinData<IIC_IntMulHW    , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [4, 1, 1]>,
  InstrItinData<IIC_IntMulHWU   , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [4, 1, 1]>,
  InstrItinData<IIC_IntMulLI    , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [4, 1, 1]>,
  InstrItinData<IIC_IntRotate   , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [1, 1, 1]>,
  InstrItinData<IIC_IntRotateD  , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [1, 1, 1]>,
  InstrItinData<IIC_IntShift    , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [1, 1, 1]>,
  InstrItinData<IIC_IntTrapW    , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [1, 1]>,
  InstrItinData<IIC_IntTrapD    , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [1, 1]>,
  InstrItinData<IIC_BrB         , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
                                   InstrStage<1, [P7_BRU]>],
                                  [3, 1, 1]>,
  InstrItinData<IIC_BrCR        , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_CRU]>],
                                  [3, 1, 1]>,
  InstrItinData<IIC_BrMCR       , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
                                   InstrStage<1, [P7_BRU]>],
                                  [3, 1, 1]>,
  InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
                                   InstrStage<1, [P7_BRU]>],
                                  [3, 1, 1]>,
  InstrItinData<IIC_LdStLoad    , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2]>],
                                  [2, 1, 1]>,
  InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [2, 2, 1, 1]>,
  InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_DU3], 0>,
                                   InstrStage<1, [P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [3, 3, 1, 1]>,
  InstrItinData<IIC_LdStLD      , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2]>],
                                  [2, 1, 1]>,
  InstrItinData<IIC_LdStLDU     , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [2, 2, 1, 1]>,
  InstrItinData<IIC_LdStLDUX    , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_DU3], 0>,
                                   InstrStage<1, [P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [3, 3, 1, 1]>,
  InstrItinData<IIC_LdStLFD     , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2]>],
                                  [3, 1, 1]>,
  InstrItinData<IIC_LdStLVecX   , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2]>],
                                  [3, 1, 1]>,
  InstrItinData<IIC_LdStLFDU    , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [3, 3, 1, 1]>,
  InstrItinData<IIC_LdStLFDUX   , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [3, 3, 1, 1]>,
  InstrItinData<IIC_LdStLHA     , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2]>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [3, 1, 1]>,
  InstrItinData<IIC_LdStLHAU    , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [4, 4, 1, 1]>,
  InstrItinData<IIC_LdStLHAUX   , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_DU3], 0>,
                                   InstrStage<1, [P7_DU4], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [4, 4, 1, 1]>,
  InstrItinData<IIC_LdStLWA     , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2]>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [3, 1, 1]>,
  InstrItinData<IIC_LdStLWARX,    [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_DU3], 0>,
                                   InstrStage<1, [P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2]>],
                                  [3, 1, 1]>,
  InstrItinData<IIC_LdStLDARX,    [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_DU3], 0>,
                                   InstrStage<1, [P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2]>],
                                  [3, 1, 1]>,
  InstrItinData<IIC_LdStLMW     , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2]>],
                                  [2, 1, 1]>,
  InstrItinData<IIC_LdStStore   , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [1, 1, 1]>,
  InstrItinData<IIC_LdStSTD     , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [1, 1, 1]>,
  InstrItinData<IIC_LdStSTDU    , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [2, 1, 1, 1]>,
  InstrItinData<IIC_LdStSTDUX   , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_DU3], 0>,
                                   InstrStage<1, [P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [2, 1, 1, 1]>,
  InstrItinData<IIC_LdStSTFD    , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [1, 1, 1]>,
  InstrItinData<IIC_LdStSTFDU   , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_FX1, P7_FX2], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [2, 1, 1, 1]>,
  InstrItinData<IIC_LdStSTVEBX  , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2], 0>,
                                   InstrStage<1, [P7_VS2]>],
                                  [1, 1, 1]>,
  InstrItinData<IIC_LdStSTDCX   , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_DU3], 0>,
                                   InstrStage<1, [P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2]>],
                                  [1, 1, 1]>,
  InstrItinData<IIC_LdStSTWCX   , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_DU3], 0>,
                                   InstrStage<1, [P7_DU4], 0>,
                                   InstrStage<1, [P7_LS1, P7_LS2]>],
                                  [1, 1, 1]>,
  // NOTE(review): IIC_BrMCRX already has an entry earlier in this list (the
  // one using P7_DU5/P7_DU6 and P7_BRU).  Confirm which of the two records
  // TableGen honors and whether the other should be removed or renamed.
  InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_DU2], 0>,
                                   InstrStage<1, [P7_DU3], 0>,
                                   InstrStage<1, [P7_DU4], 0>,
                                   InstrStage<1, [P7_CRU]>,
                                   InstrStage<1, [P7_FX1, P7_FX2]>],
                                  [3, 1]>, // mtcr
  InstrItinData<IIC_SprMFCR     , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_CRU]>],
                                  [6, 1]>,
  InstrItinData<IIC_SprMFCRF    , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_CRU]>],
                                  [3, 1]>,
  InstrItinData<IIC_SprMTSPR    , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_FX1]>],
                                  [4, 1]>, // mtctr
  InstrItinData<IIC_FPGeneral   , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [5, 1, 1]>,
  InstrItinData<IIC_FPCompare   , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [8, 1, 1]>,
  InstrItinData<IIC_FPDivD      , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [33, 1, 1]>,
  InstrItinData<IIC_FPDivS      , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [27, 1, 1]>,
  InstrItinData<IIC_FPSqrtD     , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [44, 1, 1]>,
  InstrItinData<IIC_FPSqrtS     , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [32, 1, 1]>,
  InstrItinData<IIC_FPFused     , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [5, 1, 1, 1]>,
  InstrItinData<IIC_FPRes       , [InstrStage<1, [P7_DU1, P7_DU2,
                                                  P7_DU3, P7_DU4], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [5, 1, 1]>,
  InstrItinData<IIC_VecGeneral  , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_VS1]>],
                                  [2, 1, 1]>,
  InstrItinData<IIC_VecVSL      , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_VS1]>],
                                  [2, 1, 1]>,
  InstrItinData<IIC_VecVSR      , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_VS1]>],
                                  [2, 1, 1]>,
  InstrItinData<IIC_VecFP       , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [6, 1, 1]>,
  InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [6, 1, 1]>,
  InstrItinData<IIC_VecFPRound  , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_VS1, P7_VS2]>],
                                  [6, 1, 1]>,
  InstrItinData<IIC_VecComplex  , [InstrStage<1, [P7_DU1], 0>,
                                   InstrStage<1, [P7_VS1]>],
                                  [7, 1, 1]>,
  InstrItinData<IIC_VecPerm     , [InstrStage<1, [P7_DU1, P7_DU2], 0>,
                                   InstrStage<1, [P7_VS2]>],
                                  [3, 1, 1]>
]>;
371
372// ===---------------------------------------------------------------------===//
373// P7 machine model for scheduling and other instruction cost heuristics.
374
// Machine model for the POWER7: sets the issue width, latency and buffer
// parameters below and attaches P7Itineraries as the itinerary data.
def P7Model : SchedMachineModel {
  let IssueWidth = 6;  // 4 (non-branch) instructions are dispatched per cycle.
                       // Note that the dispatch bundle size is 6 (including
                       // branches), but the total internal issue bandwidth per
                       // cycle (from all queues) is 8.

  let MinLatency = 0;  // Out-of-order dispatch.
  let LoadLatency = 3; // Optimistic load latency assuming bypass.
                       // This is overridden by OperandCycles if the
                       // Itineraries are queried instead.
  let MispredictPenalty = 16;

  // Try to make sure we have at least 10 dispatch groups in a loop.
  let LoopMicroOpBufferSize = 40;

  let Itineraries = P7Itineraries;
}
392
393