1//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the itinerary class data for the ARM Cortex A9 processors.
11//
12//===----------------------------------------------------------------------===//
13
14// ===---------------------------------------------------------------------===//
15// This section contains legacy support for itineraries. This is
16// required until SD and PostRA schedulers are replaced by MachineScheduler.
17
18//
19// Ad-hoc scheduling information derived from the rather vague "Cortex-A9
20// Technical Reference Manual".
21//
22// Functional units
// These units are the resource list passed to CortexA9Itineraries below; the
// InstrStage entries of each itinerary class name the unit(s) a stage occupies.
// The two issue units open nearly every itinerary as a stage that advances 0
// cycles before the next stage.
23def A9_Issue0  : FuncUnit; // Issue 0
24def A9_Issue1  : FuncUnit; // Issue 1
// Final stage of IIC_Br and of the "load multiple / pop plus branch" classes.
25def A9_Branch  : FuncUnit; // Branch
// The integer multiply / MAC itineraries list only A9_ALU0; the plain ALU
// itineraries list both A9_ALU0 and A9_ALU1.
26def A9_ALU0    : FuncUnit; // ALU / MUL pipeline 0
27def A9_ALU1    : FuncUnit; // ALU pipeline 1
28def A9_AGU     : FuncUnit; // Address generation unit for ld / st
29def A9_NPipe   : FuncUnit; // NEON pipeline
// Listed by the load/store itineraries and by the VFP itineraries alike.
30def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
31def A9_LSUnit  : FuncUnit; // L/S Unit
// Artificial units used to model stalls between VFP and NEON instructions,
// which share one register file: each VFP itinerary marks A9_DRegsVFP as
// Required and A9_DRegsN as Reserved; the comments in the itinerary list say
// NEON instructions do the same with the two units swapped.
32def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
33def A9_DRegsN  : FuncUnit; // FP register set, NEON side
34
35// Bypasses
// The only bypass registered with CortexA9Itineraries. The load itineraries
// list it as the first entry of their bypass list, and several ALU / MVN /
// compare itineraries list it for source operands (NoBypass elsewhere).
// NOTE(review): presumably models forwarding of load results to consumers --
// confirm against the InstrItinData bypass documentation.
36def A9_LdBypass : Bypass;
37
38def CortexA9Itineraries : ProcessorItineraries<
39  [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
40   A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
41  [A9_LdBypass], [
42  // Two fully-pipelined integer ALU pipelines
43
44  //
45  // Move instructions, unconditional
46  InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
48  InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
50  InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
52  InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
53                               InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
54  InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
55                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
56                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
57  InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
58                                  InstrStage<1, [A9_ALU0, A9_ALU1]>,
59                                  InstrStage<1, [A9_ALU0, A9_ALU1]>,
60                                  InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
61  InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
62                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
63                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
64                               InstrStage<1, [A9_MUX0], 0>,
65                               InstrStage<1, [A9_AGU], 0>,
66                               InstrStage<1, [A9_LSUnit]>], [5]>,
67  //
68  // MVN instructions
69  InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
70                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
71                              [1]>,
72  InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
73                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
74                              [1, 1], [NoBypass, A9_LdBypass]>,
75  InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
76                               InstrStage<2, [A9_ALU0, A9_ALU1]>],
77                              [2, 1]>,
78  InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
79                               InstrStage<3, [A9_ALU0, A9_ALU1]>],
80                              [3, 1, 1]>,
81  //
82  // No operand cycles
83  InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84                               InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
85  //
86  // Binary Instructions that produce a result
87  InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
88                             InstrStage<1, [A9_ALU0, A9_ALU1]>],
89                            [1, 1], [NoBypass, A9_LdBypass]>,
90  InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
91                             InstrStage<1, [A9_ALU0, A9_ALU1]>],
92                            [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
93  InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
94                             InstrStage<2, [A9_ALU0, A9_ALU1]>],
95                            [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
96  InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
97                             InstrStage<2, [A9_ALU0, A9_ALU1]>],
98                            [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
99  InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
100                             InstrStage<3, [A9_ALU0, A9_ALU1]>],
101                            [3, 1, 1, 1],
102                            [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
103  //
104  // Bitwise Instructions that produce a result
105  InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
106                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
107  InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
109  InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
110                             InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
111  InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
112                             InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
113  //
114  // Unary Instructions that produce a result
115
116  // CLZ, RBIT, etc.
117  InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
119
120  // BFC, BFI, UBFX, SBFX
121  InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
122                             InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
123
124  //
125  // Zero and sign extension instructions
126  InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
127                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
128  InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
129                             InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
130  InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
131                             InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
132  //
133  // Compare instructions
134  InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
135                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
136                               [1], [A9_LdBypass]>,
137  InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
138                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
139                               [1, 1], [A9_LdBypass, A9_LdBypass]>,
140  InstrItinData<IIC_iCMPsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
141                               InstrStage<2, [A9_ALU0, A9_ALU1]>],
142                                [1, 1], [A9_LdBypass, NoBypass]>,
143  InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
144                               InstrStage<3, [A9_ALU0, A9_ALU1]>],
145                              [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
146  //
147  // Test instructions
148  InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
149                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
150  InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
151                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
152  InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
153                               InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
154  InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
155                               InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
156  //
157  // Move instructions, conditional
158  // FIXME: Correctly model the extra input dep on the destination.
159  InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
160                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
161  InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
163  InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
165  InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
166                               InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
167  InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
168                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
169                               InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
170                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
171
172  // Integer multiply pipeline
173  //
174  InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
175                               InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
176  InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
177                               InstrStage<2, [A9_ALU0]>],
178                              [3, 1, 1, 1]>,
179  InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
180                               InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
181  InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
182                               InstrStage<2, [A9_ALU0]>],
183                              [4, 1, 1, 1]>,
184  InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
185                               InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
186  InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
187                               InstrStage<3, [A9_ALU0]>],
188                              [4, 5, 1, 1]>,
189  // Integer load pipeline
190  // FIXME: The timings are some rough approximations
191  //
192  // Immediate offset
193  InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
194                                 InstrStage<1, [A9_MUX0], 0>,
195                                 InstrStage<1, [A9_AGU], 0>,
196                                 InstrStage<1, [A9_LSUnit]>],
197                                [3, 1], [A9_LdBypass]>,
198  InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
199                                 InstrStage<1, [A9_MUX0], 0>,
200                                 InstrStage<2, [A9_AGU], 0>,
201                                 InstrStage<1, [A9_LSUnit]>],
202                                [4, 1], [A9_LdBypass]>,
203  // FIXME: If address is 64-bit aligned, AGU cycles is 1.
204  InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
205                                 InstrStage<1, [A9_MUX0], 0>,
206                                 InstrStage<2, [A9_AGU], 0>,
207                                 InstrStage<1, [A9_LSUnit]>],
208                                [3, 3, 1], [A9_LdBypass]>,
209  //
210  // Register offset
211  InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
212                                 InstrStage<1, [A9_MUX0], 0>,
213                                 InstrStage<1, [A9_AGU], 0>,
214                                 InstrStage<1, [A9_LSUnit]>],
215                                [3, 1, 1], [A9_LdBypass]>,
216  InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
217                                 InstrStage<1, [A9_MUX0], 0>,
218                                 InstrStage<2, [A9_AGU], 0>,
219                                 InstrStage<1, [A9_LSUnit]>],
220                                [4, 1, 1], [A9_LdBypass]>,
221  InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
222                                 InstrStage<1, [A9_MUX0], 0>,
223                                 InstrStage<2, [A9_AGU], 0>,
224                                 InstrStage<1, [A9_LSUnit]>],
225                                [3, 3, 1, 1], [A9_LdBypass]>,
226  //
227  // Scaled register offset
228  InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
229                                 InstrStage<1, [A9_MUX0], 0>,
230                                 InstrStage<1, [A9_AGU], 0>,
231                                 InstrStage<1, [A9_LSUnit], 0>],
232                                [4, 1, 1], [A9_LdBypass]>,
233  InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
234                                 InstrStage<1, [A9_MUX0], 0>,
235                                 InstrStage<2, [A9_AGU], 0>,
236                                 InstrStage<1, [A9_LSUnit]>],
237                                [5, 1, 1], [A9_LdBypass]>,
238  //
239  // Immediate offset with update
240  InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
241                                 InstrStage<1, [A9_MUX0], 0>,
242                                 InstrStage<1, [A9_AGU], 0>,
243                                 InstrStage<1, [A9_LSUnit]>],
244                                [3, 2, 1], [A9_LdBypass]>,
245  InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
246                                 InstrStage<1, [A9_MUX0], 0>,
247                                 InstrStage<2, [A9_AGU], 0>,
248                                 InstrStage<1, [A9_LSUnit]>],
249                                [4, 3, 1], [A9_LdBypass]>,
250  //
251  // Register offset with update
252  InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
253                                 InstrStage<1, [A9_MUX0], 0>,
254                                 InstrStage<1, [A9_AGU], 0>,
255                                 InstrStage<1, [A9_LSUnit]>],
256                                [3, 2, 1, 1], [A9_LdBypass]>,
257  InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
258                                 InstrStage<1, [A9_MUX0], 0>,
259                                 InstrStage<2, [A9_AGU], 0>,
260                                 InstrStage<1, [A9_LSUnit]>],
261                                [4, 3, 1, 1], [A9_LdBypass]>,
262  InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
263                                 InstrStage<1, [A9_MUX0], 0>,
264                                 InstrStage<2, [A9_AGU], 0>,
265                                 InstrStage<1, [A9_LSUnit]>],
266                                [3, 3, 1, 1], [A9_LdBypass]>,
267  //
268  // Scaled register offset with update
269  InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
270                                 InstrStage<1, [A9_MUX0], 0>,
271                                 InstrStage<1, [A9_AGU], 0>,
272                                 InstrStage<1, [A9_LSUnit]>],
273                                [4, 3, 1, 1], [A9_LdBypass]>,
274  InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
275                                  InstrStage<1, [A9_MUX0], 0>,
276                                  InstrStage<2, [A9_AGU], 0>,
277                                  InstrStage<1, [A9_LSUnit]>],
278                                 [5, 4, 1, 1], [A9_LdBypass]>,
279  //
280  // Load multiple, def is the 5th operand.
281  // FIXME: This assumes 3 to 4 registers.
282  InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
283                                InstrStage<1, [A9_MUX0], 0>,
284                                InstrStage<2, [A9_AGU], 1>,
285                                InstrStage<2, [A9_LSUnit]>],
286                               [1, 1, 1, 1, 3],
287                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
288                         -1>, // dynamic uops
289  //
290  // Load multiple + update, defs are the 1st and 5th operands.
291  InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
292                                InstrStage<1, [A9_MUX0], 0>,
293                                InstrStage<2, [A9_AGU], 1>,
294                                InstrStage<2, [A9_LSUnit]>],
295                               [2, 1, 1, 1, 3],
296                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
297                         -1>, // dynamic uops
298  //
299  // Load multiple plus branch
300  InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
301                                InstrStage<1, [A9_MUX0], 0>,
302                                InstrStage<1, [A9_AGU], 1>,
303                                InstrStage<2, [A9_LSUnit]>,
304                                InstrStage<1, [A9_Branch]>],
305                               [1, 2, 1, 1, 3],
306                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
307                         -1>, // dynamic uops
308  //
309  // Pop, def is the 3rd operand.
310  InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
311                                InstrStage<1, [A9_MUX0], 0>,
312                                InstrStage<2, [A9_AGU], 1>,
313                                InstrStage<2, [A9_LSUnit]>],
314                               [1, 1, 3],
315                               [NoBypass, NoBypass, A9_LdBypass],
316                               -1>, // dynamic uops
317  //
318  // Pop + branch, def is the 3rd operand.
319  InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
320                                InstrStage<1, [A9_MUX0], 0>,
321                                InstrStage<2, [A9_AGU], 1>,
322                                InstrStage<2, [A9_LSUnit]>,
323                                InstrStage<1, [A9_Branch]>],
324                               [1, 1, 3],
325                               [NoBypass, NoBypass, A9_LdBypass],
326                               -1>, // dynamic uops
327  //
328  // iLoadi + iALUr for t2LDRpci_pic.
329  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
330                                InstrStage<1, [A9_MUX0], 0>,
331                                InstrStage<1, [A9_AGU], 0>,
332                                InstrStage<1, [A9_LSUnit]>,
333                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
334                               [2, 1]>,
335
336  // Integer store pipeline
337  //
338  // Immediate offset
339  InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
340                                 InstrStage<1, [A9_MUX0], 0>,
341                                 InstrStage<1, [A9_AGU], 0>,
342                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
343  InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
344                                 InstrStage<1, [A9_MUX0], 0>,
345                                 InstrStage<2, [A9_AGU], 1>,
346                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
347  // FIXME: If address is 64-bit aligned, AGU cycles is 1.
348  InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
349                                 InstrStage<1, [A9_MUX0], 0>,
350                                 InstrStage<2, [A9_AGU], 1>,
351                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
352  //
353  // Register offset
354  InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
355                                 InstrStage<1, [A9_MUX0], 0>,
356                                 InstrStage<1, [A9_AGU], 0>,
357                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
358  InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
359                                 InstrStage<1, [A9_MUX0], 0>,
360                                 InstrStage<2, [A9_AGU], 1>,
361                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
362  InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
363                                 InstrStage<1, [A9_MUX0], 0>,
364                                 InstrStage<2, [A9_AGU], 1>,
365                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
366  //
367  // Scaled register offset
368  InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
369                                  InstrStage<1, [A9_MUX0], 0>,
370                                  InstrStage<1, [A9_AGU], 0>,
371                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
372  InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
373                                  InstrStage<1, [A9_MUX0], 0>,
374                                  InstrStage<2, [A9_AGU], 1>,
375                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
376  //
377  // Immediate offset with update
378  InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
379                                  InstrStage<1, [A9_MUX0], 0>,
380                                  InstrStage<1, [A9_AGU], 0>,
381                                  InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
382  InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
383                                  InstrStage<1, [A9_MUX0], 0>,
384                                  InstrStage<2, [A9_AGU], 1>,
385                                  InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
386  //
387  // Register offset with update
388  InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
389                                  InstrStage<1, [A9_MUX0], 0>,
390                                  InstrStage<1, [A9_AGU], 0>,
391                                  InstrStage<1, [A9_LSUnit]>],
392                                 [2, 1, 1, 1]>,
393  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
394                                  InstrStage<1, [A9_MUX0], 0>,
395                                  InstrStage<2, [A9_AGU], 1>,
396                                  InstrStage<1, [A9_LSUnit]>],
397                                 [3, 1, 1, 1]>,
398  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
399                                  InstrStage<1, [A9_MUX0], 0>,
400                                  InstrStage<2, [A9_AGU], 1>,
401                                  InstrStage<1, [A9_LSUnit]>],
402                                 [3, 1, 1, 1]>,
403  //
404  // Scaled register offset with update
405  InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
406                                    InstrStage<1, [A9_MUX0], 0>,
407                                    InstrStage<1, [A9_AGU], 0>,
408                                    InstrStage<1, [A9_LSUnit]>],
409                                   [2, 1, 1, 1]>,
410  InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
411                                    InstrStage<1, [A9_MUX0], 0>,
412                                    InstrStage<2, [A9_AGU], 1>,
413                                    InstrStage<1, [A9_LSUnit]>],
414                                   [3, 1, 1, 1]>,
415  //
416  // Store multiple
417  InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
418                                InstrStage<1, [A9_MUX0], 0>,
419                                InstrStage<1, [A9_AGU], 0>,
420                                InstrStage<2, [A9_LSUnit]>],
421                [], [], -1>, // dynamic uops
422  //
423  // Store multiple + update
424  InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
425                                InstrStage<1, [A9_MUX0], 0>,
426                                InstrStage<1, [A9_AGU], 0>,
427                                InstrStage<2, [A9_LSUnit]>],
428                [2], [], -1>, // dynamic uops
429  //
430  // Preload
431  InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
432
433  // Branch
434  //
435  // no delay slots, so the latency of a branch is unimportant
436  InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
437                                InstrStage<1, [A9_Issue1], 0>,
438                                InstrStage<1, [A9_Branch]>]>,
439
440  // VFP and NEON share the same register file. This means that every VFP
441  // instruction should wait for full completion of the preceding NEON
442  // instruction and vice-versa. We model this behavior with two artificial FUs:
443  // DRegsVFP and DRegsN.
444  //
445  // Every VFP instruction:
446  //  - Acquires DRegsVFP resource for 1 cycle
447  //  - Reserves DRegsN resource for the whole duration (including time to
448  //    register file writeback!).
449  // Every NEON instruction does the same but with FUs swapped.
450  //
451  // Since the reserved FU cannot be acquired, this models precisely
452  // "cross-domain" stalls.
453
454  // VFP
455  // Issue through integer pipeline, and execute in NEON unit.
456
457  // FP Special Register to Integer Register File Move
458  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
459                              InstrStage<1, [A9_MUX0], 0>,
460                              InstrStage<1, [A9_DRegsVFP], 0, Required>,
461                              InstrStage<2, [A9_DRegsN],   0, Reserved>,
462                              InstrStage<1, [A9_NPipe]>],
463                             [1]>,
464  //
465  // Single-precision FP Unary
466  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
467                               InstrStage<1, [A9_MUX0], 0>,
468                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
469                               // Extra latency cycles since wbck is 2 cycles
470                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
471                               InstrStage<1, [A9_NPipe]>],
472                              [1, 1]>,
473  //
474  // Double-precision FP Unary
475  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
476                               InstrStage<1, [A9_MUX0], 0>,
477                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
478                               // Extra latency cycles since wbck is 2 cycles
479                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
480                               InstrStage<1, [A9_NPipe]>],
481                              [1, 1]>,
482
483  //
484  // Single-precision FP Compare
485  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486                               InstrStage<1, [A9_MUX0], 0>,
487                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
488                               // Extra latency cycles since wbck is 4 cycles
489                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
490                               InstrStage<1, [A9_NPipe]>],
491                              [1, 1]>,
492  //
493  // Double-precision FP Compare
494  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495                               InstrStage<1, [A9_MUX0], 0>,
496                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
497                               // Extra latency cycles since wbck is 4 cycles
498                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
499                               InstrStage<1, [A9_NPipe]>],
500                              [1, 1]>,
501  //
502  // Single to Double FP Convert
503  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
504                               InstrStage<1, [A9_MUX0], 0>,
505                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
506                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
507                               InstrStage<1, [A9_NPipe]>],
508                              [4, 1]>,
509  //
510  // Double to Single FP Convert
511  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512                               InstrStage<1, [A9_MUX0], 0>,
513                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
514                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
515                               InstrStage<1, [A9_NPipe]>],
516                              [4, 1]>,
517
518  //
519  // Single to Half FP Convert
520  InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
521                               InstrStage<1, [A9_MUX0], 0>,
522                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
523                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
524                               InstrStage<1, [A9_NPipe]>],
525                              [4, 1]>,
526  //
527  // Half to Single FP Convert
528  InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
529                               InstrStage<1, [A9_MUX0], 0>,
530                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
531                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
532                               InstrStage<1, [A9_NPipe]>],
533                              [2, 1]>,
534
535  //
536  // Single-Precision FP to Integer Convert
537  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
538                               InstrStage<1, [A9_MUX0], 0>,
539                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
540                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
541                               InstrStage<1, [A9_NPipe]>],
542                              [4, 1]>,
543  //
544  // Double-Precision FP to Integer Convert
545  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
546                               InstrStage<1, [A9_MUX0], 0>,
547                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
548                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
549                               InstrStage<1, [A9_NPipe]>],
550                              [4, 1]>,
551  //
552  // Integer to Single-Precision FP Convert
553  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
554                               InstrStage<1, [A9_MUX0], 0>,
555                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
556                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
557                               InstrStage<1, [A9_NPipe]>],
558                              [4, 1]>,
559  //
560  // Integer to Double-Precision FP Convert
561  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
562                               InstrStage<1, [A9_MUX0], 0>,
563                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
564                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
565                               InstrStage<1, [A9_NPipe]>],
566                              [4, 1]>,
567  //
568  // Single-precision FP ALU
  // Needs DRegsVFP free (Required) and keeps DRegsN reserved for 5 cycles,
  // one more than the operand-0 cycle of 4; NPipe is occupied for 1 cycle.
  // The two remaining operand cycles are both 1.
  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe]>],
                              [4, 1, 1]>,
575  //
576  // Double-precision FP ALU
  // Same stages and operand cycles as the single-precision FP ALU entry:
  // DRegsN reserved for 5 cycles, NPipe busy for 1 cycle.
  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe]>],
                              [4, 1, 1]>,
583  //
584  // Single-precision FP Multiply
  // Needs DRegsVFP free (Required) and keeps DRegsN reserved for 6 cycles,
  // one more than the operand-0 cycle of 5; NPipe is occupied for 1 cycle.
  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<6, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe]>],
                              [5, 1, 1]>,
591  //
592  // Double-precision FP Multiply
  // One cycle longer than the single-precision multiply throughout: DRegsN
  // is reserved for 7 cycles, NPipe is held for 2, operand-0 cycle is 6.
  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<7, [A9_DRegsN],   0, Reserved>,
                               InstrStage<2, [A9_NPipe]>],
                              [6, 1, 1]>,
599  //
600  // Single-precision FP MAC
  // Needs DRegsVFP free (Required) and keeps DRegsN reserved for 9 cycles,
  // one more than the operand-0 cycle of 8; NPipe is occupied for 1 cycle.
  // Four operand cycles are given; the last three are all 1.
  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<9, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe]>],
                              [8, 1, 1, 1]>,
607  //
608  // Double-precision FP MAC
  // One cycle longer than the single-precision MAC: DRegsN is reserved for
  // 10 cycles, NPipe is held for 2 cycles, and the operand-0 cycle is 9.
  InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1,  [A9_MUX0], 0>,
                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                               InstrStage<10, [A9_DRegsN],  0, Reserved>,
                               InstrStage<2,  [A9_NPipe]>],
                              [9, 1, 1, 1]>,
615  //
616  // Single-precision Fused FP MAC
  // Given the same numbers as the non-fused single-precision MAC: DRegsN
  // reserved for 9 cycles, 1 NPipe cycle, operand cycles [8, 1, 1, 1].
  InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<9, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe]>],
                              [8, 1, 1, 1]>,
623  //
624  // Double-precision Fused FP MAC
  // Given the same numbers as the non-fused double-precision MAC: DRegsN
  // reserved for 10 cycles, 2 NPipe cycles, operand cycles [9, 1, 1, 1].
  InstrItinData<IIC_fpFMAC64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1,  [A9_MUX0], 0>,
                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                               InstrStage<10, [A9_DRegsN],  0, Reserved>,
                               InstrStage<2,  [A9_NPipe]>],
                              [9, 1, 1, 1]>,
631  //
632  // Single-precision FP DIV
  // Long-latency op: DRegsN is reserved for 16 cycles (operand-0 cycle 15
  // plus one) and NPipe itself is held for 10 cycles, not just 1.
  InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1,  [A9_MUX0], 0>,
                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                               InstrStage<16, [A9_DRegsN],  0, Reserved>,
                               InstrStage<10, [A9_NPipe]>],
                              [15, 1, 1]>,
639  //
640  // Double-precision FP DIV
  // Ten cycles longer than the single-precision divide in every figure:
  // DRegsN reserved for 26 cycles, NPipe held for 20, operand-0 cycle 25.
  InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1,  [A9_MUX0], 0>,
                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                               InstrStage<26, [A9_DRegsN],  0, Reserved>,
                               InstrStage<20, [A9_NPipe]>],
                              [25, 1, 1]>,
647  //
648  // Single-precision FP SQRT
  // Long-latency op: DRegsN is reserved for 18 cycles (operand-0 cycle 17
  // plus one) and NPipe is held for 13 cycles.
  InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1,  [A9_MUX0], 0>,
                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                               InstrStage<18, [A9_DRegsN],   0, Reserved>,
                               InstrStage<13, [A9_NPipe]>],
                              [17, 1]>,
655  //
656  // Double-precision FP SQRT
  // Fifteen cycles longer than the single-precision square root: DRegsN
  // reserved for 33 cycles, NPipe held for 28, operand-0 cycle 32.
  InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1,  [A9_MUX0], 0>,
                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
                               InstrStage<33, [A9_DRegsN],   0, Reserved>,
                               InstrStage<28, [A9_NPipe]>],
                              [32, 1]>,
663
664  //
665  // Integer to Single-precision Move
  // Needs DRegsVFP free (Required); NPipe is occupied for 1 cycle and both
  // operand cycles are 1.
  InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               // Extra 1 latency cycle since write-back
                               // (wbck) is 2 cycles, hence 3 reserved cycles.
                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe]>],
                              [1, 1]>,
673  //
674  // Integer to Double-precision Move
  // Same stages as the integer-to-single move, with a third operand cycle
  // (all three are 1).
  InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               // Extra 1 latency cycle since write-back
                               // (wbck) is 2 cycles, hence 3 reserved cycles.
                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe]>],
                              [1, 1, 1]>,
682  //
683  // Single-precision to Integer Move
684  //
685  // On A9 move-from-VFP is free to issue with no stall if other VFP
686  // operations are in flight. I assume it still can't dual-issue though.
  // Only an issue slot and MUX0 are modelled: there is no DRegsVFP, DRegsN
  // or NPipe stage, so this entry never reserves the FP register sets.
  InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>],
                              [2, 1]>,
690  //
691  // Double-precision to Integer Move
692  //
693  // On A9 move-from-VFP is free to issue with no stall if other VFP
694  // operations are in flight. I assume it still can't dual-issue though.
  // Only an issue slot and MUX0 are modelled, as for the single-precision
  // move; three operand cycles are given here instead of two.
  InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>],
                              [2, 1, 1]>,
698  //
699  // Single-precision FP Load
  // VFP-side load: DRegsVFP is Required and DRegsN is Reserved for 2 cycles.
  // NPipe has a time increment of 0, so the 1-cycle LSUnit stage that
  // follows is not delayed by it.
  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [1, 1]>,
707  //
708  // Double-precision FP Load
709  // FIXME: Result latency is 1 if address is 64-bit aligned.
  // Same stages as the single-precision load; only the operand-0 cycle
  // differs (2 instead of 1), which is what the FIXME above refers to.
  InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [2, 1]>,
717  //
718  // FP Load Multiple
719  // FIXME: assumes 2 doubles which requires 2 LS cycles.
  // LSUnit is held for 2 cycles (the fixed two-double assumption noted
  // above). The trailing arguments are an empty bypass list and -1, which
  // the inline comment marks as a dynamic micro-op count.
  InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                [1, 1, 1, 1], [], -1>, // dynamic uops
727  //
728  // FP Load Multiple + update
729  // FIXME: assumes 2 doubles which requires 2 LS cycles.
  // Same stages as FP Load Multiple; the operand-0 cycle is 2 rather than 1
  // (presumably the updated base register -- confirm against the operand
  // order of the instructions using this class).
  InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                [2, 1, 1, 1], [], -1>, // dynamic uops
737  //
738  // Single-precision FP Store
  // VFP-side store: same stage list as the single-precision FP load
  // (DRegsN reserved for 2 cycles, NPipe and LSUnit 1 cycle each).
  InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [1, 1]>,
746  //
747  // Double-precision FP Store
  // Identical stages and operand cycles to the single-precision FP store.
  InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [1, 1]>,
755  //
756  // FP Store Multiple
757  // FIXME: assumes 2 doubles which requires 2 LS cycles.
  // LSUnit is held for 2 cycles (the fixed two-double assumption noted
  // above). The trailing arguments are an empty bypass list and -1, which
  // the inline comment marks as a dynamic micro-op count.
  InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                [1, 1, 1, 1], [], -1>, // dynamic uops
765  //
766  // FP Store Multiple + update
767  // FIXME: assumes 2 doubles which requires 2 LS cycles.
  // Same stages as FP Store Multiple; the operand-0 cycle is 2 rather than 1
  // (presumably the updated base register -- confirm against the operand
  // order of the instructions using this class).
  InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                InstrStage<1, [A9_MUX0], 0>,
                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_NPipe], 0>,
                                InstrStage<2, [A9_LSUnit]>],
                [2, 1, 1, 1], [], -1>, // dynamic uops
775  // NEON
776  // VLD1
  // NEON-side op, so the register-set roles are swapped relative to the VFP
  // entries: DRegsN is Required and DRegsVFP is Reserved (for 7 cycles).
  // NPipe and LSUnit are each occupied for 1 cycle.
  InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [1, 1]>,
784  // VLD1x2
  // Same stages as VLD1 (DRegsVFP reserved for 7 cycles, 1 NPipe and 1
  // LSUnit cycle); one additional operand cycle, all equal to 1.
  InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [1, 1, 1]>,
792  // VLD1x3
  // Needs a second NPipe/LSUnit cycle compared with VLD1x2, and the DRegsVFP
  // reservation grows to 8 cycles. The third operand cycle is 2.
  InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [1, 1, 2, 1]>,
800  // VLD1x4
  // Same stages as VLD1x3 (8-cycle DRegsVFP reservation, 2 NPipe and 2
  // LSUnit cycles); the third and fourth operand cycles are 2.
  InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [1, 1, 2, 2, 1]>,
808  // VLD1u
  // Same stages as VLD1. The operand-cycle list gains an entry of 2 in
  // second position (presumably the written-back address -- confirm against
  // the instruction operand order).
  InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [1, 2, 1]>,
816  // VLD1x2u
  // Same stages as VLD1x2 (7-cycle DRegsVFP reservation, 1 NPipe and 1
  // LSUnit cycle); four operand cycles, the third being 2.
  InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [1, 1, 2, 1]>,
824  // VLD1x3u
  // Same stages as VLD1x3 (8-cycle DRegsVFP reservation, 2 NPipe and 2
  // LSUnit cycles); five operand cycles are listed.
  InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [1, 1, 2, 2, 1]>,
832  // VLD1x4u
  // Same stages as VLD1x4 (8-cycle DRegsVFP reservation, 2 NPipe and 2
  // LSUnit cycles); six operand cycles are listed.
  InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [1, 1, 2, 2, 2, 1]>,
840  //
841  // VLD1ln
  // Lane load: DRegsN Required, DRegsVFP Reserved for 8 cycles, NPipe and
  // LSUnit held for 2 cycles each. The operand-0 cycle is 3.
  InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [3, 1, 1, 1]>,
849  //
850  // VLD1lnu
  // Same stages as VLD1ln; six operand cycles are listed, starting 3, 2.
  InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [3, 2, 1, 1, 1, 1]>,
858  //
859  // VLD1dup
  // Same stages as VLD1 (7-cycle DRegsVFP reservation, 1 NPipe and 1
  // LSUnit cycle), but the operand-0 cycle is 2 instead of 1.
  InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [2, 1]>,
867  //
868  // VLD1dupu
  // Same stages as VLD1dup; four operand cycles are listed, the first two
  // being 2.
  InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [2, 2, 1, 1]>,
876  //
877  // VLD2
  // NEON-side load: DRegsN Required, DRegsVFP Reserved; NPipe and LSUnit
  // are each occupied for 1 cycle. The first two operand cycles are 2.
  InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               // Extra latency cycles since write-back
                               // (wbck) is 7 cycles
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [2, 2, 1]>,
886  //
887  // VLD2x2
  // Two NPipe and two LSUnit cycles, with DRegsVFP reserved for 8 cycles.
  // The operand cycles alternate 2, 3, 2, 3 before the final 1.
  InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [2, 3, 2, 3, 1]>,
895  //
896  // VLD2ln
  // Lane load: DRegsVFP reserved for 8 cycles, NPipe and LSUnit held for 2
  // cycles each. The first two operand cycles are 3, the remaining four 1.
  InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [3, 3, 1, 1, 1, 1]>,
904  //
905  // VLD2u
  // Same stages as VLD2; six operand cycles are listed, the first three
  // being 2.
  InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               // Extra latency cycles since write-back
                               // (wbck) is 7 cycles
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [2, 2, 2, 1, 1, 1]>,
914  //
915  // VLD2x2u
  // Same stages as VLD2x2 (8-cycle DRegsVFP reservation, 2 NPipe and 2
  // LSUnit cycles); one extra operand cycle of 2 precedes the final 1.
  InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [2, 3, 2, 3, 2, 1]>,
923  //
924  // VLD2lnu
  // Same stages as VLD2ln; eight operand cycles are listed, starting
  // 3, 3, 2 and followed by five 1s.
  InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [3, 3, 2, 1, 1, 1, 1, 1]>,
932  //
933  // VLD2dup
  // Given the same stages and operand cycles as VLD2: 7-cycle DRegsVFP
  // reservation, 1 NPipe and 1 LSUnit cycle, operand cycles [2, 2, 1].
  InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [2, 2, 1]>,
941  //
942  // VLD2dupu
  // Same stages as VLD2dup; five operand cycles are listed, the first
  // three being 2.
  InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [2, 2, 2, 1, 1]>,
950  //
951  // VLD3
952  InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
953                               InstrStage<1, [A9_MUX0], 0>,
954                               InstrStage<1, [A9_DRegsN],   0, Required>,
955                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
956                               InstrStage<3, [A9_NPipe], 0>,
957                               InstrStage<3, [A9_LSUnit]>],
958                              [3, 3, 4, 1]>,
959  //
960  // VLD3ln
961  InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
962                               InstrStage<1, [A9_MUX0], 0>,
963                               InstrStage<1, [A9_DRegsN],   0, Required>,
964                               InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
965                               InstrStage<5, [A9_NPipe], 0>,
966                               InstrStage<5, [A9_LSUnit]>],
967                              [5, 5, 6, 1, 1, 1, 1, 2]>,
968  //
969  // VLD3u
970  InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
971                               InstrStage<1, [A9_MUX0], 0>,
972                               InstrStage<1, [A9_DRegsN],   0, Required>,
973                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
974                               InstrStage<3, [A9_NPipe], 0>,
975                               InstrStage<3, [A9_LSUnit]>],
976                              [3, 3, 4, 2, 1]>,
977  //
978  // VLD3lnu
979  InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
980                               InstrStage<1, [A9_MUX0], 0>,
981                               InstrStage<1, [A9_DRegsN],   0, Required>,
982                               InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
983                               InstrStage<5, [A9_NPipe], 0>,
984                               InstrStage<5, [A9_LSUnit]>],
985                              [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
986  //
987  // VLD3dup
  // Given the same stages and operand cycles as VLD3: 9-cycle DRegsVFP
  // reservation, 3 NPipe and 3 LSUnit cycles, operand cycles [3, 3, 4, 1].
  InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<3, [A9_NPipe], 0>,
                               InstrStage<3, [A9_LSUnit]>],
                              [3, 3, 4, 1]>,
995  //
996  // VLD3dupu
  // Same stages as VLD3dup; six operand cycles are listed.
  InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<3, [A9_NPipe], 0>,
                               InstrStage<3, [A9_LSUnit]>],
                              [3, 3, 4, 2, 1, 1]>,
1004  //
1005  // VLD4
1006  InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1007                               InstrStage<1, [A9_MUX0], 0>,
1008                               InstrStage<1, [A9_DRegsN],   0, Required>,
1009                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1010                               InstrStage<3, [A9_NPipe], 0>,
1011                               InstrStage<3, [A9_LSUnit]>],
1012                              [3, 3, 4, 4, 1]>,
1013  //
1014  // VLD4ln
1015  InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1016                               InstrStage<1, [A9_MUX0], 0>,
1017                               InstrStage<1, [A9_DRegsN],   0, Required>,
1018                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1019                               InstrStage<4, [A9_NPipe], 0>,
1020                               InstrStage<4, [A9_LSUnit]>],
1021                              [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
1022  //
1023  // VLD4u
1024  InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1025                               InstrStage<1, [A9_MUX0], 0>,
1026                               InstrStage<1, [A9_DRegsN],   0, Required>,
1027                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1028                               InstrStage<3, [A9_NPipe], 0>,
1029                               InstrStage<3, [A9_LSUnit]>],
1030                              [3, 3, 4, 4, 2, 1]>,
1031  //
1032  // VLD4lnu
1033  InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1034                               InstrStage<1, [A9_MUX0], 0>,
1035                               InstrStage<1, [A9_DRegsN],   0, Required>,
1036                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1037                               InstrStage<4, [A9_NPipe], 0>,
1038                               InstrStage<4, [A9_LSUnit]>],
1039                              [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
1040  //
1041  // VLD4dup
  // Two NPipe and two LSUnit cycles with DRegsVFP reserved for 8 cycles,
  // i.e. one cycle less in each of these stages than the plain VLD4 entry.
  InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [2, 2, 3, 3, 1]>,
1049  //
1050  // VLD4dupu
  // Same stages as VLD4dup; seven operand cycles are listed.
  InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [2, 2, 3, 3, 2, 1, 1]>,
1058  //
1059  // VST1
  // NEON-side store: DRegsN is Required and DRegsVFP is Reserved for just 1
  // cycle, the same length as the NPipe and LSUnit stages.
  InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [1, 1, 1]>,
1067  //
1068  // VST1x2
  // Same single-cycle stages as VST1; four operand cycles, all 1.
  InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [1, 1, 1, 1]>,
1076  //
1077  // VST1x3
  // DRegsVFP reservation, NPipe and LSUnit all grow to 2 cycles compared
  // with VST1x2. The fifth operand cycle is 2.
  InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [1, 1, 1, 1, 2]>,
1085  //
1086  // VST1x4
  // Same 2-cycle stages as VST1x3; six operand cycles, the last two being 2.
  InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [1, 1, 1, 1, 2, 2]>,
1094  //
1095  // VST1u
  // Same single-cycle stages as VST1. The operand-cycle list starts with 2
  // (presumably the written-back address -- confirm against the instruction
  // operand order), followed by four 1s.
  InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [2, 1, 1, 1, 1]>,
1103  //
1104  // VST1x2u
  // Same single-cycle stages as VST1x2; six operand cycles, led by a 2.
  InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [2, 1, 1, 1, 1, 1]>,
1112  //
1113  // VST1x3u
  // Same 2-cycle stages as VST1x3; seven operand cycles, with 2 in the
  // first and last positions.
  InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [2, 1, 1, 1, 1, 1, 2]>,
1121  //
1122  // VST1x4u
  // Same 2-cycle stages as VST1x4; eight operand cycles, with 2 in the
  // first and the last two positions.
  InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<2, [A9_NPipe], 0>,
                               InstrStage<2, [A9_LSUnit]>],
                              [2, 1, 1, 1, 1, 1, 2, 2]>,
1130  //
1131  // VST1ln
  // Lane store given the same stages and operand cycles as VST1: every
  // stage lasts 1 cycle and all three operand cycles are 1.
  InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                               InstrStage<1, [A9_MUX0], 0>,
                               InstrStage<1, [A9_DRegsN],   0, Required>,
                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
                               InstrStage<1, [A9_NPipe], 0>,
                               InstrStage<1, [A9_LSUnit]>],
                              [1, 1, 1]>,
1139  //
1140  // VST1lnu
1141  InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1142                               InstrStage<1, [A9_MUX0], 0>,
1143                               InstrStage<1, [A9_DRegsN],   0, Required>,
1144                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1145                               InstrStage<1, [A9_NPipe], 0>,
1146                               InstrStage<1, [A9_LSUnit]>],
1147                              [2, 1, 1, 1, 1]>,
1148  //
1149  // VST2: every stage lasts 1 cycle; four operand cycles
1150  InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, // Issue 0 or Issue 1
1151                               InstrStage<1, [A9_MUX0], 0>, // AGU + NEON/FPU multiplexer
1152                               InstrStage<1, [A9_DRegsN],   0, Required>, // FP register set, NEON side
1153                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>, // FP register set, VFP side
1154                               InstrStage<1, [A9_NPipe], 0>, // NEON pipeline
1155                               InstrStage<1, [A9_LSUnit]>], // L/S unit
1156                              [1, 1, 1, 1]>,
1157  //
1158  // VST2x2: DRegsVFP, NPipe and LSUnit stages last 3 cycles; six operand cycles
1159  InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1160                               InstrStage<1, [A9_MUX0], 0>,
1161                               InstrStage<1, [A9_DRegsN],   0, Required>,
1162                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1163                               InstrStage<3, [A9_NPipe], 0>,
1164                               InstrStage<3, [A9_LSUnit]>],
1165                              [1, 1, 1, 1, 2, 2]>,
1166  //
1167  // VST2u: every stage lasts 1 cycle; six operand cycles
1168  InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1169                               InstrStage<1, [A9_MUX0], 0>,
1170                               InstrStage<1, [A9_DRegsN],   0, Required>,
1171                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1172                               InstrStage<1, [A9_NPipe], 0>,
1173                               InstrStage<1, [A9_LSUnit]>],
1174                              [2, 1, 1, 1, 1, 1]>,
1175  //
1176  // VST2x2u: DRegsVFP, NPipe and LSUnit stages last 3 cycles; eight operand cycles
1177  InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1178                               InstrStage<1, [A9_MUX0], 0>,
1179                               InstrStage<1, [A9_DRegsN],   0, Required>,
1180                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1181                               InstrStage<3, [A9_NPipe], 0>,
1182                               InstrStage<3, [A9_LSUnit]>],
1183                              [2, 1, 1, 1, 1, 1, 2, 2]>,
1184  //
1185  // VST2ln: every stage lasts 1 cycle; four operand cycles
1186  InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1187                               InstrStage<1, [A9_MUX0], 0>,
1188                               InstrStage<1, [A9_DRegsN],   0, Required>,
1189                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1190                               InstrStage<1, [A9_NPipe], 0>,
1191                               InstrStage<1, [A9_LSUnit]>],
1192                              [1, 1, 1, 1]>,
1193  //
1194  // VST2lnu: every stage lasts 1 cycle; six operand cycles
1195  InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1196                               InstrStage<1, [A9_MUX0], 0>,
1197                               InstrStage<1, [A9_DRegsN],   0, Required>,
1198                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1199                               InstrStage<1, [A9_NPipe], 0>,
1200                               InstrStage<1, [A9_LSUnit]>],
1201                              [2, 1, 1, 1, 1, 1]>,
1202  //
1203  // VST3: DRegsVFP, NPipe and LSUnit stages last 2 cycles; five operand cycles
1204  InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, // Issue 0 or Issue 1
1205                               InstrStage<1, [A9_MUX0], 0>, // AGU + NEON/FPU multiplexer
1206                               InstrStage<1, [A9_DRegsN],   0, Required>, // FP register set, NEON side
1207                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>, // FP register set, VFP side
1208                               InstrStage<2, [A9_NPipe], 0>, // NEON pipeline
1209                               InstrStage<2, [A9_LSUnit]>], // L/S unit
1210                              [1, 1, 1, 1, 2]>,
1211  //
1212  // VST3u: DRegsVFP, NPipe and LSUnit stages last 2 cycles; seven operand cycles
1213  InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1214                               InstrStage<1, [A9_MUX0], 0>,
1215                               InstrStage<1, [A9_DRegsN],   0, Required>,
1216                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1217                               InstrStage<2, [A9_NPipe], 0>,
1218                               InstrStage<2, [A9_LSUnit]>],
1219                              [2, 1, 1, 1, 1, 1, 2]>,
1220  //
1221  // VST3ln: DRegsVFP, NPipe and LSUnit stages last 3 cycles; five operand cycles
1222  InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1223                               InstrStage<1, [A9_MUX0], 0>,
1224                               InstrStage<1, [A9_DRegsN],   0, Required>,
1225                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1226                               InstrStage<3, [A9_NPipe], 0>,
1227                               InstrStage<3, [A9_LSUnit]>],
1228                              [1, 1, 1, 1, 2]>,
1229  //
1230  // VST3lnu: DRegsVFP, NPipe and LSUnit stages last 3 cycles; seven operand cycles
1231  InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1232                               InstrStage<1, [A9_MUX0], 0>,
1233                               InstrStage<1, [A9_DRegsN],   0, Required>,
1234                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1235                               InstrStage<3, [A9_NPipe], 0>,
1236                               InstrStage<3, [A9_LSUnit]>],
1237                              [2, 1, 1, 1, 1, 1, 2]>,
1238  //
1239  // VST4: DRegsVFP, NPipe and LSUnit stages last 2 cycles; six operand cycles
1240  InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1241                               InstrStage<1, [A9_MUX0], 0>,
1242                               InstrStage<1, [A9_DRegsN],   0, Required>,
1243                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1244                               InstrStage<2, [A9_NPipe], 0>,
1245                               InstrStage<2, [A9_LSUnit]>],
1246                              [1, 1, 1, 1, 2, 2]>,
1247  //
1248  // VST4u: DRegsVFP, NPipe and LSUnit stages last 2 cycles; eight operand cycles
1249  InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1250                               InstrStage<1, [A9_MUX0], 0>,
1251                               InstrStage<1, [A9_DRegsN],   0, Required>,
1252                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1253                               InstrStage<2, [A9_NPipe], 0>,
1254                               InstrStage<2, [A9_LSUnit]>],
1255                              [2, 1, 1, 1, 1, 1, 2, 2]>,
1256  //
1257  // VST4ln: DRegsVFP, NPipe and LSUnit stages last 2 cycles; six operand cycles
1258  InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1259                               InstrStage<1, [A9_MUX0], 0>,
1260                               InstrStage<1, [A9_DRegsN],   0, Required>,
1261                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1262                               InstrStage<2, [A9_NPipe], 0>,
1263                               InstrStage<2, [A9_LSUnit]>],
1264                              [1, 1, 1, 1, 2, 2]>,
1265  //
1266  // VST4lnu: DRegsVFP, NPipe and LSUnit stages last 2 cycles; eight operand cycles
1267  InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1268                               InstrStage<1, [A9_MUX0], 0>,
1269                               InstrStage<1, [A9_DRegsN],   0, Required>,
1270                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1271                               InstrStage<2, [A9_NPipe], 0>,
1272                               InstrStage<2, [A9_LSUnit]>],
1273                              [2, 1, 1, 1, 1, 1, 2, 2]>,
1274
1275  //
1276  // Double-register Integer Unary
1277  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, // Issue 0 or Issue 1
1278                               InstrStage<1, [A9_MUX0], 0>, // AGU + NEON/FPU multiplexer
1279                               InstrStage<1, [A9_DRegsN],   0, Required>, // FP register set, NEON side
1280                               // Extra latency cycles since write-back (wbck) is 6 cycles
1281                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>, // FP register set, VFP side
1282                               InstrStage<1, [A9_NPipe]>], // NEON pipeline
1283                              [4, 2]>,
1284  //
1285  // Quad-register Integer Unary
1286  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1287                               InstrStage<1, [A9_MUX0], 0>,
1288                               InstrStage<1, [A9_DRegsN],   0, Required>,
1289                               // Extra latency cycles since write-back (wbck) is 6 cycles
1290                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1291                               InstrStage<1, [A9_NPipe]>],
1292                              [4, 2]>,
1293  //
1294  // Double-register Integer Q-Unary
1295  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1296                               InstrStage<1, [A9_MUX0], 0>,
1297                               InstrStage<1, [A9_DRegsN],   0, Required>,
1298                               // Extra latency cycles since write-back (wbck) is 6 cycles
1299                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1300                               InstrStage<1, [A9_NPipe]>],
1301                              [4, 1]>,
1302  //
1303  // Quad-register Integer Q-Unary
1304  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1305                               InstrStage<1, [A9_MUX0], 0>,
1306                               InstrStage<1, [A9_DRegsN],   0, Required>,
1307                               // Extra latency cycles since write-back (wbck) is 6 cycles
1308                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1309                               InstrStage<1, [A9_NPipe]>],
1310                              [4, 1]>,
1311  //
1312  // Double-register Integer Binary
1313  InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, // Issue 0 or Issue 1
1314                               InstrStage<1, [A9_MUX0], 0>, // AGU + NEON/FPU multiplexer
1315                               InstrStage<1, [A9_DRegsN],   0, Required>, // FP register set, NEON side
1316                               // Extra latency cycles since write-back (wbck) is 6 cycles
1317                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>, // FP register set, VFP side
1318                               InstrStage<1, [A9_NPipe]>], // NEON pipeline
1319                              [3, 2, 2]>,
1320  //
1321  // Quad-register Integer Binary
1322  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1323                               InstrStage<1, [A9_MUX0], 0>,
1324                               InstrStage<1, [A9_DRegsN],   0, Required>,
1325                               // Extra latency cycles since write-back (wbck) is 6 cycles
1326                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1327                               InstrStage<1, [A9_NPipe]>],
1328                              [3, 2, 2]>,
1329  //
1330  // Double-register Integer Subtract
1331  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1332                               InstrStage<1, [A9_MUX0], 0>,
1333                               InstrStage<1, [A9_DRegsN],   0, Required>,
1334                               // Extra latency cycles since write-back (wbck) is 6 cycles
1335                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1336                               InstrStage<1, [A9_NPipe]>],
1337                              [3, 2, 1]>,
1338  //
1339  // Quad-register Integer Subtract
1340  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1341                               InstrStage<1, [A9_MUX0], 0>,
1342                               InstrStage<1, [A9_DRegsN],   0, Required>,
1343                               // Extra latency cycles since write-back (wbck) is 6 cycles
1344                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1345                               InstrStage<1, [A9_NPipe]>],
1346                              [3, 2, 1]>,
1347  //
1348  // Double-register Integer Shift
1349  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1350                               InstrStage<1, [A9_MUX0], 0>,
1351                               InstrStage<1, [A9_DRegsN],   0, Required>,
1352                               // Extra latency cycles since write-back (wbck) is 6 cycles
1353                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1354                               InstrStage<1, [A9_NPipe]>],
1355                              [3, 1, 1]>,
1356  //
1357  // Quad-register Integer Shift
1358  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1359                               InstrStage<1, [A9_MUX0], 0>,
1360                               InstrStage<1, [A9_DRegsN],   0, Required>,
1361                               // Extra latency cycles since write-back (wbck) is 6 cycles
1362                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1363                               InstrStage<1, [A9_NPipe]>],
1364                              [3, 1, 1]>,
1365  //
1366  // Double-register Integer Shift (4 cycle)
1367  InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, // Issue 0 or Issue 1
1368                               InstrStage<1, [A9_MUX0], 0>, // AGU + NEON/FPU multiplexer
1369                               InstrStage<1, [A9_DRegsN],   0, Required>, // FP register set, NEON side
1370                               // Extra latency cycles since write-back (wbck) is 6 cycles
1371                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>, // FP register set, VFP side
1372                               InstrStage<1, [A9_NPipe]>], // NEON pipeline
1373                              [4, 1, 1]>,
1374  //
1375  // Quad-register Integer Shift (4 cycle)
1376  InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1377                               InstrStage<1, [A9_MUX0], 0>,
1378                               InstrStage<1, [A9_DRegsN],   0, Required>,
1379                               // Extra latency cycles since write-back (wbck) is 6 cycles
1380                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1381                               InstrStage<1, [A9_NPipe]>],
1382                              [4, 1, 1]>,
1383  //
1384  // Double-register Integer Binary (4 cycle)
1385  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1386                               InstrStage<1, [A9_MUX0], 0>,
1387                               InstrStage<1, [A9_DRegsN],   0, Required>,
1388                               // Extra latency cycles since write-back (wbck) is 6 cycles
1389                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1390                               InstrStage<1, [A9_NPipe]>],
1391                              [4, 2, 2]>,
1392  //
1393  // Quad-register Integer Binary (4 cycle)
1394  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1395                               InstrStage<1, [A9_MUX0], 0>,
1396                               InstrStage<1, [A9_DRegsN],   0, Required>,
1397                               // Extra latency cycles since write-back (wbck) is 6 cycles
1398                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1399                               InstrStage<1, [A9_NPipe]>],
1400                              [4, 2, 2]>,
1401  //
1402  // Double-register Integer Subtract (4 cycle)
1403  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1404                               InstrStage<1, [A9_MUX0], 0>,
1405                               InstrStage<1, [A9_DRegsN],   0, Required>,
1406                               // Extra latency cycles since write-back (wbck) is 6 cycles
1407                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1408                               InstrStage<1, [A9_NPipe]>],
1409                              [4, 2, 1]>,
1410  //
1411  // Quad-register Integer Subtract (4 cycle)
1412  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1413                               InstrStage<1, [A9_MUX0], 0>,
1414                               InstrStage<1, [A9_DRegsN],   0, Required>,
1415                               // Extra latency cycles since write-back (wbck) is 6 cycles
1416                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1417                               InstrStage<1, [A9_NPipe]>],
1418                              [4, 2, 1]>,
1419
1420  //
1421  // Double-register Integer Count
1422  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, // Issue 0 or Issue 1
1423                               InstrStage<1, [A9_MUX0], 0>, // AGU + NEON/FPU multiplexer
1424                               InstrStage<1, [A9_DRegsN],   0, Required>, // FP register set, NEON side
1425                               // Extra latency cycles since write-back (wbck) is 6 cycles
1426                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>, // FP register set, VFP side
1427                               InstrStage<1, [A9_NPipe]>], // NEON pipeline
1428                              [3, 2, 2]>,
1429  //
1430  // Quad-register Integer Count
1431  // Result written in N3, but that is relative to the last cycle of a multicycle
1432  // instruction, so we use 4 for those cases
1433  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1434                               InstrStage<1, [A9_MUX0], 0>,
1435                               InstrStage<1, [A9_DRegsN],   0, Required>,
1436                               // Extra latency cycles since write-back (wbck) is 7 cycles
1437                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1438                               InstrStage<2, [A9_NPipe]>], // NEON pipeline held for 2 cycles
1439                              [4, 2, 2]>,
1440  //
1441  // Double-register Absolute Difference and Accumulate
1442  InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1443                               InstrStage<1, [A9_MUX0], 0>,
1444                               InstrStage<1, [A9_DRegsN],   0, Required>,
1445                               // Extra latency cycles since write-back (wbck) is 6 cycles
1446                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1447                               InstrStage<1, [A9_NPipe]>],
1448                              [6, 3, 2, 1]>,
1449  //
1450  // Quad-register Absolute Difference and Accumulate
1451  InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1452                               InstrStage<1, [A9_MUX0], 0>,
1453                               InstrStage<1, [A9_DRegsN],   0, Required>,
1454                               // Extra latency cycles since write-back (wbck) is 6 cycles
1455                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1456                               InstrStage<2, [A9_NPipe]>], // NOTE(review): 2 NPipe cycles, yet reservation and result cycle equal IIC_VABAD - confirm
1457                              [6, 3, 2, 1]>,
1458  //
1459  // Double-register Integer Pair Add Long
1460  InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1461                               InstrStage<1, [A9_MUX0], 0>,
1462                               InstrStage<1, [A9_DRegsN],   0, Required>,
1463                               // Extra latency cycles since write-back (wbck) is 6 cycles
1464                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1465                               InstrStage<1, [A9_NPipe]>],
1466                              [6, 3, 1]>,
1467  //
1468  // Quad-register Integer Pair Add Long
1469  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1470                               InstrStage<1, [A9_MUX0], 0>,
1471                               InstrStage<1, [A9_DRegsN],   0, Required>,
1472                               // Extra latency cycles since write-back (wbck) is 6 cycles
1473                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1474                               InstrStage<2, [A9_NPipe]>], // NOTE(review): 2 NPipe cycles, yet reservation and result cycle equal IIC_VPALiD - confirm
1475                              [6, 3, 1]>,
1476
1477  //
1478  // Double-register Integer Multiply (.8, .16)
1479  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, // Issue 0 or Issue 1
1480                               InstrStage<1, [A9_MUX0], 0>, // AGU + NEON/FPU multiplexer
1481                               InstrStage<1, [A9_DRegsN],   0, Required>, // FP register set, NEON side
1482                               // Extra latency cycles since write-back (wbck) is 6 cycles
1483                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>, // FP register set, VFP side
1484                               InstrStage<1, [A9_NPipe]>], // NEON pipeline
1485                              [6, 2, 2]>,
1486  //
1487  // Quad-register Integer Multiply (.8, .16)
1488  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1489                               InstrStage<1, [A9_MUX0], 0>,
1490                               InstrStage<1, [A9_DRegsN],   0, Required>,
1491                               // Extra latency cycles since write-back (wbck) is 7 cycles
1492                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1493                               InstrStage<2, [A9_NPipe]>], // NEON pipeline held for 2 cycles
1494                              [7, 2, 2]>,
1495
1496  //
1497  // Double-register Integer Multiply (.32)
1498  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1499                               InstrStage<1, [A9_MUX0], 0>,
1500                               InstrStage<1, [A9_DRegsN],   0, Required>,
1501                               // Extra latency cycles since write-back (wbck) is 7 cycles
1502                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1503                               InstrStage<2, [A9_NPipe]>], // NEON pipeline held for 2 cycles
1504                              [7, 2, 1]>,
1505  //
1506  // Quad-register Integer Multiply (.32)
1507  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1508                               InstrStage<1, [A9_MUX0], 0>,
1509                               InstrStage<1, [A9_DRegsN],   0, Required>,
1510                               // Extra latency cycles since write-back (wbck) is 9 cycles
1511                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1512                               InstrStage<4, [A9_NPipe]>], // NEON pipeline held for 4 cycles
1513                              [9, 2, 1]>,
1514  //
1515  // Double-register Integer Multiply-Accumulate (.8, .16)
1516  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, // Issue 0 or Issue 1
1517                               InstrStage<1, [A9_MUX0], 0>, // AGU + NEON/FPU multiplexer
1518                               InstrStage<1, [A9_DRegsN],   0, Required>, // FP register set, NEON side
1519                               // Extra latency cycles since write-back (wbck) is 6 cycles
1520                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>, // FP register set, VFP side
1521                               InstrStage<1, [A9_NPipe]>], // NEON pipeline
1522                              [6, 3, 2, 2]>,
1523  //
1524  // Double-register Integer Multiply-Accumulate (.32)
1525  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1526                               InstrStage<1, [A9_MUX0], 0>,
1527                               InstrStage<1, [A9_DRegsN],   0, Required>,
1528                               // Extra latency cycles since write-back (wbck) is 7 cycles
1529                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1530                               InstrStage<2, [A9_NPipe]>], // NEON pipeline held for 2 cycles
1531                              [7, 3, 2, 1]>,
1532  //
1533  // Quad-register Integer Multiply-Accumulate (.8, .16)
1534  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1535                               InstrStage<1, [A9_MUX0], 0>,
1536                               InstrStage<1, [A9_DRegsN],   0, Required>,
1537                               // Extra latency cycles since write-back (wbck) is 7 cycles
1538                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1539                               InstrStage<2, [A9_NPipe]>], // NEON pipeline held for 2 cycles
1540                              [7, 3, 2, 2]>,
1541  //
1542  // Quad-register Integer Multiply-Accumulate (.32)
1543  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1544                               InstrStage<1, [A9_MUX0], 0>,
1545                               InstrStage<1, [A9_DRegsN],   0, Required>,
1546                               // Extra latency cycles since write-back (wbck) is 9 cycles
1547                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1548                               InstrStage<4, [A9_NPipe]>], // NEON pipeline held for 4 cycles
1549                              [9, 3, 2, 1]>,
1550
1551  //
1552  // Move (VFP-side register set reserved for a single cycle only)
1553  InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, // Issue 0 or Issue 1
1554                               InstrStage<1, [A9_MUX0], 0>, // AGU + NEON/FPU multiplexer
1555                               InstrStage<1, [A9_DRegsN],   0, Required>, // FP register set, NEON side
1556                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>, // FP register set, VFP side
1557                               InstrStage<1, [A9_NPipe]>], // NEON pipeline
1558                              [1,1]>,
1559  //
1560  // Move Immediate
1561  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1562                               InstrStage<1, [A9_MUX0], 0>,
1563                               InstrStage<1, [A9_DRegsN],   0, Required>,
1564                               // Extra latency cycles since write-back (wbck) is 6 cycles
1565                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1566                               InstrStage<1, [A9_NPipe]>],
1567                              [3]>,
1568  //
1569  // Double-register Permute Move
1570  InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1571                               InstrStage<1, [A9_MUX0], 0>,
1572                               InstrStage<1, [A9_DRegsN],   0, Required>,
1573                               // Extra latency cycles since write-back (wbck) is 6 cycles
1574                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1575                               InstrStage<1, [A9_NPipe]>],
1576                              [2, 1]>,
1577  //
1578  // Quad-register Permute Move
1579  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1580                               InstrStage<1, [A9_MUX0], 0>,
1581                               InstrStage<1, [A9_DRegsN],   0, Required>,
1582                               // Extra latency cycles since write-back (wbck) is 6 cycles
1583                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1584                               InstrStage<1, [A9_NPipe]>],
1585                              [2, 1]>,
1586  //
1587  // Integer to Single-precision Move (VFP-side register set reserved for 3 cycles)
1588  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, // Issue 0 or Issue 1
1589                               InstrStage<1, [A9_MUX0], 0>, // AGU + NEON/FPU multiplexer
1590                               InstrStage<1, [A9_DRegsN],   0, Required>, // FP register set, NEON side
1591                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>, // FP register set, VFP side
1592                               InstrStage<1, [A9_NPipe]>], // NEON pipeline
1593                              [1, 1]>,
1594  //
1595  // Integer to Double-precision Move (VFP-side register set reserved for 3 cycles)
1596  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1597                               InstrStage<1, [A9_MUX0], 0>,
1598                               InstrStage<1, [A9_DRegsN],   0, Required>,
1599                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1600                               InstrStage<1, [A9_NPipe]>],
1601                              [1, 1, 1]>,
1602  //
1603  // Single-precision to Integer Move (VFP-side register set reserved for 3 cycles)
1604  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1605                               InstrStage<1, [A9_MUX0], 0>,
1606                               InstrStage<1, [A9_DRegsN],   0, Required>,
1607                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1608                               InstrStage<1, [A9_NPipe]>],
1609                              [2, 1]>,
1610  //
1611  // Double-precision to Integer Move (VFP-side register set reserved for 3 cycles)
1612  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1613                               InstrStage<1, [A9_MUX0], 0>,
1614                               InstrStage<1, [A9_DRegsN],   0, Required>,
1615                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1616                               InstrStage<1, [A9_NPipe]>],
1617                              [2, 2, 1]>,
1618  //
1619  // Integer to Lane Move (VFP-side register set reserved for 4 cycles, NEON pipeline for 2)
1620  InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1621                               InstrStage<1, [A9_MUX0], 0>,
1622                               InstrStage<1, [A9_DRegsN],   0, Required>,
1623                               InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1624                               InstrStage<2, [A9_NPipe]>],
1625                              [3, 1, 1]>,
1626
1627  //
1628  // Vector narrow move
1629  InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, // Issue 0 or Issue 1
1630                               InstrStage<1, [A9_MUX0], 0>, // AGU + NEON/FPU multiplexer
1631                               InstrStage<1, [A9_DRegsN],   0, Required>, // FP register set, NEON side
1632                               // Extra latency cycles since write-back (wbck) is 6 cycles
1633                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>, // FP register set, VFP side
1634                               InstrStage<1, [A9_NPipe]>], // NEON pipeline
1635                              [3, 1]>,
1636  //
1637  // Double-register FP Unary
1638  InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1639                               InstrStage<1, [A9_MUX0], 0>,
1640                               InstrStage<1, [A9_DRegsN],   0, Required>,
1641                               // Extra latency cycles since write-back (wbck) is 6 cycles
1642                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1643                               InstrStage<1, [A9_NPipe]>],
1644                              [5, 2]>,
1645  //
1646  // Quad-register FP Unary
1647  // Result written in N5, but that is relative to the last cycle of a multicycle
1648  // instruction, so we use 6 for those cases
1649  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1650                               InstrStage<1, [A9_MUX0], 0>,
1651                               InstrStage<1, [A9_DRegsN],   0, Required>,
1652                               // Extra latency cycles since write-back (wbck) is 7 cycles
1653                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1654                               InstrStage<2, [A9_NPipe]>], // NEON pipeline held for 2 cycles
1655                              [6, 2]>,
1656  //
1657  // Double-register FP Binary
1658  // FIXME: We're using this itin for many instructions and the trailing [2, 2]
1659  // operand cycles in [5, 2, 2] below are too optimistic.
1660  InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1661                               InstrStage<1, [A9_MUX0], 0>,
1662                               InstrStage<1, [A9_DRegsN],   0, Required>,
1663                               // Extra latency cycles since write-back (wbck) is 6 cycles
1664                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1665                               InstrStage<1, [A9_NPipe]>],
1666                              [5, 2, 2]>,
1667
1668  //
1669  // VPADD, etc.
1670  InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, // Issue 0 or Issue 1
1671                               InstrStage<1, [A9_MUX0], 0>, // AGU + NEON/FPU multiplexer
1672                               InstrStage<1, [A9_DRegsN],   0, Required>, // FP register set, NEON side
1673                               // Extra latency cycles since write-back (wbck) is 6 cycles
1674                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>, // FP register set, VFP side
1675                               InstrStage<1, [A9_NPipe]>], // NEON pipeline
1676                              [5, 1, 1]>,
1677  //
1678  // Double-register FP VMUL
1679  InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1680                               InstrStage<1, [A9_MUX0], 0>,
1681                               InstrStage<1, [A9_DRegsN],   0, Required>,
1682                               // Extra latency cycles since write-back (wbck) is 6 cycles
1683                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1684                               InstrStage<1, [A9_NPipe]>],
1685                              [5, 2, 1]>,
1686  //
1687  // Quad-register FP Binary
1688  // Result written in N5, but that is relative to the last cycle of a multicycle
1689  // instruction, so we use 6 for those cases
1690  // FIXME: We're using this itin for many instructions and the trailing [2, 2]
1691  // operand cycles in [6, 2, 2] below are too optimistic.
1692  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1693                               InstrStage<1, [A9_MUX0], 0>,
1694                               InstrStage<1, [A9_DRegsN],   0, Required>,
1695                               // Extra latency cycles since write-back (wbck) is 7 cycles
1696                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1697                               InstrStage<2, [A9_NPipe]>], // NEON pipeline held for 2 cycles
1698                              [6, 2, 2]>,
1699  //
1700  // Quad-register FP VMUL
1701  InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1702                               InstrStage<1, [A9_MUX0], 0>,
1703                               InstrStage<1, [A9_DRegsN],   0, Required>,
1704                               // Extra latency cycles since wbck is 7 cycles
1705                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1706                               InstrStage<1, [A9_NPipe]>],
1707                              [6, 2, 1]>,
1708  //
1709  // Double-register FP Multiple-Accumulate
1710  InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1711                               InstrStage<1, [A9_MUX0], 0>,
1712                               InstrStage<1, [A9_DRegsN],   0, Required>,
1713                               // Extra latency cycles since wbck is 7 cycles
1714                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1715                               InstrStage<2, [A9_NPipe]>],
1716                              [6, 3, 2, 1]>,
1717  //
1718  // Quad-register FP Multiple-Accumulate
1719  // Result written in N9, but that is relative to the last cycle of multicycle,
1720  // so we use 10 for those cases
1721  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1722                               InstrStage<1, [A9_MUX0], 0>,
1723                               InstrStage<1, [A9_DRegsN],   0, Required>,
1724                               // Extra latency cycles since wbck is 9 cycles
1725                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1726                               InstrStage<4, [A9_NPipe]>],
1727                              [8, 4, 2, 1]>,
1728  //
1729  // Double-register Fused FP Multiple-Accumulate
1730  InstrItinData<IIC_VFMACD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1731                               InstrStage<1, [A9_MUX0], 0>,
1732                               InstrStage<1, [A9_DRegsN],   0, Required>,
1733                               // Extra latency cycles since wbck is 7 cycles
1734                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1735                               InstrStage<2, [A9_NPipe]>],
1736                              [6, 3, 2, 1]>,
1737  //
1738  // Quad-register Fused FP Multiple-Accumulate
1739  // Result written in N9, but that is relative to the last cycle of multicycle,
1740  // so we use 10 for those cases
1741  InstrItinData<IIC_VFMACQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1742                               InstrStage<1, [A9_MUX0], 0>,
1743                               InstrStage<1, [A9_DRegsN],   0, Required>,
1744                               // Extra latency cycles since wbck is 9 cycles
1745                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1746                               InstrStage<4, [A9_NPipe]>],
1747                              [8, 4, 2, 1]>,
1748  //
  // Double-register Reciprocal Step
1750  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1751                               InstrStage<1, [A9_MUX0], 0>,
1752                               InstrStage<1, [A9_DRegsN],   0, Required>,
1753                               // Extra latency cycles since wbck is 10 cycles
1754                               InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1755                               InstrStage<1, [A9_NPipe]>],
1756                              [9, 2, 2]>,
1757  //
  // Quad-register Reciprocal Step
1759  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1760                               InstrStage<1, [A9_MUX0], 0>,
1761                               InstrStage<1, [A9_DRegsN],   0, Required>,
1762                               // Extra latency cycles since wbck is 11 cycles
1763                               InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1764                               InstrStage<2, [A9_NPipe]>],
1765                              [10, 2, 2]>,
1766  //
1767  // Double-register Permute
1768  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1769                               InstrStage<1, [A9_MUX0], 0>,
1770                               InstrStage<1, [A9_DRegsN],   0, Required>,
1771                               // Extra latency cycles since wbck is 6 cycles
1772                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1773                               InstrStage<1, [A9_NPipe]>],
1774                              [2, 2, 1, 1]>,
1775  //
1776  // Quad-register Permute
1777  // Result written in N2, but that is relative to the last cycle of multicycle,
1778  // so we use 3 for those cases
1779  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1780                               InstrStage<1, [A9_MUX0], 0>,
1781                               InstrStage<1, [A9_DRegsN],   0, Required>,
1782                               // Extra latency cycles since wbck is 7 cycles
1783                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1784                               InstrStage<2, [A9_NPipe]>],
1785                              [3, 3, 1, 1]>,
1786  //
1787  // Quad-register Permute (3 cycle issue)
1788  // Result written in N2, but that is relative to the last cycle of multicycle,
1789  // so we use 4 for those cases
1790  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1791                               InstrStage<1, [A9_MUX0], 0>,
1792                               InstrStage<1, [A9_DRegsN],   0, Required>,
1793                               // Extra latency cycles since wbck is 8 cycles
1794                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1795                               InstrStage<3, [A9_NPipe]>],
1796                              [4, 4, 1, 1]>,
1797
1798  //
1799  // Double-register VEXT
1800  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1801                               InstrStage<1, [A9_MUX0], 0>,
1802                               InstrStage<1, [A9_DRegsN],   0, Required>,
1803                               // Extra latency cycles since wbck is 6 cycles
1804                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1805                               InstrStage<1, [A9_NPipe]>],
1806                              [2, 1, 1]>,
1807  //
1808  // Quad-register VEXT
1809  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1810                               InstrStage<1, [A9_MUX0], 0>,
1811                               InstrStage<1, [A9_DRegsN],   0, Required>,
1812                               // Extra latency cycles since wbck is 7 cycles
1813                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1814                               InstrStage<2, [A9_NPipe]>],
1815                              [3, 1, 2]>,
1816  //
1817  // VTB
1818  InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1819                               InstrStage<1, [A9_MUX0], 0>,
1820                               InstrStage<1, [A9_DRegsN],   0, Required>,
1821                               // Extra latency cycles since wbck is 7 cycles
1822                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1823                               InstrStage<2, [A9_NPipe]>],
1824                              [3, 2, 1]>,
1825  InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1826                               InstrStage<1, [A9_MUX0], 0>,
1827                               InstrStage<2, [A9_DRegsN],   0, Required>,
1828                               // Extra latency cycles since wbck is 7 cycles
1829                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1830                               InstrStage<2, [A9_NPipe]>],
1831                              [3, 2, 2, 1]>,
1832  InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1833                               InstrStage<1, [A9_MUX0], 0>,
1834                               InstrStage<2, [A9_DRegsN],   0, Required>,
1835                               // Extra latency cycles since wbck is 8 cycles
1836                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1837                               InstrStage<3, [A9_NPipe]>],
1838                              [4, 2, 2, 3, 1]>,
1839  InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1840                               InstrStage<1, [A9_MUX0], 0>,
1841                               InstrStage<1, [A9_DRegsN],   0, Required>,
1842                               // Extra latency cycles since wbck is 8 cycles
1843                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1844                               InstrStage<3, [A9_NPipe]>],
1845                              [4, 2, 2, 3, 3, 1]>,
1846  //
1847  // VTBX
1848  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1849                               InstrStage<1, [A9_MUX0], 0>,
1850                               InstrStage<1, [A9_DRegsN],   0, Required>,
1851                               // Extra latency cycles since wbck is 7 cycles
1852                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1853                               InstrStage<2, [A9_NPipe]>],
1854                              [3, 1, 2, 1]>,
1855  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1856                               InstrStage<1, [A9_MUX0], 0>,
1857                               InstrStage<1, [A9_DRegsN],   0, Required>,
1858                               // Extra latency cycles since wbck is 7 cycles
1859                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1860                               InstrStage<2, [A9_NPipe]>],
1861                              [3, 1, 2, 2, 1]>,
1862  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1863                               InstrStage<1, [A9_MUX0], 0>,
1864                               InstrStage<1, [A9_DRegsN],   0, Required>,
1865                               // Extra latency cycles since wbck is 8 cycles
1866                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1867                               InstrStage<3, [A9_NPipe]>],
1868                              [4, 1, 2, 2, 3, 1]>,
1869  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1870                               InstrStage<1, [A9_MUX0], 0>,
1871                               InstrStage<1, [A9_DRegsN],   0, Required>,
1872                               // Extra latency cycles since wbck is 8 cycles
1873                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1874                               InstrStage<2, [A9_NPipe]>],
1875                              [4, 1, 2, 2, 3, 3, 1]>
1876]>;
1877
1878// ===---------------------------------------------------------------------===//
1879// The following definitions describe the simpler per-operand machine model.
1880// This works with MachineScheduler and will eventually replace itineraries.
1881
// Holder for an ordered list of WriteSequences, so that a variant can pick
// sub-ranges of the list (e.g. Writes[0-3]). SchedModel is declared unset
// here; the defs of this class live inside the
// `let SchedModel = CortexA9Model in` scope below.
class A9WriteLMOpsListType<list<WriteSequence> writes> {
  list<WriteSequence> Writes = writes;
  SchedMachineModel SchedModel = ?;
}
1886
// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
def CortexA9Model : SchedMachineModel {
  // 2 micro-ops are dispatched per cycle.
  let IssueWidth = 2;

  // Based on available renamed registers.
  let MicroOpBufferSize = 56;

  // Optimistic load latency assuming bypass. This is overridden by
  // OperandCycles if the Itineraries are queried instead.
  let LoadLatency = 2;

  // Based on estimate of pipeline depth.
  let MispredictPenalty = 8;

  // The legacy itinerary data defined earlier in this file.
  let Itineraries = CortexA9Itineraries;

  // FIXME: Many vector operations were never given an itinerary. We
  // haven't mapped these to the new model either.
  let CompleteModel = 0;
}
1902
1903//===----------------------------------------------------------------------===//
1904// Define each kind of processor resource and number available.
1905//
// The AGU unit has BufferSize=1 so that the latency between operations
// that use it is considered to stall other operations.
1908//
1909// The FP unit has BufferSize=0 so that it is a hard dispatch
1910// hazard. No instruction may be dispatched while the unit is reserved.
1911
1912let SchedModel = CortexA9Model in {
1913
// Integer ALU resource with two units.
def A9UnitALU : ProcResource<2>;

// Multiplier: one unit, declared with A9UnitALU as its Super resource.
def A9UnitMul : ProcResource<1> {
  let Super = A9UnitALU;
}

// Address generation: one unit with BufferSize = 1.
def A9UnitAGU : ProcResource<1> {
  let BufferSize = 1;
}

// Load/store: one unit.
def A9UnitLS : ProcResource<1>;

// Floating point: one unit with BufferSize = 0.
def A9UnitFP : ProcResource<1> {
  let BufferSize = 0;
}

// Branch: one unit.
def A9UnitB : ProcResource<1>;
1920
1921//===----------------------------------------------------------------------===//
1922// Define scheduler read/write types with their resources and latency on A9.
1923
// Occupies an issue slot without using any processor resource. Useful when
// every other write attached to the operand has NumMicroOps = 0.
def A9WriteIssue : SchedWriteRes<[]> {
  let Latency = 0;
}
1927
// Integer register write.
def A9WriteI : SchedWriteRes<[A9UnitALU]>;

// Integer register write, shifted-by-register form.
def A9WriteIsr : SchedWriteRes<[A9UnitALU]> {
  let Latency = 2;
}
1932
// Plain ALU operation.
def A9WriteALU : SchedWriteRes<[A9UnitALU]>;

// ALU operation with an operand shifted by an immediate. This attaches
// resources to the WriteALUsi type (declared outside this section) instead
// of defining an A9-specific write.
def : WriteRes<WriteALUsi, [A9UnitALU]> {
  let Latency = 2;
}

// ALU operation with an operand shifted by a register.
def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> {
  let Latency = 3;
}
1939
// Multiplication.
// A9WriteM names A9UnitMul twice in its resource list.
def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> {
  let Latency = 4;
}

// Second write paired with A9WriteM in the ItinRW mapping; adds no micro-ops.
def A9WriteMHi : SchedWriteRes<[A9UnitMul]> {
  let Latency = 5;
  let NumMicroOps = 0;
}

// 16-bit multiply counterpart of A9WriteM.
def A9WriteM16 : SchedWriteRes<[A9UnitMul]> {
  let Latency = 3;
}

// Second write paired with A9WriteM16; adds no micro-ops.
def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> {
  let Latency = 4;
  let NumMicroOps = 0;
}
1947
// Floating-point
// Only one FP or AGU instruction may issue per cycle. This is modeled by
// having every FP write consume the AGU resource in addition to the FP unit.
def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 4;
}
def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 1;
}
def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 5;
}
def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 6;
}
def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 8;
}
def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 9;
}
def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 15;
}
def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 25;
}
def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 17;
}
def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 32;
}
1961
// NEON has an odd mix of latencies, so these write types are simply named
// after their latency. Each one uses the FP unit and the AGU.
def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 1;
}
def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 2;
}
def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 3;
}
def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 4;
}
def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 5;
}
def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 6;
}
def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 7;
}
def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 9;
}
def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 10;
}
1972
// Reserve A9UnitFP for 2 consecutive cycles.
// ResourceCycles carries one entry per resource named in the SchedWriteRes
// list, in the same order: 2 cycles for A9UnitFP and 1 cycle for A9UnitAGU.
def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 4;
  let ResourceCycles = [2, 1];
}
def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 7;
  let ResourceCycles = [2, 1];
}
def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  let Latency = 9;
  let ResourceCycles = [2, 1];
}
1986
// Branches have no def operand, yet they still consume the branch resource.
def A9WriteB : SchedWriteRes<[A9UnitB]>;

// Address generation: uses the AGU but contributes no micro-ops.
def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> {
  let NumMicroOps = 0;
}
1992
// Integer load.
def A9WriteL : SchedWriteRes<[A9UnitLS]> {
  let Latency = 3;
}

// Upper 32 bits of a load, produced by the same micro-op: no resources and
// no additional micro-ops.
def A9WriteLHi : SchedWriteRes<[]> {
  let Latency = 3;
  let NumMicroOps = 0;
}

// Load with a shifted offset (mapped to the IIC_iLoad_si* itinerary classes).
def A9WriteLsi : SchedWriteRes<[A9UnitLS]> {
  let Latency = 4;
}

// Load (and zero extend) a byte.
def A9WriteLb : SchedWriteRes<[A9UnitLS]> {
  let Latency = 4;
}

// Byte load with a shifted offset (mapped to the IIC_iLoad_bh_si* classes).
def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> {
  let Latency = 5;
}
2003
// Aligned floating-point load or store: uses both the LS and the FP unit.
def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
  let Latency = 1;
}

// Integer store.
def A9WriteS : SchedWriteRes<[A9UnitLS]>;
2009
2010//===----------------------------------------------------------------------===//
2011// Define resources dynamically for load multiple variants.
2012
// Helpers that add latency without consuming resources or micro-ops.
// A9WriteCycle1 is the one-cycle unit; A9WriteCycle2 through A9WriteCycle8
// are sequences repeating it NumCycles times.
def A9WriteCycle1 : SchedWriteRes<[]> {
  let Latency = 1;
  let NumMicroOps = 0;
}
foreach NumCycles = 2-8 in {
  def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
} // foreach NumCycles
2018
// Address generation sequences and selection predicates for the 8 flavors
// of LDM.
foreach NumAddr = 1-8 in {

  // A9WriteAdr1-8: A9WriteAdr repeated NumAddr times, with additive latency,
  // for instructions that generate multiple loads or stores.
  def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;

  // A9LMAdr1-8Pred: matches when the number of LDM addresses, rounded up to
  // a whole number of pairs, equals NumAddr.
  def A9LMAdr#NumAddr#Pred :
    SchedPredicate<"(TII->getNumLDMAddresses(*MI)+1)/2 == "#NumAddr>;

} // foreach NumAddr
2031
// Fall-back predicate: matches when getNumLDMAddresses reports 0, i.e. the
// LDM size is unknown.
def A9LMUnknownPred :
  SchedPredicate<"TII->getNumLDMAddresses(*MI) == 0">;
2034
// Address generation latency and resources for LDM/VLDM/VLDn.
// Whichever predicate matches selects the A9WriteAdrN sequence with the
// same N.
def A9WriteLMAdr : SchedWriteVariant<[
    SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
    SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
    SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
    SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
    SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
    SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
    SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
    SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
    // Unknown LDM/VLDM/VSTM: assume 2 32-bit registers.
    SchedVar<A9LMUnknownPred, [A9WriteAdr2]>
]>;
2048
// LDM resources.
// Neither write takes an issue resource (NumMicroOps = 0), so they can be
// combined with other writes such as WriteB.

// A9WriteLMLo: a single LS resource and 2 cycles of latency.
def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> {
  let Latency = 2;
  let NumMicroOps = 0;
}

// A9WriteLMHi: assuming aligned access, the upper half of each pair is free
// (no resources) and has the same latency.
def A9WriteLMHi : SchedWriteRes<[]> {
  let Latency = 2;
  let NumMicroOps = 0;
}
// A9WriteL<N> and A9WriteL<N>Hi: the Lo (respectively Hi) LDM write followed
// by the A9WriteCycle<N> helper, which adds N cycles of latency without
// consuming additional resources.
foreach NumAddr = 1-8 in {
  def A9WriteL#NumAddr : WriteSequence<[
      A9WriteLMLo,
      !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
  def A9WriteL#NumAddr#Hi : WriteSequence<[
      A9WriteLMHi,
      !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
}
2067
2068//===----------------------------------------------------------------------===//
2069// LDM: Load multiple into 32-bit integer registers.
2070
// Ordered (Lo, Hi) write pairs for LDM: entries 2*(N-1) and 2*(N-1)+1 are
// A9WriteL<N> and A9WriteL<N>Hi. A9WriteLM slices prefixes of this list.
def A9WriteLMOpsList : A9WriteLMOpsListType<[
    A9WriteL1, A9WriteL1Hi,
    A9WriteL2, A9WriteL2Hi,
    A9WriteL3, A9WriteL3Hi,
    A9WriteL4, A9WriteL4Hi,
    A9WriteL5, A9WriteL5Hi,
    A9WriteL6, A9WriteL6Hi,
    A9WriteL7, A9WriteL7Hi,
    A9WriteL8, A9WriteL8Hi
]>;
2080
// LDM write variants. Each predicate selects a prefix of
// A9WriteLMOpsList.Writes: one (Lo, Hi) pair of writes per 64-bit value
// loaded. With an odd number of registers the trailing A9WriteLnHi is
// naturally ignored, because the instruction has no following def operand.
// None of these writes takes an issue resource, so they may need to be part
// of a WriteSequence that includes A9WriteIssue.
def A9WriteLM : SchedWriteVariant<[
    SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
    SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
    SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
    SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
    SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
    SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
    SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
    SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
    // Unknown LDM: supply the maximum number of writes, but let only the
    // first two pairs start with a resource-consuming write (A9WriteL1 and
    // A9WriteL2); every remaining slot uses a Hi write.
    SchedVar<A9LMUnknownPred,
             [A9WriteL1,   A9WriteL1Hi,
              A9WriteL2,   A9WriteL2Hi,
              A9WriteL3Hi, A9WriteL3Hi,
              A9WriteL4Hi, A9WriteL4Hi,
              A9WriteL5Hi, A9WriteL5Hi,
              A9WriteL6Hi, A9WriteL6Hi,
              A9WriteL7Hi, A9WriteL7Hi,
              A9WriteL8Hi, A9WriteL8Hi]>
]> {
  let Variadic = 1;
}
2107
2108//===----------------------------------------------------------------------===//
2109// VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.
2110
// A9WriteLfpOp uses the same resources and latency as A9WriteLSfp but takes
// no issue resources (NumMicroOps = 0), so it can be used in WriteSequences
// for single-issue instructions that encapsulate multiple loads.
def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
  let NumMicroOps = 0;
  let Latency = 1;
}
2118
foreach NumAddr = 1-8 in {

  // A9WriteLfp1-8Seq: helper sequence of NumAddr fp loads that carries no
  // micro-ops of its own.
  def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;

  // A9WriteLfp1-8: A9WriteIssue (one issue slot) followed by the matching
  // Seq, so the A9WriteLfpOp latencies add up. Used directly to describe
  // NEON VLDn.
  def A9WriteLfp#NumAddr : WriteSequence<[
      A9WriteIssue,
      !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;

  // A9WriteLfp1-8Mov: like the above but led by A9WriteF, which adds
  // latency and an FP resource for permuting the loaded values.
  def A9WriteLfp#NumAddr#Mov : WriteSequence<[
      A9WriteF,
      !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;

} // foreach NumAddr
2136
// VLDM/VSTM PreRA resources.
// A predicate picks the A9WriteLfp1-8 sequence with the matching N. This
// covers the preRA VLDM forms, in which all 64-bit loads are written to one
// tuple of either single or double precision registers.
def A9WriteLMfpPreRA : SchedWriteVariant<[
    SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
    SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
    SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
    SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
    SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
    SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
    SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
    SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
    // Unknown VLDM/VSTM PreRA: assume 2xS registers.
    SchedVar<A9LMUnknownPred, [A9WriteLfp2]>
]>;
2153
// VLDM/VSTM PostRA resources.
// A9WriteLMfpLo uses one LS and one FP resource plus an issue slot, and
// contributes no latency of its own.
def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
  let Latency = 0;
}
2157
foreach NumAddr = 1-8 in {

  // A9WriteLMfp<N>: A9WriteLMfpLo followed by A9WriteCycle<N>, which adds N
  // cycles of latency without consuming additional resources.
  def A9WriteLMfp#NumAddr : WriteSequence<[
      A9WriteLMfpLo,
      !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;

  // A9WriteLMfp<N>Hi: assuming aligned access, the upper half of each pair
  // is free (A9WriteLMHi) and gets the same added latency.
  def A9WriteLMfp#NumAddr#Hi : WriteSequence<[
      A9WriteLMHi,
      !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;

} // foreach NumAddr
2171
2172// VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
2173// pair of writes for each 64-bit data loaded. When the number of
2174// registers is odd, the last WriteLMfpnHi is naturally ignored because
2175// the instruction has no following def operands.
2176
// Backing list for the A9WriteLMfpPostRA slices. Indices 0-7 hold the
// resource-consuming writes A9WriteLMfp1..8; indices 8-22 hold the Hi
// writes, one A9WriteLMfp1Hi followed by two of each A9WriteLMfp2Hi..8Hi.
// The trailing comments give the list indices of each row; the variant
// below slices by these indices, so the element order must not change.
def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<
                 [A9WriteLMfp1, A9WriteLMfp2,       // 0-1
                  A9WriteLMfp3, A9WriteLMfp4,       // 2-3
                  A9WriteLMfp5, A9WriteLMfp6,       // 4-5
                  A9WriteLMfp7, A9WriteLMfp8,       // 6-7
                  A9WriteLMfp1Hi,                   // 8-8
                  A9WriteLMfp2Hi, A9WriteLMfp2Hi,   // 9-10
                  A9WriteLMfp3Hi, A9WriteLMfp3Hi,   // 11-12
                  A9WriteLMfp4Hi, A9WriteLMfp4Hi,   // 13-14
                  A9WriteLMfp5Hi, A9WriteLMfp5Hi,   // 15-16
                  A9WriteLMfp6Hi, A9WriteLMfp6Hi,   // 17-18
                  A9WriteLMfp7Hi, A9WriteLMfp7Hi,   // 19-20
                  A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
2190
// PostRA VLDM write variants. For the predicate with index N, the selected
// writes are the first N entries of A9WriteLMfpPostRAOpsList.Writes
// (A9WriteLMfp1..N) followed by the N Hi entries that end at list index
// 2*N+6, i.e. at the last A9WriteLMfp<N>Hi.
def A9WriteLMfpPostRA : SchedWriteVariant<[
  SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
  SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
  SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
  SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
  SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
  SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
  SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
  SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
  // For unknown LDMs, define the maximum number of writes, but only
  // make the first two consume resources. We are optimizing for the case
  // where the operands are DPRs, and this determines the first eight
  // types. The remaining eight types are filled to cover the case
  // where the operands are SPRs.
  SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2,
                             A9WriteLMfp3Hi, A9WriteLMfp4Hi,
                             A9WriteLMfp5Hi, A9WriteLMfp6Hi,
                             A9WriteLMfp7Hi, A9WriteLMfp8Hi,
                             A9WriteLMfp5Hi, A9WriteLMfp5Hi,
                             A9WriteLMfp6Hi, A9WriteLMfp6Hi,
                             A9WriteLMfp7Hi, A9WriteLMfp7Hi,
                             A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> {
  // Marked variadic, like A9WriteLM.
  let Variadic = 1;
}
2215
// Tell apart the multiple MI-level forms of the same VLDM/VSTM
// instructions by checking whether operand 0 is a virtual register (PreRA)
// or a physical register (PostRA).
def A9PreRA :
  SchedPredicate<"TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">;
def A9PostRA :
  SchedPredicate<"TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">;
2222
// Unlike LDM, VLDM represents all destination registers as a single
// register tuple, so the number of write operands is not variadic. Dispatch
// to the PreRA or PostRA variant according to the form of the instruction.
def A9WriteLMfp : SchedWriteVariant<[
    SchedVar<A9PreRA,  [A9WriteLMfpPreRA]>,
    SchedVar<A9PostRA, [A9WriteLMfpPostRA]>
]>;
2228
2229//===----------------------------------------------------------------------===//
2230// Resources for other (non-LDM/VLDM) Variants.
2231
// Writers for the two-instruction mov-immediate forms. They are always
// expanded, and the latencies of the listed writes add up.
def A9WriteI2   : WriteSequence<[A9WriteI, A9WriteI]>;
def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
2237
// Some ALU operations can read loaded integer values one cycle early.
// The advance of 1 applies only to the load writes listed here.
def A9ReadALU : SchedReadAdvance<1, [
    A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
    A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
    A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
    A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
    A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi
]>;
2245
// A9Read<N>: for operands that are unconditionally read in cycle N after
// the instruction issues. The producer's latency is reduced by N-1.
def A9Read2 : SchedReadAdvance<1>; // read in cycle 2
def A9Read3 : SchedReadAdvance<2>; // read in cycle 3
def A9Read4 : SchedReadAdvance<3>; // read in cycle 4
2251
2252//===----------------------------------------------------------------------===//
2253// Map itinerary classes to scheduler read/write resources per operand.
2254//
2255// For ARM, we piggyback scheduler resources on the Itinerary classes
2256// to avoid perturbing the existing instruction definitions.
2257
2258// This table follows the ARM Cortex-A9 Technical Reference Manuals,
2259// mostly in order.
2260
// Moves: immediate, register and shifted-by-immediate forms.
def : ItinRW<[WriteALU],
             [IIC_iMOVi, IIC_iMOVr, IIC_iMOVsi,
              IIC_iMVNi, IIC_iMVNsi,
              IIC_iCMOVi, IIC_iCMOVr, IIC_iCMOVsi]>;
// MVN from a register also gets the A9ReadALU read.
def : ItinRW<[WriteALU, A9ReadALU], [IIC_iMVNr]>;
// Shifted-by-register moves.
def : ItinRW<[A9WriteIsr], [IIC_iMOVsr, IIC_iMVNsr, IIC_iCMOVsr]>;
2266
// Two-instruction mov-immediate forms use the A9WriteI2* sequences.
def : ItinRW<[A9WriteI2], [IIC_iMOVix2, IIC_iCMOVix2]>;
def : ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
def : ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
2270
// Unshifted ALU classes.
def : ItinRW<[WriteALU],
             [IIC_iBITi, IIC_iBITr, IIC_iUNAr, IIC_iTSTi, IIC_iTSTr]>;
def : ItinRW<[WriteALU, A9ReadALU],
             [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
def : ItinRW<[WriteALU, A9ReadALU, A9ReadALU],
             [IIC_iALUr, IIC_iCMPr]>;
// Shifted-by-immediate ALU classes.
def : ItinRW<[WriteALUsi],
             [IIC_iBITsi, IIC_iUNAsi, IIC_iEXTr, IIC_iTSTsi]>;
def : ItinRW<[WriteALUsi, A9ReadALU],
             [IIC_iALUsi]>;
def : ItinRW<[WriteALUsi, ReadDefault, A9ReadALU],
             [IIC_iALUsir]>; // RSB
// Shifted-by-register ALU classes.
def : ItinRW<[A9WriteALUsr],
             [IIC_iBITsr, IIC_iTSTsr, IIC_iEXTAr, IIC_iEXTAsr]>;
def : ItinRW<[A9WriteALUsr, A9ReadALU],
             [IIC_iALUsr, IIC_iCMPsr]>;
2279
// Multiplies. The second (Hi) write is ignored for MUL32.
def : ItinRW<[A9WriteM, A9WriteMHi],
             [IIC_iMUL32, IIC_iMAC32, IIC_iMUL64, IIC_iMAC64]>;
// FIXME: SMLALxx needs itin classes
def : ItinRW<[A9WriteM16, A9WriteM16Hi],
             [IIC_iMUL16, IIC_iMAC16]>;
2285
2286// TODO: For floating-point ops, we model the pipeline forwarding
2287// latencies here. WAW latencies are sometimes longer.
2288
// FP status, moves, unary operations and compares.
def : ItinRW<[A9WriteFMov],
             [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
              IIC_fpUNA32, IIC_fpUNA64,
              IIC_fpCMP32, IIC_fpCMP64]>;
// IIC_fpMOVDI gets two A9WriteFMov writes.
def : ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
// Conversions and FP ALU operations.
def : ItinRW<[A9WriteF],
             [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
              IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
              IIC_fpALU32, IIC_fpALU64]>;
// Multiply, multiply-accumulate, divide and square root, each in a 32-bit
// and a 64-bit flavor.
def : ItinRW<[A9WriteFMulS],  [IIC_fpMUL32]>;
def : ItinRW<[A9WriteFMulD],  [IIC_fpMUL64]>;
def : ItinRW<[A9WriteFMAS],   [IIC_fpMAC32]>;
def : ItinRW<[A9WriteFMAD],   [IIC_fpMAC64]>;
def : ItinRW<[A9WriteFDivS],  [IIC_fpDIV32]>;
def : ItinRW<[A9WriteFDivD],  [IIC_fpDIV64]>;
def : ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
def : ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;
2304
// Branches.
def : ItinRW<[A9WriteB], [IIC_Br]>;

// A9 PLD is processed in a dedicated unit, so it maps to an empty list.
def : ItinRW<[], [IIC_Preload]>;
2309
// Note: Loads must be assumed to be aligned, because the machine model
// cannot know alignment statically and A9 ignores alignment hints.

// A9WriteAdr consumes the AGU regardless of address writeback, but its
// latency is only relevant for users of an updated address.
def : ItinRW<[A9WriteL, A9WriteAdr],
             [IIC_iLoad_i, IIC_iLoad_r,
              IIC_iLoad_iu, IIC_iLoad_ru]>;
def : ItinRW<[A9WriteLsi, A9WriteAdr],
             [IIC_iLoad_si, IIC_iLoad_siu]>;
def : ItinRW<[A9WriteLb, A9WriteAdr2],
             [IIC_iLoad_bh_i, IIC_iLoad_bh_r,
              IIC_iLoad_bh_iu, IIC_iLoad_bh_ru]>;
def : ItinRW<[A9WriteLbsi, A9WriteAdr2],
             [IIC_iLoad_bh_si, IIC_iLoad_bh_siu]>;
def : ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr],
             [IIC_iLoad_d_i, IIC_iLoad_d_r,
              IIC_iLoad_d_ru]>;
// A store either has no def operands, or a single def for the address
// writeback.
def : ItinRW<[A9WriteAdr, A9WriteS],
             [IIC_iStore_i, IIC_iStore_r,
              IIC_iStore_iu, IIC_iStore_ru,
              IIC_iStore_d_i, IIC_iStore_d_r,
              IIC_iStore_d_ru]>;
def : ItinRW<[A9WriteAdr2, A9WriteS],
             [IIC_iStore_si, IIC_iStore_siu,
              IIC_iStore_bh_i, IIC_iStore_bh_r,
              IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
def : ItinRW<[A9WriteAdr3, A9WriteS],
             [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
2332
// A9WriteLM will be expanded into a separate write for each def
// operand. Address generation consumes resources, but A9WriteLMAdr
// is listed after all def operands, so it has no effective latency.
//
// Note: A9WriteLM expands into an even number of def operands. The
// actual number of def operands may be less by one.
def : ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue],
             [IIC_iLoad_m, IIC_iPop]>;

// Load multiple with address writeback has an extra def operand in
// front of the loaded registers.
//
// The load-multiple variants are reused for store-multiple because the
// resources are identical. For stores, only the address writeback
// has a def operand, so the WriteL latencies are unused.
def : ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue],
             [IIC_iLoad_mu,
              IIC_iStore_m,
              IIC_iStore_mu]>;
def : ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB],
             [IIC_iLoad_mBr, IIC_iPop_Br]>;
def : ItinRW<[A9WriteL, A9WriteAdr, WriteALU],
             [IIC_iLoadiALU]>;
2352
// FP loads: the data write comes first, then the address write.
def : ItinRW<[A9WriteLSfp, A9WriteAdr],
             [IIC_fpLoad32, IIC_fpLoad64]>;

// FP load multiple; the *_mu class lists the address write first.
def : ItinRW<[A9WriteLMfp, A9WriteLMAdr],
             [IIC_fpLoad_m]>;
def : ItinRW<[A9WriteLMAdr, A9WriteLMfp],
             [IIC_fpLoad_mu]>;
// FP stores, single and multiple, list the address write first.
def : ItinRW<[A9WriteAdr, A9WriteLSfp],
             [IIC_fpStore32, IIC_fpStore64,
              IIC_fpStore_m, IIC_fpStore_mu]>;
2359
// Note: Unlike VLDM, VLD1 expects the writeback operand after the
// normal writes, so the address write is listed last here.
def : ItinRW<[A9WriteLfp1, A9WriteAdr1],
             [IIC_VLD1, IIC_VLD1u,
              IIC_VLD1x2, IIC_VLD1x2u]>;
def : ItinRW<[A9WriteLfp2, A9WriteAdr2],
             [IIC_VLD1x3, IIC_VLD1x3u,
              IIC_VLD1x4, IIC_VLD1x4u,
              IIC_VLD4dup, IIC_VLD4dupu]>;
def : ItinRW<[A9WriteLfp1Mov, A9WriteAdr1],
             [IIC_VLD1dup, IIC_VLD1dupu,
              IIC_VLD2, IIC_VLD2u,
              IIC_VLD2dup, IIC_VLD2dupu]>;
def : ItinRW<[A9WriteLfp2Mov, A9WriteAdr1],
             [IIC_VLD1ln, IIC_VLD1lnu,
              IIC_VLD2x2, IIC_VLD2x2u,
              IIC_VLD2ln, IIC_VLD2lnu]>;
def : ItinRW<[A9WriteLfp3Mov, A9WriteAdr3],
             [IIC_VLD3, IIC_VLD3u,
              IIC_VLD3dup, IIC_VLD3dupu]>;
def : ItinRW<[A9WriteLfp4Mov, A9WriteAdr4],
             [IIC_VLD4, IIC_VLD4u,
              IIC_VLD4ln, IIC_VLD4lnu]>;
def : ItinRW<[A9WriteLfp5Mov, A9WriteAdr5],
             [IIC_VLD3ln, IIC_VLD3lnu]>;
2378
// Vector stores use resources similar to vector loads, so the load write
// types are reused. For stores with address writeback the address write
// must come first.
def : ItinRW<[A9WriteAdr1, A9WriteLfp1],
             [IIC_VST1, IIC_VST1u,
              IIC_VST1x2, IIC_VST1x2u,
              IIC_VST1ln, IIC_VST1lnu,
              IIC_VST2, IIC_VST2u,
              IIC_VST2x2, IIC_VST2x2u,
              IIC_VST2ln, IIC_VST2lnu]>;
def : ItinRW<[A9WriteAdr2, A9WriteLfp2],
             [IIC_VST1x3, IIC_VST1x3u,
              IIC_VST1x4, IIC_VST1x4u,
              IIC_VST3, IIC_VST3u,
              IIC_VST3ln, IIC_VST3lnu,
              IIC_VST4, IIC_VST4u,
              IIC_VST4ln, IIC_VST4lnu]>;
2394
// NEON moves, grouped by the A9WriteV<N> write each class uses.
def : ItinRW<[A9WriteV2],
             [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
def : ItinRW<[A9WriteV1],
             [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
def : ItinRW<[A9WriteV3],
             [IIC_VMOVISL, IIC_VMOVN]>;
2399
// NEON integer arithmetic
//
// Each mapping is preceded by the instructions it is meant to cover.

// VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
def : ItinRW<[A9WriteV3, A9Read2, A9Read2],
             [IIC_VBINiD, IIC_VBINiQ]>;
// VSUB/VMVN/VCLSD/VCLZD/VCNTD
def : ItinRW<[A9WriteV3, A9Read2],
             [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
// VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
// ...
// VHADD/VRHADD/VQADD/VTST/VADH/VRADH
def : ItinRW<[A9WriteV4, A9Read2, A9Read2],
             [IIC_VBINi4D, IIC_VBINi4Q]>;

// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
def : ItinRW<[A9WriteV4, A9Read2],
             [IIC_VSUBi4D, IIC_VSUBi4Q]>;
// VQNEG/VQABS
def : ItinRW<[A9WriteV4],
             [IIC_VQUNAiD, IIC_VQUNAiQ]>;
// VABS
def : ItinRW<[A9WriteV4, A9Read2],
             [IIC_VUNAiD, IIC_VUNAiQ]>;
// VPADD/VPADDL are mapped later under IIC_SHLi.
// ...
// VCLSQ/VCLZQ/VCNTQ, which take two cycles.
def : ItinRW<[A9Write2V4, A9Read3],
             [IIC_VCNTiQ]>;
// VMOVimm/VMVNimm/VORRimm/VBICimm
def : ItinRW<[A9WriteV3],
             [IIC_VMOVImm]>;
def : ItinRW<[A9WriteV6, A9Read3, A9Read2],
             [IIC_VABAD, IIC_VABAQ]>;
def : ItinRW<[A9WriteV6, A9Read3],
             [IIC_VPALiD, IIC_VPALiQ]>;
2425
// NEON integer multiply
//
// Note: these values do not quite match the timing docs, but they do
// match the original A9 itinerary.
def : ItinRW<[A9WriteV6, A9Read2, A9Read2],
             [IIC_VMULi16D]>;
def : ItinRW<[A9WriteV7, A9Read2, A9Read2],
             [IIC_VMULi16Q]>;
def : ItinRW<[A9Write2V7, A9Read2],
             [IIC_VMULi32D]>;
def : ItinRW<[A9Write2V9, A9Read2],
             [IIC_VMULi32Q]>;
// Multiply-accumulate classes list an extra A9Read3 right after the write.
def : ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2],
             [IIC_VMACi16D]>;
def : ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2],
             [IIC_VMACi16Q]>;
def : ItinRW<[A9Write2V7, A9Read3, A9Read2],
             [IIC_VMACi32D]>;
def : ItinRW<[A9Write2V9, A9Read3, A9Read2],
             [IIC_VMACi32Q]>;
2438
// NEON integer shift
// TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
def : ItinRW<[A9WriteV3],
             [IIC_VSHLiD, IIC_VSHLiQ]>;
def : ItinRW<[A9WriteV4],
             [IIC_VSHLi4D, IIC_VSHLi4Q]>;
2443
// NEON permute
def : ItinRW<[A9WriteV2, A9WriteV2],
             [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
def : ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
             [IIC_VPERMQ3, IIC_VEXTQ]>;
// Table lookups: VTB1..VTB4 list one more read per step.
def : ItinRW<[A9WriteV3, A9Read2],
             [IIC_VTB1]>;
def : ItinRW<[A9WriteV3, A9Read2, A9Read2],
             [IIC_VTB2]>;
def : ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3],
             [IIC_VTB3]>;
def : ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3],
             [IIC_VTB4]>;
// The VTBX classes mirror VTB with a ReadDefault inserted after the write.
def : ItinRW<[A9WriteV3, ReadDefault, A9Read2],
             [IIC_VTBX1]>;
def : ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2],
             [IIC_VTBX2]>;
def : ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3],
             [IIC_VTBX3]>;
def : ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
             [IIC_VTBX4]>;
2457
// NEON floating-point. Each Q-register class uses the A9WriteV<N> one
// step above its D-register counterpart.
def : ItinRW<[A9WriteV5, A9Read2, A9Read2],
             [IIC_VBIND]>;
def : ItinRW<[A9WriteV6, A9Read2, A9Read2],
             [IIC_VBINQ]>;
def : ItinRW<[A9WriteV5, A9Read2],
             [IIC_VUNAD, IIC_VFMULD]>;
def : ItinRW<[A9WriteV6, A9Read2],
             [IIC_VUNAQ, IIC_VFMULQ]>;
def : ItinRW<[A9WriteV9, A9Read3, A9Read2],
             [IIC_VMACD, IIC_VFMACD]>;
def : ItinRW<[A9WriteV10, A9Read3, A9Read2],
             [IIC_VMACQ, IIC_VFMACQ]>;
def : ItinRW<[A9WriteV9, A9Read2, A9Read2],
             [IIC_VRECSD]>;
def : ItinRW<[A9WriteV10, A9Read2, A9Read2],
             [IIC_VRECSQ]>;
2467
// Alias the generic SchedRWs that behave identically on cortexa9 to the
// existing A9 resources.
def : SchedAlias<WriteALU,    A9WriteALU>;
def : SchedAlias<WriteALUsr,  A9WriteALUsr>;
def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
def : SchedAlias<ReadALU,     A9ReadALU>;
def : SchedAlias<ReadALUsr,   A9ReadALU>;
// Per-instruction mappings for the bitwise logical operations.
def : InstRW<[WriteALU],
             (instregex "ANDri", "ORRri", "EORri", "BICri",
                        "ANDrr", "ORRrr", "EORrr", "BICrr")>;
def : InstRW<[WriteALUsi],
             (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>;
def : InstRW<[WriteALUsr],
             (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>;


// All three compare writes alias the plain A9 ALU write.
def : SchedAlias<WriteCMP,   A9WriteALU>;
def : SchedAlias<WriteCMPsi, A9WriteALU>;
def : SchedAlias<WriteCMPsr, A9WriteALU>;
2484
// Per-instruction mappings for moves.
def : InstRW<[A9WriteIsr],
             (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi", "MOVCCsr")>;
def : InstRW<[WriteALU, A9ReadALU],
             (instregex "MVNr")>;
def : InstRW<[A9WriteI2],
             (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn")>;
def : InstRW<[A9WriteI2pc],
             (instregex "MOV_ga_pcrel")>;
def : InstRW<[A9WriteI2ld],
             (instregex "MOV_ga_pcrel_ldr")>;

// SEL
def : InstRW<[WriteALU],
             (instregex "SEL")>;

// Bitfield clear / insert / extract.
def : InstRW<[WriteALUsi],
             (instregex "BFC", "BFI", "UBFX", "SBFX")>;
2496
// Per-instruction mappings for the integer multiplies.
//
// NOTE(review): the explicit "$" on "SMLAL$" suggests instregex patterns are
// not end-anchored by default, which keeps that entry from also matching the
// SMLALBB/SMLALD families listed separately -- confirm against the TableGen
// version in use.

// Multiplies with a single result write.
def : InstRW<[A9WriteM],
             (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5",
                        "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
// Long multiplies: low and high result writes.
def : InstRW<[A9WriteM, A9WriteMHi],
             (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$",
                        "UMLAL", "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5",
                        "SMLALBB", "SMLALBT", "SMLALTB", "SMLALTT")>;
// FIXME: These instructions used to have NoItinerary. Just copied the one
// from above.
//
// "SMLSLDX" was previously misspelled "SMLLDX", which names no ARM
// instruction; the list is made of base/X pairs and this is the partner of
// "SMLSLD".
def : InstRW<[A9WriteM, A9WriteMHi],
             (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX",
                        "SMLSD", "SMLSDX", "SMLSLD", "SMLSLDX",
                        "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;

// 16-bit multiplies and multiply-accumulates.
def : InstRW<[A9WriteM16, A9WriteM16Hi],
             (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT",
                        "SMULWB", "SMULWT")>;
def : InstRW<[A9WriteM16, A9WriteM16Hi],
             (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT",
                        "SMLAWB", "SMLAWT")>;
2513
// Per-instruction mappings for integer loads.

// Word loads.
def : InstRW<[A9WriteL],
             (instregex "LDRi12", "PICLDR$")>;
def : InstRW<[A9WriteLsi],
             (instregex "LDRrs")>;
// Byte / halfword loads.
def : InstRW<[A9WriteLb],
             (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH",
                        "PICLDRSB", "LDRH", "LDRSH", "LDRSB")>;
// This entry previously repeated "LDRrs", which already has A9WriteLsi
// above. The ItinRW table pairs A9WriteLbsi with the IIC_iLoad_bh_si
// classes, so the byte-load form LDRBrs is the intended target.
def : InstRW<[A9WriteLbsi],
             (instregex "LDRBrs")>;
2520
// WriteDiv: no resources and zero latency.
def : WriteRes<WriteDiv, []> {
  let Latency = 0;
}

// All branch writes use A9UnitB.
def : WriteRes<WriteBr,    [A9UnitB]>;
def : WriteRes<WriteBrL,   [A9UnitB]>;
def : WriteRes<WriteBrTbl, [A9UnitB]>;
// Preload consumes none of the modelled resources.
def : WriteRes<WritePreLd, []>;
def : SchedAlias<WriteCvtFP, A9WriteF>;
// WriteNoop: no resources, zero latency, and no micro-ops.
def : WriteRes<WriteNoop, []> {
  let Latency = 0;
  let NumMicroOps = 0;
}
2529} // SchedModel = CortexA9Model
2530