1//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the itinerary class data for the ARM Cortex A9 processors.
11//
12//===----------------------------------------------------------------------===//
13
14//
15// Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16// Technical Reference Manual".
17//
18// Functional units
// Each unit declared here is listed in the functional-unit list passed to
// CortexA9Itineraries and is named by InstrStage entries in its itinerary data.
19def A9_Issue0  : FuncUnit; // Issue 0
20def A9_Issue1  : FuncUnit; // Issue 1
21def A9_Branch  : FuncUnit; // Branch
22def A9_ALU0    : FuncUnit; // ALU / MUL pipeline 0 (multiplies stage only here)
23def A9_ALU1    : FuncUnit; // ALU pipeline 1
24def A9_AGU     : FuncUnit; // Address generation unit for ld / st
25def A9_NPipe   : FuncUnit; // NEON pipeline
26def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
27def A9_LSUnit  : FuncUnit; // L/S Unit
// The two units below are artificial: the itineraries use them to model stalls
// between VFP and NEON instructions, which share one register file.
28def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
29def A9_DRegsN  : FuncUnit; // FP register set, NEON side
30
31// Bypasses
// A9_LdBypass is the only bypass passed to CortexA9Itineraries. It appears in
// the bypass lists of the load itineraries and of several ALU, MVN and compare
// itineraries; every other bypass slot in those lists is NoBypass.
32def A9_LdBypass : Bypass;
33
34def CortexA9Itineraries : ProcessorItineraries<
35  [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
36   A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
37  [A9_LdBypass], [
38  // Two fully-pipelined integer ALU pipelines
39
40  //
41  // Move instructions, unconditional
42  InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
43                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
44  InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
45                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
46  InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
48  InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49                               InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
50  InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
52                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
53  InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
54                                  InstrStage<1, [A9_ALU0, A9_ALU1]>,
55                                  InstrStage<1, [A9_ALU0, A9_ALU1]>,
56                                  InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
57  InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
58                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
59                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
60                               InstrStage<1, [A9_MUX0], 0>,
61                               InstrStage<1, [A9_AGU], 0>,
62                               InstrStage<1, [A9_LSUnit]>], [5]>,
63  //
64  // MVN instructions
65  InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
66                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
67                              [1]>,
68  InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
69                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
70                              [1, 1], [NoBypass, A9_LdBypass]>,
71  InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
72                               InstrStage<2, [A9_ALU0, A9_ALU1]>],
73                              [2, 1]>,
74  InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
75                               InstrStage<3, [A9_ALU0, A9_ALU1]>],
76                              [3, 1, 1]>,
77  //
78  // No operand cycles
79  InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
80                               InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
81  //
82  // Binary Instructions that produce a result
83  InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84                             InstrStage<1, [A9_ALU0, A9_ALU1]>],
85                            [1, 1], [NoBypass, A9_LdBypass]>,
86  InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
87                             InstrStage<1, [A9_ALU0, A9_ALU1]>],
88                            [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
89  InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
90                             InstrStage<2, [A9_ALU0, A9_ALU1]>],
91                            [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
92  InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93                             InstrStage<2, [A9_ALU0, A9_ALU1]>],
94                            [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
95  InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
96                             InstrStage<3, [A9_ALU0, A9_ALU1]>],
97                            [3, 1, 1, 1],
98                            [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
99  //
100  // Bitwise Instructions that produce a result
101  InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
102                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
103  InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
104                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
105  InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
106                             InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
107  InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108                             InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
109  //
110  // Unary Instructions that produce a result
111
112  // CLZ, RBIT, etc.
113  InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
114                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
115
116  // BFC, BFI, UBFX, SBFX
117  InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118                             InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
119
120  //
121  // Zero and sign extension instructions
122  InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
123                             InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
124  InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
125                             InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
126  InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
127                             InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
128  //
129  // Compare instructions
130  InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
131                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
132                               [1], [A9_LdBypass]>,
133  InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
134                               InstrStage<1, [A9_ALU0, A9_ALU1]>],
135                               [1, 1], [A9_LdBypass, A9_LdBypass]>,
136  InstrItinData<IIC_iCMPsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137                               InstrStage<2, [A9_ALU0, A9_ALU1]>],
138                                [1, 1], [A9_LdBypass, NoBypass]>,
139  InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
140                               InstrStage<3, [A9_ALU0, A9_ALU1]>],
141                              [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
142  //
143  // Test instructions
144  InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
145                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
146  InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
147                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
148  InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
149                               InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
150  InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
151                               InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
152  //
153  // Move instructions, conditional
154  // FIXME: Correctly model the extra input dep on the destination.
155  InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
156                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
157  InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
158                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
159  InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
160                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
161  InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162                               InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
163  InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164                               InstrStage<1, [A9_ALU0, A9_ALU1]>,
165                               InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
166                               InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
167
168  // Integer multiply pipeline
169  //
170  InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
171                               InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
172  InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
173                               InstrStage<2, [A9_ALU0]>],
174                              [3, 1, 1, 1]>,
175  InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
176                               InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
177  InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
178                               InstrStage<2, [A9_ALU0]>],
179                              [4, 1, 1, 1]>,
180  InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
181                               InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
182  InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
183                               InstrStage<3, [A9_ALU0]>],
184                              [4, 5, 1, 1]>,
185  // Integer load pipeline
186  // FIXME: The timings are rough approximations.
187  //
188  // Immediate offset
189  InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
190                                 InstrStage<1, [A9_MUX0], 0>,
191                                 InstrStage<1, [A9_AGU], 0>,
192                                 InstrStage<1, [A9_LSUnit]>],
193                                [3, 1], [A9_LdBypass]>,
194  InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
195                                 InstrStage<1, [A9_MUX0], 0>,
196                                 InstrStage<2, [A9_AGU], 0>,
197                                 InstrStage<1, [A9_LSUnit]>],
198                                [4, 1], [A9_LdBypass]>,
199  // FIXME: If address is 64-bit aligned, AGU cycles is 1.
200  InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
201                                 InstrStage<1, [A9_MUX0], 0>,
202                                 InstrStage<2, [A9_AGU], 0>,
203                                 InstrStage<1, [A9_LSUnit]>],
204                                [3, 3, 1], [A9_LdBypass]>,
205  //
206  // Register offset
207  InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
208                                 InstrStage<1, [A9_MUX0], 0>,
209                                 InstrStage<1, [A9_AGU], 0>,
210                                 InstrStage<1, [A9_LSUnit]>],
211                                [3, 1, 1], [A9_LdBypass]>,
212  InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
213                                 InstrStage<1, [A9_MUX0], 0>,
214                                 InstrStage<2, [A9_AGU], 0>,
215                                 InstrStage<1, [A9_LSUnit]>],
216                                [4, 1, 1], [A9_LdBypass]>,
217  InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
218                                 InstrStage<1, [A9_MUX0], 0>,
219                                 InstrStage<2, [A9_AGU], 0>,
220                                 InstrStage<1, [A9_LSUnit]>],
221                                [3, 3, 1, 1], [A9_LdBypass]>,
222  //
223  // Scaled register offset
224  InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
225                                 InstrStage<1, [A9_MUX0], 0>,
226                                 InstrStage<1, [A9_AGU], 0>,
227                                 InstrStage<1, [A9_LSUnit], 0>],
228                                [4, 1, 1], [A9_LdBypass]>,
229  InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
230                                 InstrStage<1, [A9_MUX0], 0>,
231                                 InstrStage<2, [A9_AGU], 0>,
232                                 InstrStage<1, [A9_LSUnit]>],
233                                [5, 1, 1], [A9_LdBypass]>,
234  //
235  // Immediate offset with update
236  InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
237                                 InstrStage<1, [A9_MUX0], 0>,
238                                 InstrStage<1, [A9_AGU], 0>,
239                                 InstrStage<1, [A9_LSUnit]>],
240                                [3, 2, 1], [A9_LdBypass]>,
241  InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
242                                 InstrStage<1, [A9_MUX0], 0>,
243                                 InstrStage<2, [A9_AGU], 0>,
244                                 InstrStage<1, [A9_LSUnit]>],
245                                [4, 3, 1], [A9_LdBypass]>,
246  //
247  // Register offset with update
248  InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
249                                 InstrStage<1, [A9_MUX0], 0>,
250                                 InstrStage<1, [A9_AGU], 0>,
251                                 InstrStage<1, [A9_LSUnit]>],
252                                [3, 2, 1, 1], [A9_LdBypass]>,
253  InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
254                                 InstrStage<1, [A9_MUX0], 0>,
255                                 InstrStage<2, [A9_AGU], 0>,
256                                 InstrStage<1, [A9_LSUnit]>],
257                                [4, 3, 1, 1], [A9_LdBypass]>,
258  InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
259                                 InstrStage<1, [A9_MUX0], 0>,
260                                 InstrStage<2, [A9_AGU], 0>,
261                                 InstrStage<1, [A9_LSUnit]>],
262                                [3, 3, 1, 1], [A9_LdBypass]>,
263  //
264  // Scaled register offset with update
265  InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
266                                 InstrStage<1, [A9_MUX0], 0>,
267                                 InstrStage<1, [A9_AGU], 0>,
268                                 InstrStage<1, [A9_LSUnit]>],
269                                [4, 3, 1, 1], [A9_LdBypass]>,
270  InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
271                                  InstrStage<1, [A9_MUX0], 0>,
272                                  InstrStage<2, [A9_AGU], 0>,
273                                  InstrStage<1, [A9_LSUnit]>],
274                                 [5, 4, 1, 1], [A9_LdBypass]>,
275  //
276  // Load multiple, def is the 5th operand.
277  // FIXME: This assumes 3 to 4 registers.
278  InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
279                                InstrStage<1, [A9_MUX0], 0>,
280                                InstrStage<2, [A9_AGU], 1>,
281                                InstrStage<2, [A9_LSUnit]>],
282                               [1, 1, 1, 1, 3],
283                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
284  //
285  // Load multiple + update, defs are the 1st and 5th operands.
286  InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
287                                InstrStage<1, [A9_MUX0], 0>,
288                                InstrStage<2, [A9_AGU], 1>,
289                                InstrStage<2, [A9_LSUnit]>],
290                               [2, 1, 1, 1, 3],
291                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
292  //
293  // Load multiple plus branch
294  InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
295                                InstrStage<1, [A9_MUX0], 0>,
296                                InstrStage<1, [A9_AGU], 1>,
297                                InstrStage<2, [A9_LSUnit]>,
298                                InstrStage<1, [A9_Branch]>],
299                               [1, 2, 1, 1, 3],
300                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
301  //
302  // Pop, def is the 3rd operand.
303  InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
304                                InstrStage<1, [A9_MUX0], 0>,
305                                InstrStage<2, [A9_AGU], 1>,
306                                InstrStage<2, [A9_LSUnit]>],
307                               [1, 1, 3],
308                               [NoBypass, NoBypass, A9_LdBypass]>,
309  //
310  // Pop + branch, def is the 3rd operand.
311  InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
312                                InstrStage<1, [A9_MUX0], 0>,
313                                InstrStage<2, [A9_AGU], 1>,
314                                InstrStage<2, [A9_LSUnit]>,
315                                InstrStage<1, [A9_Branch]>],
316                               [1, 1, 3],
317                               [NoBypass, NoBypass, A9_LdBypass]>,
318
319  //
320  // iLoadi + iALUr for t2LDRpci_pic.
321  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
322                                InstrStage<1, [A9_MUX0], 0>,
323                                InstrStage<1, [A9_AGU], 0>,
324                                InstrStage<1, [A9_LSUnit]>,
325                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
326                               [2, 1]>,
327
328  // Integer store pipeline
329  //
330  // Immediate offset
331  InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
332                                 InstrStage<1, [A9_MUX0], 0>,
333                                 InstrStage<1, [A9_AGU], 0>,
334                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
335  InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
336                                 InstrStage<1, [A9_MUX0], 0>,
337                                 InstrStage<2, [A9_AGU], 1>,
338                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
339  // FIXME: If address is 64-bit aligned, AGU cycles is 1.
340  InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
341                                 InstrStage<1, [A9_MUX0], 0>,
342                                 InstrStage<2, [A9_AGU], 1>,
343                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
344  //
345  // Register offset
346  InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
347                                 InstrStage<1, [A9_MUX0], 0>,
348                                 InstrStage<1, [A9_AGU], 0>,
349                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
350  InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
351                                 InstrStage<1, [A9_MUX0], 0>,
352                                 InstrStage<2, [A9_AGU], 1>,
353                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
354  InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
355                                 InstrStage<1, [A9_MUX0], 0>,
356                                 InstrStage<2, [A9_AGU], 1>,
357                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
358  //
359  // Scaled register offset
360  InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
361                                  InstrStage<1, [A9_MUX0], 0>,
362                                  InstrStage<1, [A9_AGU], 0>,
363                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
364  InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
365                                  InstrStage<1, [A9_MUX0], 0>,
366                                  InstrStage<2, [A9_AGU], 1>,
367                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
368  //
369  // Immediate offset with update
370  InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
371                                  InstrStage<1, [A9_MUX0], 0>,
372                                  InstrStage<1, [A9_AGU], 0>,
373                                  InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
374  InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
375                                  InstrStage<1, [A9_MUX0], 0>,
376                                  InstrStage<2, [A9_AGU], 1>,
377                                  InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
378  //
379  // Register offset with update
380  InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
381                                  InstrStage<1, [A9_MUX0], 0>,
382                                  InstrStage<1, [A9_AGU], 0>,
383                                  InstrStage<1, [A9_LSUnit]>],
384                                 [2, 1, 1, 1]>,
385  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
386                                  InstrStage<1, [A9_MUX0], 0>,
387                                  InstrStage<2, [A9_AGU], 1>,
388                                  InstrStage<1, [A9_LSUnit]>],
389                                 [3, 1, 1, 1]>,
390  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
391                                  InstrStage<1, [A9_MUX0], 0>,
392                                  InstrStage<2, [A9_AGU], 1>,
393                                  InstrStage<1, [A9_LSUnit]>],
394                                 [3, 1, 1, 1]>,
395  //
396  // Scaled register offset with update
397  InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
398                                    InstrStage<1, [A9_MUX0], 0>,
399                                    InstrStage<1, [A9_AGU], 0>,
400                                    InstrStage<1, [A9_LSUnit]>],
401                                   [2, 1, 1, 1]>,
402  InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
403                                    InstrStage<1, [A9_MUX0], 0>,
404                                    InstrStage<2, [A9_AGU], 1>,
405                                    InstrStage<1, [A9_LSUnit]>],
406                                   [3, 1, 1, 1]>,
407  //
408  // Store multiple
409  InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
410                                InstrStage<1, [A9_MUX0], 0>,
411                                InstrStage<1, [A9_AGU], 0>,
412                                InstrStage<2, [A9_LSUnit]>]>,
413  //
414  // Store multiple + update
415  InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
416                                InstrStage<1, [A9_MUX0], 0>,
417                                InstrStage<1, [A9_AGU], 0>,
418                                InstrStage<2, [A9_LSUnit]>], [2]>,
419
420  //
421  // Preload
422  InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
423
424  // Branch
425  //
426  // no delay slots, so the latency of a branch is unimportant
427  InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
428                                InstrStage<1, [A9_Issue1], 0>,
429                                InstrStage<1, [A9_Branch]>]>,
430
431  // VFP and NEON share the same register file. This means that every VFP
432  // instruction should wait for full completion of the consecutive NEON
433  // instruction and vice-versa. We model this behavior with two artificial FUs:
434  // DRegsVFP and DRegsN.
435  //
436  // Every VFP instruction:
437  //  - Acquires DRegsVFP resource for 1 cycle
438  //  - Reserves DRegsN resource for the whole duration (including time to
439  //    register file writeback!).
440  // Every NEON instruction does the same but with FUs swapped.
441  //
442  // Since the reserved FU cannot be acquired, this models precisely
443  // "cross-domain" stalls.
444
445  // VFP
446  // Issue through integer pipeline, and execute in NEON unit.
447
448  // FP Special Register to Integer Register File Move
449  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
450                              InstrStage<1, [A9_MUX0], 0>,
451                              InstrStage<1, [A9_DRegsVFP], 0, Required>,
452                              InstrStage<2, [A9_DRegsN],   0, Reserved>,
453                              InstrStage<1, [A9_NPipe]>],
454                             [1]>,
455  //
456  // Single-precision FP Unary
457  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
458                               InstrStage<1, [A9_MUX0], 0>,
459                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
460                               // Extra latency cycles since wbck is 2 cycles
461                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
462                               InstrStage<1, [A9_NPipe]>],
463                              [1, 1]>,
464  //
465  // Double-precision FP Unary
466  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
467                               InstrStage<1, [A9_MUX0], 0>,
468                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
469                               // Extra latency cycles since wbck is 2 cycles
470                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
471                               InstrStage<1, [A9_NPipe]>],
472                              [1, 1]>,
473
474  //
475  // Single-precision FP Compare
476  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
477                               InstrStage<1, [A9_MUX0], 0>,
478                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
479                               // Extra latency cycles since wbck is 4 cycles
480                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
481                               InstrStage<1, [A9_NPipe]>],
482                              [1, 1]>,
483  //
484  // Double-precision FP Compare
485  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486                               InstrStage<1, [A9_MUX0], 0>,
487                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
488                               // Extra latency cycles since wbck is 4 cycles
489                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
490                               InstrStage<1, [A9_NPipe]>],
491                              [1, 1]>,
492  //
493  // Single to Double FP Convert
494  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495                               InstrStage<1, [A9_MUX0], 0>,
496                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
497                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
498                               InstrStage<1, [A9_NPipe]>],
499                              [4, 1]>,
500  //
501  // Double to Single FP Convert
502  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503                               InstrStage<1, [A9_MUX0], 0>,
504                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
505                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
506                               InstrStage<1, [A9_NPipe]>],
507                              [4, 1]>,
508
509  //
510  // Single to Half FP Convert
511  InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512                               InstrStage<1, [A9_MUX0], 0>,
513                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
514                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
515                               InstrStage<1, [A9_NPipe]>],
516                              [4, 1]>,
517  //
518  // Half to Single FP Convert
519  InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
520                               InstrStage<1, [A9_MUX0], 0>,
521                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
522                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
523                               InstrStage<1, [A9_NPipe]>],
524                              [2, 1]>,
525
526  //
527  // Single-Precision FP to Integer Convert
528  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
529                               InstrStage<1, [A9_MUX0], 0>,
530                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
531                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
532                               InstrStage<1, [A9_NPipe]>],
533                              [4, 1]>,
534  //
535  // Double-Precision FP to Integer Convert
536  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
537                               InstrStage<1, [A9_MUX0], 0>,
538                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
539                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
540                               InstrStage<1, [A9_NPipe]>],
541                              [4, 1]>,
542  //
543  // Integer to Single-Precision FP Convert
544  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
545                               InstrStage<1, [A9_MUX0], 0>,
546                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
547                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
548                               InstrStage<1, [A9_NPipe]>],
549                              [4, 1]>,
550  //
551  // Integer to Double-Precision FP Convert
552  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
553                               InstrStage<1, [A9_MUX0], 0>,
554                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
555                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
556                               InstrStage<1, [A9_NPipe]>],
557                              [4, 1]>,
558  //
559  // Single-precision FP ALU
     // Stages: either issue slot plus MUX0; VFP-side register set Required for
     // 1 cycle, NEON-side set Reserved for 5; 1 cycle on NPipe.
     // Operand cycles are [4, 1, 1].
560  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
561                               InstrStage<1, [A9_MUX0], 0>,
562                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
563                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
564                               InstrStage<1, [A9_NPipe]>],
565                              [4, 1, 1]>,
566  //
567  // Double-precision FP ALU
     // Identical stages and operand cycles to IIC_fpALU32.
568  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
569                               InstrStage<1, [A9_MUX0], 0>,
570                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
571                               InstrStage<5, [A9_DRegsN],   0, Reserved>,
572                               InstrStage<1, [A9_NPipe]>],
573                              [4, 1, 1]>,
574  //
575  // Single-precision FP Multiply
     // NEON-side register set Reserved for 6 cycles, 1 cycle on NPipe;
     // operand cycles [5, 1, 1].
576  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
577                               InstrStage<1, [A9_MUX0], 0>,
578                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
579                               InstrStage<6, [A9_DRegsN],   0, Reserved>,
580                               InstrStage<1, [A9_NPipe]>],
581                              [5, 1, 1]>,
582  //
583  // Double-precision FP Multiply
     // Compared with IIC_fpMUL32: NEON-side set Reserved for 7 cycles, NPipe
     // held for 2 cycles, and the first operand cycle is 6 instead of 5.
584  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
585                               InstrStage<1, [A9_MUX0], 0>,
586                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
587                               InstrStage<7, [A9_DRegsN],   0, Reserved>,
588                               InstrStage<2, [A9_NPipe]>],
589                              [6, 1, 1]>,
590  //
591  // Single-precision FP MAC
     // NEON-side register set Reserved for 9 cycles, 1 cycle on NPipe;
     // operand cycles [8, 1, 1, 1] (four entries, one more than the multiply
     // itineraries carry).
592  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
593                               InstrStage<1, [A9_MUX0], 0>,
594                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
595                               InstrStage<9, [A9_DRegsN],   0, Reserved>,
596                               InstrStage<1, [A9_NPipe]>],
597                              [8, 1, 1, 1]>,
598  //
599  // Double-precision FP MAC
     // Compared with IIC_fpMAC32: NEON-side set Reserved for 10 cycles, NPipe
     // held for 2 cycles, and the first operand cycle is 9 instead of 8.
600  InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
601                               InstrStage<1,  [A9_MUX0], 0>,
602                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
603                               InstrStage<10, [A9_DRegsN],  0, Reserved>,
604                               InstrStage<2,  [A9_NPipe]>],
605                              [9, 1, 1, 1]>,
606  //
607  // Single-precision FP DIV
     // NPipe is held for 10 cycles and the NEON-side register set is Reserved
     // for 16; operand cycles [15, 1, 1].
608  InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
609                               InstrStage<1,  [A9_MUX0], 0>,
610                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
611                               InstrStage<16, [A9_DRegsN],  0, Reserved>,
612                               InstrStage<10, [A9_NPipe]>],
613                              [15, 1, 1]>,
614  //
615  // Double-precision FP DIV
     // Each long count is 10 higher than in IIC_fpDIV32: NPipe held for 20
     // cycles, NEON-side set Reserved for 26, first operand cycle 25.
616  InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
617                               InstrStage<1,  [A9_MUX0], 0>,
618                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
619                               InstrStage<26, [A9_DRegsN],  0, Reserved>,
620                               InstrStage<20, [A9_NPipe]>],
621                              [25, 1, 1]>,
622  //
623  // Single-precision FP SQRT
     // NPipe is held for 13 cycles and the NEON-side register set is Reserved
     // for 18; operand cycles [17, 1].
624  InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
625                               InstrStage<1,  [A9_MUX0], 0>,
626                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
627                               InstrStage<18, [A9_DRegsN],   0, Reserved>,
628                               InstrStage<13, [A9_NPipe]>],
629                              [17, 1]>,
630  //
631  // Double-precision FP SQRT
     // Each long count is 15 higher than in IIC_fpSQRT32: NPipe held for 28
     // cycles, NEON-side set Reserved for 33, first operand cycle 32.
632  InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
633                               InstrStage<1,  [A9_MUX0], 0>,
634                               InstrStage<1,  [A9_DRegsVFP], 0, Required>,
635                               InstrStage<33, [A9_DRegsN],   0, Reserved>,
636                               InstrStage<28, [A9_NPipe]>],
637                              [32, 1]>,
638
639  //
640  // Integer to Single-precision Move
     // Stages: either issue slot plus MUX0; VFP-side register set Required for
     // 1 cycle, NEON-side set Reserved for 3; 1 cycle on NPipe.
     // Operand cycles are [1, 1].
641  InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
642                               InstrStage<1, [A9_MUX0], 0>,
643                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
644                               // Extra 1 latency cycle since wbck is 2 cycles
645                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
646                               InstrStage<1, [A9_NPipe]>],
647                              [1, 1]>,
648  //
649  // Integer to Double-precision Move
     // Same stages as IIC_fpMOVIS; the operand-cycle list has three entries,
     // [1, 1, 1], instead of two.
650  InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
651                               InstrStage<1, [A9_MUX0], 0>,
652                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
653                               // Extra 1 latency cycle since wbck is 2 cycles
654                               InstrStage<3, [A9_DRegsN],   0, Reserved>,
655                               InstrStage<1, [A9_NPipe]>],
656                              [1, 1, 1]>,
657  //
658  // Single-precision to Integer Move
659  //
660  // On A9 move-from-VFP is free to issue with no stall if other VFP
661  // operations are in flight. I assume it still can't dual-issue though.
     // Accordingly this entry lists only the issue-slot and MUX0 stages: there
     // is no register-set or NPipe stage. Both stages, including the last one,
     // carry an explicit 0 as their third argument. Operand cycles are [2, 1].
662  InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
663                               InstrStage<1, [A9_MUX0], 0>],
664                              [2, 1]>,
665  //
666  // Double-precision to Integer Move
667  //
668  // On A9 move-from-VFP is free to issue with no stall if other VFP
669  // operations are in flight. I assume it still can't dual-issue though.
     // Same two stages as IIC_fpMOVSI; the operand-cycle list has three
     // entries, [2, 1, 1], instead of two.
670  InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
671                               InstrStage<1, [A9_MUX0], 0>],
672                              [2, 1, 1]>,
673  //
674  // Single-precision FP Load
     // Stages: either issue slot plus MUX0; VFP-side register set Required for
     // 1 cycle, NEON-side set Reserved for 2; 1 cycle on NPipe and 1 cycle on
     // the L/S unit. Operand cycles are [1, 1].
675  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
676                               InstrStage<1, [A9_MUX0], 0>,
677                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
678                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
679                               InstrStage<1, [A9_NPipe], 0>,
680                               InstrStage<1, [A9_LSUnit]>],
681                              [1, 1]>,
682  //
683  // Double-precision FP Load
684  // FIXME: Result latency is 1 if address is 64-bit aligned.
     // Same stages as IIC_fpLoad32; only the first operand cycle differs
     // (2 here, the value the FIXME above refers to).
685  InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
686                               InstrStage<1, [A9_MUX0], 0>,
687                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
688                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
689                               InstrStage<1, [A9_NPipe], 0>,
690                               InstrStage<1, [A9_LSUnit]>],
691                              [2, 1]>,
692  //
693  // FP Load Multiple
694  // FIXME: assumes 2 doubles which requires 2 LS cycles.
     // The 2 LS cycles are the final InstrStage<2, [A9_LSUnit]>; the other
     // stages match the single FP load entries. Operand cycles [1, 1, 1, 1].
695  InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
696                               InstrStage<1, [A9_MUX0], 0>,
697                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
698                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
699                               InstrStage<1, [A9_NPipe], 0>,
700                               InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
701  //
702  // FP Load Multiple + update
703  // FIXME: assumes 2 doubles which requires 2 LS cycles.
     // Same stages as IIC_fpLoad_m; the first operand cycle is 2 instead of 1.
704  InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
705                               InstrStage<1, [A9_MUX0], 0>,
706                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
707                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
708                               InstrStage<1, [A9_NPipe], 0>,
709                               InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
710  //
711  // Single-precision FP Store
     // Stages: either issue slot plus MUX0; VFP-side register set Required for
     // 1 cycle, NEON-side set Reserved for 2; 1 cycle on NPipe and 1 cycle on
     // the L/S unit. Operand cycles are [1, 1].
712  InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
713                               InstrStage<1, [A9_MUX0], 0>,
714                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
715                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
716                               InstrStage<1, [A9_NPipe], 0>,
717                               InstrStage<1, [A9_LSUnit]>],
718                              [1, 1]>,
719  //
720  // Double-precision FP Store
     // Identical stages and operand cycles to IIC_fpStore32.
721  InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
722                               InstrStage<1, [A9_MUX0], 0>,
723                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
724                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
725                               InstrStage<1, [A9_NPipe], 0>,
726                               InstrStage<1, [A9_LSUnit]>],
727                              [1, 1]>,
728  //
729  // FP Store Multiple
730  // FIXME: assumes 2 doubles which requires 2 LS cycles.
     // The 2 LS cycles are the final InstrStage<2, [A9_LSUnit]>; the other
     // stages match the single FP store entries. Operand cycles [1, 1, 1, 1].
731  InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
732                               InstrStage<1, [A9_MUX0], 0>,
733                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
734                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
735                               InstrStage<1, [A9_NPipe], 0>,
736                               InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
737  //
738  // FP Store Multiple + update
739  // FIXME: assumes 2 doubles which requires 2 LS cycles.
     // Same stages as IIC_fpStore_m; the first operand cycle is 2 instead of 1.
740  InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
741                                InstrStage<1, [A9_MUX0], 0>,
742                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
743                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
744                                InstrStage<1, [A9_NPipe], 0>,
745                                InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
746  // NEON
     // In the four VLD1 entries of this group the NEON-side register set
     // (A9_DRegsN) is the Required stage and the VFP-side set (A9_DRegsVFP) is
     // the Reserved one. NPipe and the L/S unit are held for the same number
     // of cycles n, and the VFP-side reservation is 6 + n cycles.
747  // VLD1
     // n = 1; operand cycles [1, 1].
748  InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
749                               InstrStage<1, [A9_MUX0], 0>,
750                               InstrStage<1, [A9_DRegsN],   0, Required>,
751                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
752                               InstrStage<1, [A9_NPipe], 0>,
753                               InstrStage<1, [A9_LSUnit]>],
754                              [1, 1]>,
755  // VLD1x2
     // n = 1; operand cycles [1, 1, 1].
756  InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
757                               InstrStage<1, [A9_MUX0], 0>,
758                               InstrStage<1, [A9_DRegsN],   0, Required>,
759                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
760                               InstrStage<1, [A9_NPipe], 0>,
761                               InstrStage<1, [A9_LSUnit]>],
762                              [1, 1, 1]>,
763  // VLD1x3
     // n = 2; operand cycles [1, 1, 2, 1].
764  InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
765                               InstrStage<1, [A9_MUX0], 0>,
766                               InstrStage<1, [A9_DRegsN],   0, Required>,
767                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
768                               InstrStage<2, [A9_NPipe], 0>,
769                               InstrStage<2, [A9_LSUnit]>],
770                              [1, 1, 2, 1]>,
771  // VLD1x4
     // n = 2; operand cycles [1, 1, 2, 2, 1].
772  InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
773                               InstrStage<1, [A9_MUX0], 0>,
774                               InstrStage<1, [A9_DRegsN],   0, Required>,
775                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
776                               InstrStage<2, [A9_NPipe], 0>,
777                               InstrStage<2, [A9_LSUnit]>],
778                              [1, 1, 2, 2, 1]>,
779  // VLD1u
     // Same stages as IIC_VLD1 (1 cycle on NPipe and L/S unit, VFP-side set
     // Reserved for 7). The operand-cycle list gains one entry: [1, 2, 1].
     // NOTE(review): the added 2 is presumably the updated address register --
     // confirm against the instruction definitions.
780  InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
781                               InstrStage<1, [A9_MUX0], 0>,
782                               InstrStage<1, [A9_DRegsN],   0, Required>,
783                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
784                               InstrStage<1, [A9_NPipe], 0>,
785                               InstrStage<1, [A9_LSUnit]>],
786                              [1, 2, 1]>,
787  // VLD1x2u
     // Same stages as IIC_VLD1u; operand cycles [1, 1, 2, 1].
788  InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
789                               InstrStage<1, [A9_MUX0], 0>,
790                               InstrStage<1, [A9_DRegsN],   0, Required>,
791                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
792                               InstrStage<1, [A9_NPipe], 0>,
793                               InstrStage<1, [A9_LSUnit]>],
794                              [1, 1, 2, 1]>,
795  // VLD1x3u
     // 2 cycles on NPipe and L/S unit, VFP-side set Reserved for 8;
     // operand cycles [1, 1, 2, 2, 1].
796  InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
797                               InstrStage<1, [A9_MUX0], 0>,
798                               InstrStage<1, [A9_DRegsN],   0, Required>,
799                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
800                               InstrStage<2, [A9_NPipe], 0>,
801                               InstrStage<2, [A9_LSUnit]>],
802                              [1, 1, 2, 2, 1]>,
803  // VLD1x4u
     // Same stages as IIC_VLD1x3u; operand cycles [1, 1, 2, 2, 2, 1].
804  InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
805                               InstrStage<1, [A9_MUX0], 0>,
806                               InstrStage<1, [A9_DRegsN],   0, Required>,
807                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
808                               InstrStage<2, [A9_NPipe], 0>,
809                               InstrStage<2, [A9_LSUnit]>],
810                              [1, 1, 2, 2, 2, 1]>,
811  //
812  // VLD1ln
     // NEON-side register set Required for 1 cycle, VFP-side set Reserved for
     // 8; NPipe and the L/S unit are each held for 2 cycles.
     // Operand cycles are [3, 1, 1, 1].
813  InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
814                               InstrStage<1, [A9_MUX0], 0>,
815                               InstrStage<1, [A9_DRegsN],   0, Required>,
816                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
817                               InstrStage<2, [A9_NPipe], 0>,
818                               InstrStage<2, [A9_LSUnit]>],
819                              [3, 1, 1, 1]>,
820  //
821  // VLD1lnu
     // Same stages as IIC_VLD1ln; the operand-cycle list has six entries,
     // [3, 2, 1, 1, 1, 1].
822  InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
823                               InstrStage<1, [A9_MUX0], 0>,
824                               InstrStage<1, [A9_DRegsN],   0, Required>,
825                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
826                               InstrStage<2, [A9_NPipe], 0>,
827                               InstrStage<2, [A9_LSUnit]>],
828                              [3, 2, 1, 1, 1, 1]>,
829  //
830  // VLD1dup
     // NEON-side register set Required for 1 cycle, VFP-side set Reserved for
     // 7; NPipe and the L/S unit are each held for 1 cycle.
     // Operand cycles are [2, 1].
831  InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
832                               InstrStage<1, [A9_MUX0], 0>,
833                               InstrStage<1, [A9_DRegsN],   0, Required>,
834                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
835                               InstrStage<1, [A9_NPipe], 0>,
836                               InstrStage<1, [A9_LSUnit]>],
837                              [2, 1]>,
838  //
839  // VLD1dupu
     // Same stages as IIC_VLD1dup; the operand-cycle list has four entries,
     // [2, 2, 1, 1].
840  InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
841                               InstrStage<1, [A9_MUX0], 0>,
842                               InstrStage<1, [A9_DRegsN],   0, Required>,
843                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
844                               InstrStage<1, [A9_NPipe], 0>,
845                               InstrStage<1, [A9_LSUnit]>],
846                              [2, 2, 1, 1]>,
847  //
848  // VLD2
     // NEON-side register set Required for 1 cycle; NPipe and the L/S unit are
     // each held for 1 cycle. Operand cycles are [2, 2, 1].
849  InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
850                               InstrStage<1, [A9_MUX0], 0>,
851                               InstrStage<1, [A9_DRegsN],   0, Required>,
852                               // Extra latency cycles since wbck is 7 cycles
853                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
854                               InstrStage<1, [A9_NPipe], 0>,
855                               InstrStage<1, [A9_LSUnit]>],
856                              [2, 2, 1]>,
857  //
858  // VLD2x2
     // Compared with IIC_VLD2: NPipe and the L/S unit are held for 2 cycles
     // and the VFP-side set is Reserved for 8. Operand cycles [2, 3, 2, 3, 1].
859  InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
860                               InstrStage<1, [A9_MUX0], 0>,
861                               InstrStage<1, [A9_DRegsN],   0, Required>,
862                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
863                               InstrStage<2, [A9_NPipe], 0>,
864                               InstrStage<2, [A9_LSUnit]>],
865                              [2, 3, 2, 3, 1]>,
866  //
867  // VLD2ln
     // Same stages as IIC_VLD2x2; operand cycles [3, 3, 1, 1, 1, 1].
868  InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
869                               InstrStage<1, [A9_MUX0], 0>,
870                               InstrStage<1, [A9_DRegsN],   0, Required>,
871                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
872                               InstrStage<2, [A9_NPipe], 0>,
873                               InstrStage<2, [A9_LSUnit]>],
874                              [3, 3, 1, 1, 1, 1]>,
875  //
876  // VLD2u
     // NEON-side register set Required for 1 cycle; NPipe and the L/S unit are
     // each held for 1 cycle. Operand cycles are [2, 2, 2, 1, 1, 1].
877  InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
878                               InstrStage<1, [A9_MUX0], 0>,
879                               InstrStage<1, [A9_DRegsN],   0, Required>,
880                               // Extra latency cycles since wbck is 7 cycles
881                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
882                               InstrStage<1, [A9_NPipe], 0>,
883                               InstrStage<1, [A9_LSUnit]>],
884                              [2, 2, 2, 1, 1, 1]>,
885  //
886  // VLD2x2u
     // Compared with IIC_VLD2u: NPipe and the L/S unit are held for 2 cycles
     // and the VFP-side set is Reserved for 8.
     // Operand cycles [2, 3, 2, 3, 2, 1].
887  InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
888                               InstrStage<1, [A9_MUX0], 0>,
889                               InstrStage<1, [A9_DRegsN],   0, Required>,
890                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
891                               InstrStage<2, [A9_NPipe], 0>,
892                               InstrStage<2, [A9_LSUnit]>],
893                              [2, 3, 2, 3, 2, 1]>,
894  //
895  // VLD2lnu
     // Same stages as IIC_VLD2x2u; operand cycles [3, 3, 2, 1, 1, 1, 1, 1].
896  InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
897                               InstrStage<1, [A9_MUX0], 0>,
898                               InstrStage<1, [A9_DRegsN],   0, Required>,
899                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
900                               InstrStage<2, [A9_NPipe], 0>,
901                               InstrStage<2, [A9_LSUnit]>],
902                              [3, 3, 2, 1, 1, 1, 1, 1]>,
903  //
904  // VLD2dup
     // NEON-side register set Required for 1 cycle, VFP-side set Reserved for
     // 7; NPipe and the L/S unit are each held for 1 cycle.
     // Operand cycles are [2, 2, 1].
905  InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
906                               InstrStage<1, [A9_MUX0], 0>,
907                               InstrStage<1, [A9_DRegsN],   0, Required>,
908                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
909                               InstrStage<1, [A9_NPipe], 0>,
910                               InstrStage<1, [A9_LSUnit]>],
911                              [2, 2, 1]>,
912  //
913  // VLD2dupu
     // Same stages as IIC_VLD2dup; the operand-cycle list has five entries,
     // [2, 2, 2, 1, 1].
914  InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
915                               InstrStage<1, [A9_MUX0], 0>,
916                               InstrStage<1, [A9_DRegsN],   0, Required>,
917                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
918                               InstrStage<1, [A9_NPipe], 0>,
919                               InstrStage<1, [A9_LSUnit]>],
920                              [2, 2, 2, 1, 1]>,
921  //
922  // VLD3
     // NEON-side register set Required for 1 cycle, VFP-side set Reserved for
     // 9; NPipe and the L/S unit are each held for 3 cycles.
     // Operand cycles are [3, 3, 4, 1].
923  InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
924                               InstrStage<1, [A9_MUX0], 0>,
925                               InstrStage<1, [A9_DRegsN],   0, Required>,
926                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
927                               InstrStage<3, [A9_NPipe], 0>,
928                               InstrStage<3, [A9_LSUnit]>],
929                              [3, 3, 4, 1]>,
930  //
931  // VLD3ln
     // Compared with IIC_VLD3: NPipe and the L/S unit are held for 5 cycles
     // and the VFP-side set is Reserved for 11.
     // Operand cycles [5, 5, 6, 1, 1, 1, 1, 2].
932  InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
933                               InstrStage<1, [A9_MUX0], 0>,
934                               InstrStage<1, [A9_DRegsN],   0, Required>,
935                               InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
936                               InstrStage<5, [A9_NPipe], 0>,
937                               InstrStage<5, [A9_LSUnit]>],
938                              [5, 5, 6, 1, 1, 1, 1, 2]>,
939  //
940  // VLD3u
     // NEON-side register set Required for 1 cycle, VFP-side set Reserved for
     // 9; NPipe and the L/S unit are each held for 3 cycles.
     // Operand cycles are [3, 3, 4, 2, 1].
941  InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
942                               InstrStage<1, [A9_MUX0], 0>,
943                               InstrStage<1, [A9_DRegsN],   0, Required>,
944                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
945                               InstrStage<3, [A9_NPipe], 0>,
946                               InstrStage<3, [A9_LSUnit]>],
947                              [3, 3, 4, 2, 1]>,
948  //
949  // VLD3lnu
     // Compared with IIC_VLD3u: NPipe and the L/S unit are held for 5 cycles
     // and the VFP-side set is Reserved for 11.
     // Operand cycles [5, 5, 6, 2, 1, 1, 1, 1, 1, 2].
950  InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
951                               InstrStage<1, [A9_MUX0], 0>,
952                               InstrStage<1, [A9_DRegsN],   0, Required>,
953                               InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
954                               InstrStage<5, [A9_NPipe], 0>,
955                               InstrStage<5, [A9_LSUnit]>],
956                              [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
957  //
958  // VLD3dup
     // NEON-side register set Required for 1 cycle, VFP-side set Reserved for
     // 9; NPipe and the L/S unit are each held for 3 cycles.
     // Operand cycles are [3, 3, 4, 1].
959  InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
960                               InstrStage<1, [A9_MUX0], 0>,
961                               InstrStage<1, [A9_DRegsN],   0, Required>,
962                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
963                               InstrStage<3, [A9_NPipe], 0>,
964                               InstrStage<3, [A9_LSUnit]>],
965                              [3, 3, 4, 1]>,
966  //
967  // VLD3dupu
     // Same stages as IIC_VLD3dup; the operand-cycle list has six entries,
     // [3, 3, 4, 2, 1, 1].
968  InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
969                               InstrStage<1, [A9_MUX0], 0>,
970                               InstrStage<1, [A9_DRegsN],   0, Required>,
971                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
972                               InstrStage<3, [A9_NPipe], 0>,
973                               InstrStage<3, [A9_LSUnit]>],
974                              [3, 3, 4, 2, 1, 1]>,
975  //
976  // VLD4
     // NEON-side register set Required for 1 cycle, VFP-side set Reserved for
     // 9; NPipe and the L/S unit are each held for 3 cycles.
     // Operand cycles are [3, 3, 4, 4, 1].
977  InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
978                               InstrStage<1, [A9_MUX0], 0>,
979                               InstrStage<1, [A9_DRegsN],   0, Required>,
980                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
981                               InstrStage<3, [A9_NPipe], 0>,
982                               InstrStage<3, [A9_LSUnit]>],
983                              [3, 3, 4, 4, 1]>,
984  //
985  // VLD4ln
     // Compared with IIC_VLD4: NPipe and the L/S unit are held for 4 cycles
     // and the VFP-side set is Reserved for 10.
     // Operand cycles [4, 4, 5, 5, 1, 1, 1, 1, 2, 2].
986  InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
987                               InstrStage<1, [A9_MUX0], 0>,
988                               InstrStage<1, [A9_DRegsN],   0, Required>,
989                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
990                               InstrStage<4, [A9_NPipe], 0>,
991                               InstrStage<4, [A9_LSUnit]>],
992                              [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
993  //
994  // VLD4u
     // NEON-side register set Required for 1 cycle, VFP-side set Reserved for
     // 9; NPipe and the L/S unit are each held for 3 cycles.
     // Operand cycles are [3, 3, 4, 4, 2, 1].
995  InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
996                               InstrStage<1, [A9_MUX0], 0>,
997                               InstrStage<1, [A9_DRegsN],   0, Required>,
998                               InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
999                               InstrStage<3, [A9_NPipe], 0>,
1000                               InstrStage<3, [A9_LSUnit]>],
1001                              [3, 3, 4, 4, 2, 1]>,
1002  //
1003  // VLD4lnu
      // Compared with IIC_VLD4u: NPipe and the L/S unit are held for 4 cycles
      // and the VFP-side set is Reserved for 10.
      // Operand cycles [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2].
1004  InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1005                               InstrStage<1, [A9_MUX0], 0>,
1006                               InstrStage<1, [A9_DRegsN],   0, Required>,
1007                               InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1008                               InstrStage<4, [A9_NPipe], 0>,
1009                               InstrStage<4, [A9_LSUnit]>],
1010                              [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
1011  //
1012  // VLD4dup
      // NEON-side register set Required for 1 cycle, VFP-side set Reserved for
      // 8; NPipe and the L/S unit are each held for 2 cycles.
      // Operand cycles are [2, 2, 3, 3, 1].
1013  InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1014                               InstrStage<1, [A9_MUX0], 0>,
1015                               InstrStage<1, [A9_DRegsN],   0, Required>,
1016                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1017                               InstrStage<2, [A9_NPipe], 0>,
1018                               InstrStage<2, [A9_LSUnit]>],
1019                              [2, 2, 3, 3, 1]>,
1020  //
1021  // VLD4dupu
      // Same stages as IIC_VLD4dup; the operand-cycle list has seven entries,
      // [2, 2, 3, 3, 2, 1, 1].
1022  InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1023                               InstrStage<1, [A9_MUX0], 0>,
1024                               InstrStage<1, [A9_DRegsN],   0, Required>,
1025                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1026                               InstrStage<2, [A9_NPipe], 0>,
1027                               InstrStage<2, [A9_LSUnit]>],
1028                              [2, 2, 3, 3, 2, 1, 1]>,
1029  //
1030  // VST1
      // In the four VST1 entries of this group the NEON-side register set is
      // Required for 1 cycle, and the VFP-side reservation, NPipe and the L/S
      // unit all use the same cycle count n.
      // Here n = 1; operand cycles [1, 1, 1].
1031  InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1032                               InstrStage<1, [A9_MUX0], 0>,
1033                               InstrStage<1, [A9_DRegsN],   0, Required>,
1034                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1035                               InstrStage<1, [A9_NPipe], 0>,
1036                               InstrStage<1, [A9_LSUnit]>],
1037                              [1, 1, 1]>,
1038  //
1039  // VST1x2
      // n = 1; operand cycles [1, 1, 1, 1].
1040  InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1041                               InstrStage<1, [A9_MUX0], 0>,
1042                               InstrStage<1, [A9_DRegsN],   0, Required>,
1043                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1044                               InstrStage<1, [A9_NPipe], 0>,
1045                               InstrStage<1, [A9_LSUnit]>],
1046                              [1, 1, 1, 1]>,
1047  //
1048  // VST1x3
      // n = 2; operand cycles [1, 1, 1, 1, 2].
1049  InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1050                               InstrStage<1, [A9_MUX0], 0>,
1051                               InstrStage<1, [A9_DRegsN],   0, Required>,
1052                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1053                               InstrStage<2, [A9_NPipe], 0>,
1054                               InstrStage<2, [A9_LSUnit]>],
1055                              [1, 1, 1, 1, 2]>,
1056  //
1057  // VST1x4
      // n = 2; operand cycles [1, 1, 1, 1, 2, 2].
1058  InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1059                               InstrStage<1, [A9_MUX0], 0>,
1060                               InstrStage<1, [A9_DRegsN],   0, Required>,
1061                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1062                               InstrStage<2, [A9_NPipe], 0>,
1063                               InstrStage<2, [A9_LSUnit]>],
1064                              [1, 1, 1, 1, 2, 2]>,
1065  //
1066  // VST1u
      // Same stages as IIC_VST1 (1 cycle each on the VFP-side reservation,
      // NPipe and L/S unit). Operand cycles [2, 1, 1, 1, 1].
      // NOTE(review): the leading 2 is presumably the updated address
      // register -- confirm against the instruction definitions.
1067  InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1068                               InstrStage<1, [A9_MUX0], 0>,
1069                               InstrStage<1, [A9_DRegsN],   0, Required>,
1070                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1071                               InstrStage<1, [A9_NPipe], 0>,
1072                               InstrStage<1, [A9_LSUnit]>],
1073                              [2, 1, 1, 1, 1]>,
1074  //
1075  // VST1x2u
      // Same stages as IIC_VST1u; operand cycles [2, 1, 1, 1, 1, 1].
1076  InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1077                               InstrStage<1, [A9_MUX0], 0>,
1078                               InstrStage<1, [A9_DRegsN],   0, Required>,
1079                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1080                               InstrStage<1, [A9_NPipe], 0>,
1081                               InstrStage<1, [A9_LSUnit]>],
1082                              [2, 1, 1, 1, 1, 1]>,
1083  //
1084  // VST1x3u
      // 2 cycles each on the VFP-side reservation, NPipe and L/S unit;
      // operand cycles [2, 1, 1, 1, 1, 1, 2].
1085  InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1086                               InstrStage<1, [A9_MUX0], 0>,
1087                               InstrStage<1, [A9_DRegsN],   0, Required>,
1088                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1089                               InstrStage<2, [A9_NPipe], 0>,
1090                               InstrStage<2, [A9_LSUnit]>],
1091                              [2, 1, 1, 1, 1, 1, 2]>,
1092  //
1093  // VST1x4u
      // Same stages as IIC_VST1x3u; operand cycles [2, 1, 1, 1, 1, 1, 2, 2].
1094  InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1095                               InstrStage<1, [A9_MUX0], 0>,
1096                               InstrStage<1, [A9_DRegsN],   0, Required>,
1097                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1098                               InstrStage<2, [A9_NPipe], 0>,
1099                               InstrStage<2, [A9_LSUnit]>],
1100                              [2, 1, 1, 1, 1, 1, 2, 2]>,
1101  //
1102  // VST1ln
      // NEON-side register set Required for 1 cycle; the VFP-side reservation,
      // NPipe and the L/S unit are each 1 cycle. Operand cycles [1, 1, 1].
1103  InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1104                               InstrStage<1, [A9_MUX0], 0>,
1105                               InstrStage<1, [A9_DRegsN],   0, Required>,
1106                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1107                               InstrStage<1, [A9_NPipe], 0>,
1108                               InstrStage<1, [A9_LSUnit]>],
1109                              [1, 1, 1]>,
1110  //
1111  // VST1lnu
      // Same stages as IIC_VST1ln; the operand-cycle list has five entries,
      // [2, 1, 1, 1, 1].
1112  InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1113                               InstrStage<1, [A9_MUX0], 0>,
1114                               InstrStage<1, [A9_DRegsN],   0, Required>,
1115                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1116                               InstrStage<1, [A9_NPipe], 0>,
1117                               InstrStage<1, [A9_LSUnit]>],
1118                              [2, 1, 1, 1, 1]>,
1119  //
1120  // VST2
      // NEON-side register set Required for 1 cycle; the VFP-side reservation,
      // NPipe and the L/S unit are each 1 cycle. Operand cycles [1, 1, 1, 1].
1121  InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1122                               InstrStage<1, [A9_MUX0], 0>,
1123                               InstrStage<1, [A9_DRegsN],   0, Required>,
1124                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1125                               InstrStage<1, [A9_NPipe], 0>,
1126                               InstrStage<1, [A9_LSUnit]>],
1127                              [1, 1, 1, 1]>,
1128  //
1129  // VST2x2
      // Compared with IIC_VST2: the VFP-side reservation, NPipe and the L/S
      // unit are each 3 cycles. Operand cycles [1, 1, 1, 1, 2, 2].
1130  InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1131                               InstrStage<1, [A9_MUX0], 0>,
1132                               InstrStage<1, [A9_DRegsN],   0, Required>,
1133                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1134                               InstrStage<3, [A9_NPipe], 0>,
1135                               InstrStage<3, [A9_LSUnit]>],
1136                              [1, 1, 1, 1, 2, 2]>,
1137  //
1138  // VST2u
      // NEON-side register set Required for 1 cycle; the VFP-side reservation,
      // NPipe and the L/S unit are each 1 cycle.
      // Operand cycles [2, 1, 1, 1, 1, 1].
1139  InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1140                               InstrStage<1, [A9_MUX0], 0>,
1141                               InstrStage<1, [A9_DRegsN],   0, Required>,
1142                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1143                               InstrStage<1, [A9_NPipe], 0>,
1144                               InstrStage<1, [A9_LSUnit]>],
1145                              [2, 1, 1, 1, 1, 1]>,
1146  //
1147  // VST2x2u
      // Compared with IIC_VST2u: the VFP-side reservation, NPipe and the L/S
      // unit are each 3 cycles. Operand cycles [2, 1, 1, 1, 1, 1, 2, 2].
1148  InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1149                               InstrStage<1, [A9_MUX0], 0>,
1150                               InstrStage<1, [A9_DRegsN],   0, Required>,
1151                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1152                               InstrStage<3, [A9_NPipe], 0>,
1153                               InstrStage<3, [A9_LSUnit]>],
1154                              [2, 1, 1, 1, 1, 1, 2, 2]>,
1155  //
1156  // VST2ln
1157  InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1158                               InstrStage<1, [A9_MUX0], 0>,
1159                               InstrStage<1, [A9_DRegsN],   0, Required>,
1160                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1161                               InstrStage<1, [A9_NPipe], 0>,
1162                               InstrStage<1, [A9_LSUnit]>],
1163                              [1, 1, 1, 1]>,
1164  //
1165  // VST2lnu
1166  InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1167                               InstrStage<1, [A9_MUX0], 0>,
1168                               InstrStage<1, [A9_DRegsN],   0, Required>,
1169                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1170                               InstrStage<1, [A9_NPipe], 0>,
1171                               InstrStage<1, [A9_LSUnit]>],
1172                              [2, 1, 1, 1, 1, 1]>,
1173  //
  // VST3 family: VST3, VST3u, VST3ln, VST3lnu.
  // Same six-stage layout as the other NEON stores; all stages start in the
  // same cycle (time increment 0 on every stage but the last). VST3 and VST3u
  // hold A9_DRegsVFP, A9_NPipe and A9_LSUnit for 2 cycles, the ln forms for 3.
  // The u forms prepend an operand cycle of 2 and add one more cycle of 1.
  //
1174  // VST3
1175  InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1176                               InstrStage<1, [A9_MUX0], 0>,
1177                               InstrStage<1, [A9_DRegsN],   0, Required>,
1178                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1179                               InstrStage<2, [A9_NPipe], 0>,
1180                               InstrStage<2, [A9_LSUnit]>],
1181                              [1, 1, 1, 1, 2]>,
1182  //
1183  // VST3u
1184  InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1185                               InstrStage<1, [A9_MUX0], 0>,
1186                               InstrStage<1, [A9_DRegsN],   0, Required>,
1187                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1188                               InstrStage<2, [A9_NPipe], 0>,
1189                               InstrStage<2, [A9_LSUnit]>],
1190                              [2, 1, 1, 1, 1, 1, 2]>,
1191  //
1192  // VST3ln
1193  InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1194                               InstrStage<1, [A9_MUX0], 0>,
1195                               InstrStage<1, [A9_DRegsN],   0, Required>,
1196                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1197                               InstrStage<3, [A9_NPipe], 0>,
1198                               InstrStage<3, [A9_LSUnit]>],
1199                              [1, 1, 1, 1, 2]>,
1200  //
1201  // VST3lnu
1202  InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1203                               InstrStage<1, [A9_MUX0], 0>,
1204                               InstrStage<1, [A9_DRegsN],   0, Required>,
1205                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1206                               InstrStage<3, [A9_NPipe], 0>,
1207                               InstrStage<3, [A9_LSUnit]>],
1208                              [2, 1, 1, 1, 1, 1, 2]>,
1209  //
  // VST4 family: VST4, VST4u, VST4ln, VST4lnu.
  // All four entries use identical stages: the six units start in the same
  // cycle and A9_DRegsVFP, A9_NPipe and A9_LSUnit are each held for 2 cycles.
  // Only the operand-cycle lists differ: the u forms prepend a cycle of 2 and
  // add one more cycle of 1 to the six-entry list of the non-u forms.
  //
1210  // VST4
1211  InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1212                               InstrStage<1, [A9_MUX0], 0>,
1213                               InstrStage<1, [A9_DRegsN],   0, Required>,
1214                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1215                               InstrStage<2, [A9_NPipe], 0>,
1216                               InstrStage<2, [A9_LSUnit]>],
1217                              [1, 1, 1, 1, 2, 2]>,
1218  //
1219  // VST4u
1220  InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1221                               InstrStage<1, [A9_MUX0], 0>,
1222                               InstrStage<1, [A9_DRegsN],   0, Required>,
1223                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1224                               InstrStage<2, [A9_NPipe], 0>,
1225                               InstrStage<2, [A9_LSUnit]>],
1226                              [2, 1, 1, 1, 1, 1, 2, 2]>,
1227  //
1228  // VST4ln
1229  InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1230                               InstrStage<1, [A9_MUX0], 0>,
1231                               InstrStage<1, [A9_DRegsN],   0, Required>,
1232                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1233                               InstrStage<2, [A9_NPipe], 0>,
1234                               InstrStage<2, [A9_LSUnit]>],
1235                              [1, 1, 1, 1, 2, 2]>,
1236  //
1237  // VST4lnu
1238  InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1239                               InstrStage<1, [A9_MUX0], 0>,
1240                               InstrStage<1, [A9_DRegsN],   0, Required>,
1241                               InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1242                               InstrStage<2, [A9_NPipe], 0>,
1243                               InstrStage<2, [A9_LSUnit]>],
1244                              [2, 1, 1, 1, 1, 1, 2, 2]>,
1245
1246  //
  // Integer unary and saturating ("Q") unary, D- and Q-register forms.
  // All four entries use the same stages: issue slot, A9_MUX0, A9_DRegsN
  // (Required), A9_DRegsVFP (Reserved for the 6-cycle writeback plus one) and
  // one cycle of A9_NPipe, all starting in the same cycle. The plain forms
  // read their source at cycle 2, the Q forms at cycle 1; both write the
  // result at cycle 4.
  //
1247  // Double-register Integer Unary
1248  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1249                               InstrStage<1, [A9_MUX0], 0>,
1250                               InstrStage<1, [A9_DRegsN],   0, Required>,
1251                               // Extra latency cycles since wbck is 6 cycles
1252                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1253                               InstrStage<1, [A9_NPipe]>],
1254                              [4, 2]>,
1255  //
1256  // Quad-register Integer Unary
1257  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1258                               InstrStage<1, [A9_MUX0], 0>,
1259                               InstrStage<1, [A9_DRegsN],   0, Required>,
1260                               // Extra latency cycles since wbck is 6 cycles
1261                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1262                               InstrStage<1, [A9_NPipe]>],
1263                              [4, 2]>,
1264  //
1265  // Double-register Integer Q-Unary
1266  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1267                               InstrStage<1, [A9_MUX0], 0>,
1268                               InstrStage<1, [A9_DRegsN],   0, Required>,
1269                               // Extra latency cycles since wbck is 6 cycles
1270                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1271                               InstrStage<1, [A9_NPipe]>],
1272                              [4, 1]>,
1273  //
1274  // Quad-register Integer Q-Unary
1275  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1276                               InstrStage<1, [A9_MUX0], 0>,
1277                               InstrStage<1, [A9_DRegsN],   0, Required>,
1278                               // Extra latency cycles since wbck is 6 cycles
1279                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1280                               InstrStage<1, [A9_NPipe]>],
1281                              [4, 1]>,
1282  //
  // Integer binary and subtract, D- and Q-register forms.
  // The four entries share identical stages (7-cycle A9_DRegsVFP reservation,
  // one cycle of A9_NPipe, all stages starting together). They differ only in
  // the last operand cycle: binary reads both sources at cycle 2, subtract
  // reads its second source at cycle 1. The result is written at cycle 3.
  //
1283  // Double-register Integer Binary
1284  InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1285                               InstrStage<1, [A9_MUX0], 0>,
1286                               InstrStage<1, [A9_DRegsN],   0, Required>,
1287                               // Extra latency cycles since wbck is 6 cycles
1288                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1289                               InstrStage<1, [A9_NPipe]>],
1290                              [3, 2, 2]>,
1291  //
1292  // Quad-register Integer Binary
1293  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1294                               InstrStage<1, [A9_MUX0], 0>,
1295                               InstrStage<1, [A9_DRegsN],   0, Required>,
1296                               // Extra latency cycles since wbck is 6 cycles
1297                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1298                               InstrStage<1, [A9_NPipe]>],
1299                              [3, 2, 2]>,
1300  //
1301  // Double-register Integer Subtract
1302  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1303                               InstrStage<1, [A9_MUX0], 0>,
1304                               InstrStage<1, [A9_DRegsN],   0, Required>,
1305                               // Extra latency cycles since wbck is 6 cycles
1306                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1307                               InstrStage<1, [A9_NPipe]>],
1308                              [3, 2, 1]>,
1309  //
1310  // Quad-register Integer Subtract
1311  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1312                               InstrStage<1, [A9_MUX0], 0>,
1313                               InstrStage<1, [A9_DRegsN],   0, Required>,
1314                               // Extra latency cycles since wbck is 6 cycles
1315                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1316                               InstrStage<1, [A9_NPipe]>],
1317                              [3, 2, 1]>,
1318  //
  // Integer shifts, D- and Q-register forms, in a 3-cycle and a 4-cycle
  // flavour. Stages are identical across all four entries (7-cycle
  // A9_DRegsVFP reservation, one cycle of A9_NPipe, all stages starting
  // together); both sources are read at cycle 1. Only the result cycle
  // differs: 3 for IIC_VSHLi*, 4 for IIC_VSHLi4*.
  //
1319  // Double-register Integer Shift
1320  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1321                               InstrStage<1, [A9_MUX0], 0>,
1322                               InstrStage<1, [A9_DRegsN],   0, Required>,
1323                               // Extra latency cycles since wbck is 6 cycles
1324                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1325                               InstrStage<1, [A9_NPipe]>],
1326                              [3, 1, 1]>,
1327  //
1328  // Quad-register Integer Shift
1329  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1330                               InstrStage<1, [A9_MUX0], 0>,
1331                               InstrStage<1, [A9_DRegsN],   0, Required>,
1332                               // Extra latency cycles since wbck is 6 cycles
1333                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1334                               InstrStage<1, [A9_NPipe]>],
1335                              [3, 1, 1]>,
1336  //
1337  // Double-register Integer Shift (4 cycle)
1338  InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1339                               InstrStage<1, [A9_MUX0], 0>,
1340                               InstrStage<1, [A9_DRegsN],   0, Required>,
1341                               // Extra latency cycles since wbck is 6 cycles
1342                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1343                               InstrStage<1, [A9_NPipe]>],
1344                              [4, 1, 1]>,
1345  //
1346  // Quad-register Integer Shift (4 cycle)
1347  InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1348                               InstrStage<1, [A9_MUX0], 0>,
1349                               InstrStage<1, [A9_DRegsN],   0, Required>,
1350                               // Extra latency cycles since wbck is 6 cycles
1351                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1352                               InstrStage<1, [A9_NPipe]>],
1353                              [4, 1, 1]>,
1354  //
  // 4-cycle integer binary and subtract, D- and Q-register forms.
  // Stages match the 3-cycle IIC_VBINi*/IIC_VSUBi* entries (7-cycle
  // A9_DRegsVFP reservation, one cycle of A9_NPipe, all stages starting
  // together); the result cycle is 4 instead of 3. Binary reads both sources
  // at cycle 2, subtract reads its second source at cycle 1.
  //
1355  // Double-register Integer Binary (4 cycle)
1356  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1357                               InstrStage<1, [A9_MUX0], 0>,
1358                               InstrStage<1, [A9_DRegsN],   0, Required>,
1359                               // Extra latency cycles since wbck is 6 cycles
1360                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1361                               InstrStage<1, [A9_NPipe]>],
1362                              [4, 2, 2]>,
1363  //
1364  // Quad-register Integer Binary (4 cycle)
1365  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1366                               InstrStage<1, [A9_MUX0], 0>,
1367                               InstrStage<1, [A9_DRegsN],   0, Required>,
1368                               // Extra latency cycles since wbck is 6 cycles
1369                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1370                               InstrStage<1, [A9_NPipe]>],
1371                              [4, 2, 2]>,
1372  //
1373  // Double-register Integer Subtract (4 cycle)
1374  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1375                               InstrStage<1, [A9_MUX0], 0>,
1376                               InstrStage<1, [A9_DRegsN],   0, Required>,
1377                               // Extra latency cycles since wbck is 6 cycles
1378                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1379                               InstrStage<1, [A9_NPipe]>],
1380                              [4, 2, 1]>,
1381  //
1382  // Quad-register Integer Subtract (4 cycle)
1383  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1384                               InstrStage<1, [A9_MUX0], 0>,
1385                               InstrStage<1, [A9_DRegsN],   0, Required>,
1386                               // Extra latency cycles since wbck is 6 cycles
1387                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1388                               InstrStage<1, [A9_NPipe]>],
1389                              [4, 2, 1]>,
1390
1391  //
  // Integer count, D- and Q-register forms. The Q form is modelled as a
  // two-cycle operation: A9_NPipe is held for 2 cycles, the A9_DRegsVFP
  // reservation grows from 7 to 8 cycles, and the result cycle moves from 3
  // to 4. In both entries all stages start in the same cycle.
  //
1392  // Double-register Integer Count
1393  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1394                               InstrStage<1, [A9_MUX0], 0>,
1395                               InstrStage<1, [A9_DRegsN],   0, Required>,
1396                               // Extra latency cycles since wbck is 6 cycles
1397                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1398                               InstrStage<1, [A9_NPipe]>],
1399                              [3, 2, 2]>,
1400  //
1401  // Quad-register Integer Count
1402  // Result written in N3, but that is relative to the last cycle of multicycle,
1403  // so we use 4 for those cases
1404  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1405                               InstrStage<1, [A9_MUX0], 0>,
1406                               InstrStage<1, [A9_DRegsN],   0, Required>,
1407                               // Extra latency cycles since wbck is 7 cycles
1408                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1409                               InstrStage<2, [A9_NPipe]>],
1410                              [4, 2, 2]>,
1411  //
  // Absolute-difference-accumulate and pairwise-add-long, D and Q forms.
  // The D forms hold A9_NPipe for 1 cycle, the Q forms for 2; everything else
  // (7-cycle A9_DRegsVFP reservation, result at cycle 6) is the same for both
  // widths. In every entry all stages start in the same cycle.
  // NOTE(review): other two-cycle entries in this table (e.g. IIC_VCNTiQ,
  // IIC_VMULi16Q) use a 7-cycle writeback, an 8-cycle A9_DRegsVFP reservation
  // and a result cycle one later than their D form. IIC_VABAQ and IIC_VPALiQ
  // do not -- confirm against the TRM timing tables whether that is intended.
  //
1412  // Double-register Absolute Difference and Accumulate
1413  InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1414                               InstrStage<1, [A9_MUX0], 0>,
1415                               InstrStage<1, [A9_DRegsN],   0, Required>,
1416                               // Extra latency cycles since wbck is 6 cycles
1417                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1418                               InstrStage<1, [A9_NPipe]>],
1419                              [6, 3, 2, 1]>,
1420  //
1421  // Quad-register Absolute Difference and Accumulate
1422  InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1423                               InstrStage<1, [A9_MUX0], 0>,
1424                               InstrStage<1, [A9_DRegsN],   0, Required>,
1425                               // Extra latency cycles since wbck is 6 cycles
1426                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1427                               InstrStage<2, [A9_NPipe]>],
1428                              [6, 3, 2, 1]>,
1429  //
1430  // Double-register Integer Pair Add Long
1431  InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1432                               InstrStage<1, [A9_MUX0], 0>,
1433                               InstrStage<1, [A9_DRegsN],   0, Required>,
1434                               // Extra latency cycles since wbck is 6 cycles
1435                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1436                               InstrStage<1, [A9_NPipe]>],
1437                              [6, 3, 1]>,
1438  //
1439  // Quad-register Integer Pair Add Long
1440  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1441                               InstrStage<1, [A9_MUX0], 0>,
1442                               InstrStage<1, [A9_DRegsN],   0, Required>,
1443                               // Extra latency cycles since wbck is 6 cycles
1444                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1445                               InstrStage<2, [A9_NPipe]>],
1446                              [6, 3, 1]>,
1447
1448  //
  // Integer multiplies. A9_NPipe occupancy, the A9_DRegsVFP reservation and
  // the result cycle grow together:
  //   IIC_VMULi16D: NPipe 1, DRegsVFP  7, result at cycle 6
  //   IIC_VMULi16Q: NPipe 2, DRegsVFP  8, result at cycle 7
  //   IIC_VMULi32D: NPipe 2, DRegsVFP  8, result at cycle 7
  //   IIC_VMULi32Q: NPipe 4, DRegsVFP 10, result at cycle 9
  // The .32 forms read their second source at cycle 1 rather than 2. In every
  // entry all stages start in the same cycle.
  //
1449  // Double-register Integer Multiply (.8, .16)
1450  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1451                               InstrStage<1, [A9_MUX0], 0>,
1452                               InstrStage<1, [A9_DRegsN],   0, Required>,
1453                               // Extra latency cycles since wbck is 6 cycles
1454                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1455                               InstrStage<1, [A9_NPipe]>],
1456                              [6, 2, 2]>,
1457  //
1458  // Quad-register Integer Multiply (.8, .16)
1459  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1460                               InstrStage<1, [A9_MUX0], 0>,
1461                               InstrStage<1, [A9_DRegsN],   0, Required>,
1462                               // Extra latency cycles since wbck is 7 cycles
1463                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1464                               InstrStage<2, [A9_NPipe]>],
1465                              [7, 2, 2]>,
1466
1467  //
1468  // Double-register Integer Multiply (.32)
1469  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1470                               InstrStage<1, [A9_MUX0], 0>,
1471                               InstrStage<1, [A9_DRegsN],   0, Required>,
1472                               // Extra latency cycles since wbck is 7 cycles
1473                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1474                               InstrStage<2, [A9_NPipe]>],
1475                              [7, 2, 1]>,
1476  //
1477  // Quad-register Integer Multiply (.32)
1478  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1479                               InstrStage<1, [A9_MUX0], 0>,
1480                               InstrStage<1, [A9_DRegsN],   0, Required>,
1481                               // Extra latency cycles since wbck is 9 cycles
1482                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1483                               InstrStage<4, [A9_NPipe]>],
1484                              [9, 2, 1]>,
1485  //
  // Integer multiply-accumulates. Stage durations mirror the integer
  // multiplies of the same width and element size:
  //   IIC_VMACi16D: NPipe 1, DRegsVFP  7, result at cycle 6
  //   IIC_VMACi32D: NPipe 2, DRegsVFP  8, result at cycle 7
  //   IIC_VMACi16Q: NPipe 2, DRegsVFP  8, result at cycle 7
  //   IIC_VMACi32Q: NPipe 4, DRegsVFP 10, result at cycle 9
  // Each entry lists four operand cycles; the second is always 3, and the
  // .32 forms use 1 instead of 2 for the last. In every entry all stages
  // start in the same cycle.
  //
1486  // Double-register Integer Multiply-Accumulate (.8, .16)
1487  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1488                               InstrStage<1, [A9_MUX0], 0>,
1489                               InstrStage<1, [A9_DRegsN],   0, Required>,
1490                               // Extra latency cycles since wbck is 6 cycles
1491                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1492                               InstrStage<1, [A9_NPipe]>],
1493                              [6, 3, 2, 2]>,
1494  //
1495  // Double-register Integer Multiply-Accumulate (.32)
1496  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1497                               InstrStage<1, [A9_MUX0], 0>,
1498                               InstrStage<1, [A9_DRegsN],   0, Required>,
1499                               // Extra latency cycles since wbck is 7 cycles
1500                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1501                               InstrStage<2, [A9_NPipe]>],
1502                              [7, 3, 2, 1]>,
1503  //
1504  // Quad-register Integer Multiply-Accumulate (.8, .16)
1505  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1506                               InstrStage<1, [A9_MUX0], 0>,
1507                               InstrStage<1, [A9_DRegsN],   0, Required>,
1508                               // Extra latency cycles since wbck is 7 cycles
1509                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1510                               InstrStage<2, [A9_NPipe]>],
1511                              [7, 3, 2, 2]>,
1512  //
1513  // Quad-register Integer Multiply-Accumulate (.32)
1514  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1515                               InstrStage<1, [A9_MUX0], 0>,
1516                               InstrStage<1, [A9_DRegsN],   0, Required>,
1517                               // Extra latency cycles since wbck is 9 cycles
1518                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1519                               InstrStage<4, [A9_NPipe]>],
1520                              [9, 3, 2, 1]>,
1521
1522  //
  // NEON register moves. All four entries issue on either issue slot, take
  // A9_MUX0, require A9_DRegsN and hold A9_NPipe for one cycle, with every
  // stage starting in the same cycle. IIC_VMOV reserves A9_DRegsVFP for a
  // single cycle; the immediate and permute moves reserve it for 7 cycles
  // (6-cycle writeback plus one).
  //
1523  // Move
1524  InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1525                               InstrStage<1, [A9_MUX0], 0>,
1526                               InstrStage<1, [A9_DRegsN],   0, Required>,
1527                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1528                               InstrStage<1, [A9_NPipe]>],
1529                              [1,1]>,
1530  //
1531  // Move Immediate
  // Single operand cycle: the entry lists only the result, at cycle 3.
1532  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1533                               InstrStage<1, [A9_MUX0], 0>,
1534                               InstrStage<1, [A9_DRegsN],   0, Required>,
1535                               // Extra latency cycles since wbck is 6 cycles
1536                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1537                               InstrStage<1, [A9_NPipe]>],
1538                              [3]>,
1539  //
1540  // Double-register Permute Move
1541  InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1542                               InstrStage<1, [A9_MUX0], 0>,
1543                               InstrStage<1, [A9_DRegsN],   0, Required>,
1544                               // Extra latency cycles since wbck is 6 cycles
1545                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1546                               InstrStage<1, [A9_NPipe]>],
1547                              [2, 1]>,
1548  //
1549  // Quad-register Permute Move
1550  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1551                               InstrStage<1, [A9_MUX0], 0>,
1552                               InstrStage<1, [A9_DRegsN],   0, Required>,
1553                               // Extra latency cycles since wbck is 6 cycles
1554                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1555                               InstrStage<1, [A9_NPipe]>],
1556                              [2, 1]>,
1557  //
  // Moves between integer and FP/NEON registers. The first four entries use
  // identical stages: A9_DRegsVFP is reserved for 3 cycles and A9_NPipe is
  // held for 1; only the operand-cycle lists differ. The lane move reserves
  // A9_DRegsVFP for 4 cycles and holds A9_NPipe for 2. In every entry all
  // stages start in the same cycle.
  //
1558  // Integer to Single-precision Move
1559  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1560                               InstrStage<1, [A9_MUX0], 0>,
1561                               InstrStage<1, [A9_DRegsN],   0, Required>,
1562                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1563                               InstrStage<1, [A9_NPipe]>],
1564                              [1, 1]>,
1565  //
1566  // Integer to Double-precision Move
1567  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1568                               InstrStage<1, [A9_MUX0], 0>,
1569                               InstrStage<1, [A9_DRegsN],   0, Required>,
1570                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1571                               InstrStage<1, [A9_NPipe]>],
1572                              [1, 1, 1]>,
1573  //
1574  // Single-precision to Integer Move
1575  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1576                               InstrStage<1, [A9_MUX0], 0>,
1577                               InstrStage<1, [A9_DRegsN],   0, Required>,
1578                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1579                               InstrStage<1, [A9_NPipe]>],
1580                              [2, 1]>,
1581  //
1582  // Double-precision to Integer Move
1583  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1584                               InstrStage<1, [A9_MUX0], 0>,
1585                               InstrStage<1, [A9_DRegsN],   0, Required>,
1586                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1587                               InstrStage<1, [A9_NPipe]>],
1588                              [2, 2, 1]>,
1589  //
1590  // Integer to Lane Move
1591  InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1592                               InstrStage<1, [A9_MUX0], 0>,
1593                               InstrStage<1, [A9_DRegsN],   0, Required>,
1594                               InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1595                               InstrStage<2, [A9_NPipe]>],
1596                              [3, 1, 1]>,
1597
1598  //
  // Narrowing move and FP unary operations. IIC_VMOVN and IIC_VUNAD are
  // single-cycle on A9_NPipe with a 7-cycle A9_DRegsVFP reservation.
  // IIC_VUNAQ holds A9_NPipe for 2 cycles, reserves A9_DRegsVFP for 8, and
  // writes its result one cycle later than the D form (6 instead of 5). In
  // every entry all stages start in the same cycle.
  //
1599  // Vector narrow move
1600  InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1601                               InstrStage<1, [A9_MUX0], 0>,
1602                               InstrStage<1, [A9_DRegsN],   0, Required>,
1603                               // Extra latency cycles since wbck is 6 cycles
1604                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1605                               InstrStage<1, [A9_NPipe]>],
1606                              [3, 1]>,
1607  //
1608  // Double-register FP Unary
1609  InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1610                               InstrStage<1, [A9_MUX0], 0>,
1611                               InstrStage<1, [A9_DRegsN],   0, Required>,
1612                               // Extra latency cycles since wbck is 6 cycles
1613                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1614                               InstrStage<1, [A9_NPipe]>],
1615                              [5, 2]>,
1616  //
1617  // Quad-register FP Unary
1618  // Result written in N5, but that is relative to the last cycle of multicycle,
1619  // so we use 6 for those cases
1620  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1621                               InstrStage<1, [A9_MUX0], 0>,
1622                               InstrStage<1, [A9_DRegsN],   0, Required>,
1623                               // Extra latency cycles since wbck is 7 cycles
1624                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1625                               InstrStage<2, [A9_NPipe]>],
1626                              [6, 2]>,
1627  //
  // FP binary operations: generic binary, pairwise (VPADD etc.) and multiply.
  // The three D-register entries hold A9_NPipe for 1 cycle, reserve
  // A9_DRegsVFP for 7 and write the result at cycle 5; they differ only in
  // the source read cycles. IIC_VBINQ holds A9_NPipe for 2 cycles, reserves
  // A9_DRegsVFP for 8 and writes its result at cycle 6. In every entry all
  // stages start in the same cycle.
  //
1628  // Double-register FP Binary
1629  // FIXME: We're using this itin for many instructions and [2, 2] here is too
1630  // optimistic.
1631  InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1632                               InstrStage<1, [A9_MUX0], 0>,
1633                               InstrStage<1, [A9_DRegsN],   0, Required>,
1634                               // Extra latency cycles since wbck is 6 cycles
1635                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1636                               InstrStage<1, [A9_NPipe]>],
1637                              [5, 2, 2]>,
1638
1639  //
1640  // VPADD, etc.
1641  InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1642                               InstrStage<1, [A9_MUX0], 0>,
1643                               InstrStage<1, [A9_DRegsN],   0, Required>,
1644                               // Extra latency cycles since wbck is 6 cycles
1645                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1646                               InstrStage<1, [A9_NPipe]>],
1647                              [5, 1, 1]>,
1648  //
1649  // Double-register FP VMUL
1650  InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1651                               InstrStage<1, [A9_MUX0], 0>,
1652                               InstrStage<1, [A9_DRegsN],   0, Required>,
1653                               // Extra latency cycles since wbck is 6 cycles
1654                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1655                               InstrStage<1, [A9_NPipe]>],
1656                              [5, 2, 1]>,
1657  //
1658  // Quad-register FP Binary
1659  // Result written in N5, but that is relative to the last cycle of multicycle,
1660  // so we use 6 for those cases
1661  // FIXME: We're using this itin for many instructions and [2, 2] here is too
1662  // optimistic.
1663  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1664                               InstrStage<1, [A9_MUX0], 0>,
1665                               InstrStage<1, [A9_DRegsN],   0, Required>,
1666                               // Extra latency cycles since wbck is 7 cycles
1667                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1668                               InstrStage<2, [A9_NPipe]>],
1669                              [6, 2, 2]>,
1670  //
1671  // Quad-register FP VMUL
  // Modelled as a two-cycle operation like the other quad-register FP entries
  // with a 7-cycle writeback (IIC_VUNAQ, IIC_VBINQ): A9_NPipe is held for 2
  // cycles, A9_DRegsVFP is reserved for 8, and the result cycle (6) is one
  // later than the D-register form. All stages start in the same cycle.
1672  InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1673                               InstrStage<1, [A9_MUX0], 0>,
1674                               InstrStage<1, [A9_DRegsN],   0, Required>,
1675                               // Extra latency cycles since wbck is 7 cycles
1676                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1677                               InstrStage<2, [A9_NPipe]>],
1678                              [6, 2, 1]>,
1679  //
  // FP multiply-accumulate, D- and Q-register forms. The D form holds
  // A9_NPipe for 2 cycles and reserves A9_DRegsVFP for 8; the Q form holds
  // A9_NPipe for 4 cycles and reserves A9_DRegsVFP for 10. In both entries
  // all stages start in the same cycle.
  //
1680  // Double-register FP Multiply-Accumulate
1681  InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1682                               InstrStage<1, [A9_MUX0], 0>,
1683                               InstrStage<1, [A9_DRegsN],   0, Required>,
1684                               // Extra latency cycles since wbck is 7 cycles
1685                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1686                               InstrStage<2, [A9_NPipe]>],
1687                              [6, 3, 2, 1]>,
1688  //
1689  // Quad-register FP Multiply-Accumulate
1690  // Result written in N9, but that is relative to the last cycle of multicycle,
1691  // so we use 10 for those cases
  // NOTE(review): the operand-cycle list below gives the result at cycle 8,
  // not 10 as the comment above says. One of the two is stale -- confirm
  // against the TRM timing tables before relying on either.
1692  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1693                               InstrStage<1, [A9_MUX0], 0>,
1694                               InstrStage<1, [A9_DRegsN],   0, Required>,
1695                               // Extra latency cycles since wbck is 9 cycles
1696                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1697                               InstrStage<4, [A9_NPipe]>],
1698                              [8, 4, 2, 1]>,
1699  //
1700  // Double-register Reciprocal Step (one cycle on the NEON pipeline)
1701  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1702                               InstrStage<1, [A9_MUX0], 0>,
1703                               InstrStage<1, [A9_DRegsN],   0, Required>,
1704                               // Extra latency: write-back (wbck) is 10 cycles
1705                               InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1706                               InstrStage<1, [A9_NPipe]>],
1707                              [9, 2, 2]>,
1708  //
1709  // Quad-register Reciprocal Step (two cycles on the NEON pipeline)
1710  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1711                               InstrStage<1, [A9_MUX0], 0>,
1712                               InstrStage<1, [A9_DRegsN],   0, Required>,
1713                               // Extra latency: write-back (wbck) is 11 cycles
1714                               InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1715                               InstrStage<2, [A9_NPipe]>],
1716                              [10, 2, 2]>,
1717  //
1718  // Double-register Permute (one cycle on the NEON pipeline)
1719  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1720                               InstrStage<1, [A9_MUX0], 0>,
1721                               InstrStage<1, [A9_DRegsN],   0, Required>,
1722                               // Extra latency: write-back (wbck) is 6 cycles
1723                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1724                               InstrStage<1, [A9_NPipe]>],
1725                              [2, 2, 1, 1]>,
1726  //
1727  // Quad-register Permute (two cycles on the NEON pipeline)
1728  // The result is written in N2, but that is relative to the last cycle of a
1729  // multicycle instruction, so we use 3 for those cases.
1730  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1731                               InstrStage<1, [A9_MUX0], 0>,
1732                               InstrStage<1, [A9_DRegsN],   0, Required>,
1733                               // Extra latency: write-back (wbck) is 7 cycles
1734                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1735                               InstrStage<2, [A9_NPipe]>],
1736                              [3, 3, 1, 1]>,
1737  //
1738  // Quad-register Permute (3 cycle issue; three cycles on the NEON pipeline)
1739  // The result is written in N2, but that is relative to the last cycle of a
1740  // multicycle instruction, so we use 4 for those cases.
1741  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1742                               InstrStage<1, [A9_MUX0], 0>,
1743                               InstrStage<1, [A9_DRegsN],   0, Required>,
1744                               // Extra latency: write-back (wbck) is 8 cycles
1745                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1746                               InstrStage<3, [A9_NPipe]>],
1747                              [4, 4, 1, 1]>,
1748
1749  //
1750  // Double-register VEXT (one cycle on the NEON pipeline)
1751  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1752                               InstrStage<1, [A9_MUX0], 0>,
1753                               InstrStage<1, [A9_DRegsN],   0, Required>,
1754                               // Extra latency: write-back (wbck) is 6 cycles
1755                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1756                               InstrStage<1, [A9_NPipe]>],
1757                              [2, 1, 1]>,
1758  //
1759  // Quad-register VEXT (two cycles on the NEON pipeline)
1760  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1761                               InstrStage<1, [A9_MUX0], 0>,
1762                               InstrStage<1, [A9_DRegsN],   0, Required>,
1763                               // Extra latency: write-back (wbck) is 7 cycles
1764                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1765                               InstrStage<2, [A9_NPipe]>],
1766                              [3, 1, 2]>,
1767  //
1768  // VTB (IIC_VTB1..IIC_VTB4; each adds one operand-cycle entry)
1769  InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1770                               InstrStage<1, [A9_MUX0], 0>,
1771                               InstrStage<1, [A9_DRegsN],   0, Required>,
1772                               // Extra latency: write-back (wbck) is 7 cycles
1773                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1774                               InstrStage<2, [A9_NPipe]>],
1775                              [3, 2, 1]>,
1776  InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1777                               InstrStage<1, [A9_MUX0], 0>,
1778                               InstrStage<2, [A9_DRegsN],   0, Required>, // NOTE(review): VTB1/VTB4 use 1
1779                               // Extra latency: write-back (wbck) is 7 cycles
1780                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1781                               InstrStage<2, [A9_NPipe]>],
1782                              [3, 2, 2, 1]>,
1783  InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1784                               InstrStage<1, [A9_MUX0], 0>,
1785                               InstrStage<2, [A9_DRegsN],   0, Required>, // NOTE(review): VTB1/VTB4 use 1
1786                               // Extra latency: write-back (wbck) is 8 cycles
1787                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1788                               InstrStage<3, [A9_NPipe]>],
1789                              [4, 2, 2, 3, 1]>,
1790  InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1791                               InstrStage<1, [A9_MUX0], 0>,
1792                               InstrStage<1, [A9_DRegsN],   0, Required>,
1793                               // Extra latency: write-back (wbck) is 8 cycles
1794                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1795                               InstrStage<3, [A9_NPipe]>],
1796                              [4, 2, 2, 3, 3, 1]>,
1797  //
1798  // VTBX (IIC_VTBX1..IIC_VTBX4; each adds one operand-cycle entry)
1799  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1800                               InstrStage<1, [A9_MUX0], 0>,
1801                               InstrStage<1, [A9_DRegsN],   0, Required>,
1802                               // Extra latency: write-back (wbck) is 7 cycles
1803                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1804                               InstrStage<2, [A9_NPipe]>],
1805                              [3, 1, 2, 1]>,
1806  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1807                               InstrStage<1, [A9_MUX0], 0>,
1808                               InstrStage<1, [A9_DRegsN],   0, Required>,
1809                               // Extra latency: write-back (wbck) is 7 cycles
1810                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1811                               InstrStage<2, [A9_NPipe]>],
1812                              [3, 1, 2, 2, 1]>,
1813  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1814                               InstrStage<1, [A9_MUX0], 0>,
1815                               InstrStage<1, [A9_DRegsN],   0, Required>,
1816                               // Extra latency: write-back (wbck) is 8 cycles
1817                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1818                               InstrStage<3, [A9_NPipe]>],
1819                              [4, 1, 2, 2, 3, 1]>,
1820  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1821                               InstrStage<1, [A9_MUX0], 0>,
1822                               InstrStage<1, [A9_DRegsN],   0, Required>,
1823                               // Extra latency: write-back (wbck) is 8 cycles
1824                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1825                               InstrStage<3, [A9_NPipe]>], // 3 cycles, as for VTB4/VTBX3
1826                              [4, 1, 2, 2, 3, 3, 1]>
1827]>;
1828