1//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the itinerary class data for the ARM Cortex A8 processors.
11//
12//===----------------------------------------------------------------------===//
13
14//
15// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
16// Functional Units.
17def A8_Pipe0   : FuncUnit; // integer pipeline 0 (multiplies and load/store multiple issue here only)
18def A8_Pipe1   : FuncUnit; // integer pipeline 1 (dual-issue partner of pipeline 0)
19def A8_LSPipe  : FuncUnit; // Load / store pipeline (also occupied by VFP/NEON loads and stores)
20def A8_NPipe   : FuncUnit; // NEON ALU/MUL pipe (VFP arithmetic itineraries execute here too)
21def A8_NLSPipe : FuncUnit; // NEON load / store pipe
22//
23// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
24//
25def CortexA8Itineraries : ProcessorItineraries<
26  [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
27  [], [
28  // Two fully-pipelined integer ALU pipelines
29  //
30  // No operand cycles
31  InstrItinData<IIC_iALUx    , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
32  //
33  // Binary Instructions that produce a result
34  InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
35  InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
36  InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
37  InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
38  InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
39  //
40  // Bitwise Instructions that produce a result
41  InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
42  InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
43  InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
44  InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
45  //
46  // Unary Instructions that produce a result
47  InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
48  InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
49  //
50  // Zero and sign extension instructions
51  InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
52  InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
53  InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
54  //
55  // Compare instructions
56  InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
57  InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
58  InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
59  InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
60  //
61  // Test instructions
62  InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
63  InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
64  InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
65  InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
66  //
67  // Move instructions, unconditional
68  InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
69  InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
70  InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
71  InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
72  InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
73                             InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
74  InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
75                                  InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
76                                  InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
77  InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
78                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
79                               InstrStage<1, [A8_LSPipe]>], [5]>,
80  //
81  // Move instructions, conditional
82  InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
83  InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
84  InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
85  InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
86  InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
87                              InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
88  //
89  // MVN instructions
90  InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
91  InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
92  InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
93  InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
94
95  // Integer multiply pipeline
96  // Result written in E5, but that is relative to the last cycle of multicycle,
97  // so we use 6 for those cases
98  //
99  InstrItinData<IIC_iMUL16   , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
100  InstrItinData<IIC_iMAC16   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
101  InstrItinData<IIC_iMUL32   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
102  InstrItinData<IIC_iMAC32   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
103  InstrItinData<IIC_iMUL64   , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
104  InstrItinData<IIC_iMAC64   , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
105
106  // Integer load pipeline
107  //
108  // Immediate offset
109  InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
110                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
111  InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
112                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
113  InstrItinData<IIC_iLoad_d_i,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
114                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
115  //
116  // Register offset
117  InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
118                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
119  InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
120                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
121  InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
122                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
123  //
124  // Scaled register offset, issues over 2 cycles
125  // FIXME: lsl by 2 takes 1 cycle.
126  InstrItinData<IIC_iLoad_si  , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
127                                 InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
128  InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
129                                 InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
130  //
131  // Immediate offset with update
132  InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
133                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
134  InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
135                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
136  //
137  // Register offset with update
138  InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
139                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
140  InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
141                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
142  InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
143                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
144  //
145  // Scaled register offset with update, issues over 2 cycles
146  InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
147                                 InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
148  InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
149                                  InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
150  //
151  // Load multiple, def is the 5th operand. Pipeline 0 only.
152  // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
153  InstrItinData<IIC_iLoad_m  , [InstrStage<2, [A8_Pipe0], 0>,
154                                InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
155  //
156  // Load multiple + update, defs are the 1st and 5th operands.
157  InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
158                                InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
159  //
160  // Load multiple plus branch
161  InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
162                                InstrStage<3, [A8_LSPipe]>,
163                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
164                               [1, 2, 1, 1, 3]>,
165  //
166  // Pop, def is the 3rd operand.
167  InstrItinData<IIC_iPop  ,    [InstrStage<3, [A8_Pipe0], 0>,
168                                InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
169  //
170  // Pop plus branch, def is the 3rd operand.
171  InstrItinData<IIC_iPop_Br,   [InstrStage<3, [A8_Pipe0], 0>,
172                                InstrStage<3, [A8_LSPipe]>,
173                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
174                               [1, 1, 3]>,
175
176  //
177  // iLoadi + iALUr for t2LDRpci_pic.
178  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
179                                InstrStage<1, [A8_LSPipe]>,
180                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
181
182
183  // Integer store pipeline
184  //
185  // Immediate offset
186  InstrItinData<IIC_iStore_i  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
187                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
188  InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
189                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
190  InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
191                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
192  //
193  // Register offset
194  InstrItinData<IIC_iStore_r  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
195                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
196  InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
197                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
198  InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
199                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
200  //
201  // Scaled register offset, issues over 2 cycles
202  InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
203                                 InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
204  InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
205                                  InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
206  //
207  // Immediate offset with update
208  InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
209                                 InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
210  InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
211                                 InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
212  //
213  // Register offset with update
214  InstrItinData<IIC_iStore_ru  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
215                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
216  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
217                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
218  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
219                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
220  //
221  // Scaled register offset with update, issues over 2 cycles
222  InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
223                                 InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
224  InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
225                                   InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
226  //
227  // Store multiple. Pipeline 0 only.
228  // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
229  InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
230                                InstrStage<2, [A8_LSPipe]>]>,
231  //
232  // Store multiple + update
233  InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
234                                InstrStage<2, [A8_LSPipe]>], [2]>,
235
236  //
237  // Preload
238  InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
239
240  // Branch
241  //
242  // no delay slots, so the latency of a branch is unimportant
243  InstrItinData<IIC_Br      , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
244
245  // VFP
246  // Issued through the integer pipeline and executed in the NEON unit. We
247  // assume RunFast mode so that the NFP pipeline is used for single-precision
248  // operations when possible.
249  //
250  // FP Special Register to Integer Register File Move
251  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
252                              InstrStage<1, [A8_NLSPipe]>], [20]>,
253  //
254  // Single-precision FP Unary
255  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
256                               InstrStage<1, [A8_NPipe]>], [7, 1]>,
257  //
258  // Double-precision FP Unary
259  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
260                               InstrStage<4, [A8_NPipe], 0>,
261                               InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
262  //
263  // Single-precision FP Compare
264  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
265                               InstrStage<1, [A8_NPipe]>], [1, 1]>,
266  //
267  // Double-precision FP Compare
268  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
269                               InstrStage<4, [A8_NPipe], 0>,
270                               InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
271  //
272  // Single to Double FP Convert
273  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
274                               InstrStage<7, [A8_NPipe], 0>,
275                               InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
276  //
277  // Double to Single FP Convert
278  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
279                               InstrStage<5, [A8_NPipe], 0>,
280                               InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
281  //
282  // Single-Precision FP to Integer Convert
283  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
284                               InstrStage<1, [A8_NPipe]>], [7, 1]>,
285  //
286  // Double-Precision FP to Integer Convert
287  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
288                               InstrStage<8, [A8_NPipe], 0>,
289                               InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
290  //
291  // Integer to Single-Precision FP Convert
292  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
293                               InstrStage<1, [A8_NPipe]>], [7, 1]>,
294  //
295  // Integer to Double-Precision FP Convert
296  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
297                               InstrStage<8, [A8_NPipe], 0>,
298                               InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
299  //
300  // Single-precision FP ALU
301  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
302                               InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
303  //
304  // Double-precision FP ALU
305  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
306                               InstrStage<9, [A8_NPipe], 0>,
307                               InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
308  //
309  // Single-precision FP Multiply
310  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
311                               InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
312  //
313  // Double-precision FP Multiply
314  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
315                               InstrStage<11, [A8_NPipe], 0>,
316                               InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
317  //
318  // Single-precision FP MAC
319  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
320                               InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
321  //
322  // Double-precision FP MAC
323  InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
324                               InstrStage<19, [A8_NPipe], 0>,
325                               InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
326  //
327  // Single-precision FP DIV
328  InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
329                               InstrStage<20, [A8_NPipe], 0>,
330                               InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
331  //
332  // Double-precision FP DIV
333  InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
334                               InstrStage<29, [A8_NPipe], 0>,
335                               InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
336  //
337  // Single-precision FP SQRT
338  InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
339                               InstrStage<19, [A8_NPipe], 0>,
340                               InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
341  //
342  // Double-precision FP SQRT
343  InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
344                               InstrStage<29, [A8_NPipe], 0>,
345                               InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
346
347  //
348  // Integer to Single-precision Move
349  InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
350                               InstrStage<1, [A8_NPipe]>],
351                              [2, 1]>,
352  //
353  // Integer to Double-precision Move
354  InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
355                               InstrStage<1, [A8_NPipe]>],
356                              [2, 1, 1]>,
357  //
358  // Single-precision to Integer Move
359  InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
360                               InstrStage<1, [A8_NPipe]>],
361                              [20, 1]>,
362  //
363  // Double-precision to Integer Move
364  InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
365                               InstrStage<1, [A8_NPipe]>],
366                              [20, 20, 1]>,
367
368  //
369  // Single-precision FP Load
370  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
371                               InstrStage<1, [A8_NLSPipe], 0>,
372                               InstrStage<1, [A8_LSPipe]>],
373                              [2, 1]>,
374  //
375  // Double-precision FP Load
376  InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
377                               InstrStage<1, [A8_NLSPipe], 0>,
378                               InstrStage<1, [A8_LSPipe]>],
379                              [2, 1]>,
380  //
381  // FP Load Multiple
382  // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
383  InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
384                               InstrStage<1, [A8_NLSPipe], 0>,
385                               InstrStage<1, [A8_LSPipe]>,
386                               InstrStage<1, [A8_NLSPipe], 0>,
387                               InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
388  //
389  // FP Load Multiple + update
390  InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
391                               InstrStage<1, [A8_NLSPipe], 0>,
392                               InstrStage<1, [A8_LSPipe]>,
393                               InstrStage<1, [A8_NLSPipe], 0>,
394                               InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
395  //
396  // Single-precision FP Store
397  InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
398                               InstrStage<1, [A8_NLSPipe], 0>,
399                               InstrStage<1, [A8_LSPipe]>],
400                              [1, 1]>,
401  //
402  // Double-precision FP Store
403  InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
404                               InstrStage<1, [A8_NLSPipe], 0>,
405                               InstrStage<1, [A8_LSPipe]>],
406                              [1, 1]>,
407  //
408  // FP Store Multiple
409  InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
410                               InstrStage<1, [A8_NLSPipe], 0>,
411                               InstrStage<1, [A8_LSPipe]>,
412                               InstrStage<1, [A8_NLSPipe], 0>,
413                               InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
414  //
415  // FP Store Multiple + update
416  InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
417                                InstrStage<1, [A8_NLSPipe], 0>,
418                                InstrStage<1, [A8_LSPipe]>,
419                                InstrStage<1, [A8_NLSPipe], 0>,
420                                InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
421
422  // NEON
423  // Issue through integer pipeline, and execute in NEON unit.
424  //
425  // VLD1
426  InstrItinData<IIC_VLD1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
427                               InstrStage<2, [A8_NLSPipe], 0>,
428                               InstrStage<2, [A8_LSPipe]>],
429                              [2, 1]>,
430  // VLD1x2
431  InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
432                               InstrStage<2, [A8_NLSPipe], 0>,
433                               InstrStage<2, [A8_LSPipe]>],
434                              [2, 2, 1]>,
435  //
436  // VLD1x3
437  InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
438                               InstrStage<3, [A8_NLSPipe], 0>,
439                               InstrStage<3, [A8_LSPipe]>],
440                              [2, 2, 3, 1]>,
441  //
442  // VLD1x4
443  InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
444                               InstrStage<3, [A8_NLSPipe], 0>,
445                               InstrStage<3, [A8_LSPipe]>],
446                              [2, 2, 3, 3, 1]>,
447  //
448  // VLD1u
449  InstrItinData<IIC_VLD1u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
450                               InstrStage<2, [A8_NLSPipe], 0>,
451                               InstrStage<2, [A8_LSPipe]>],
452                              [2, 2, 1]>,
453  //
454  // VLD1x2u
455  InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
456                               InstrStage<2, [A8_NLSPipe], 0>,
457                               InstrStage<2, [A8_LSPipe]>],
458                              [2, 2, 2, 1]>,
459  //
460  // VLD1x3u
461  InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
462                               InstrStage<3, [A8_NLSPipe], 0>,
463                               InstrStage<3, [A8_LSPipe]>],
464                              [2, 2, 3, 2, 1]>,
465  //
466  // VLD1x4u
467  InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
468                               InstrStage<3, [A8_NLSPipe], 0>,
469                               InstrStage<3, [A8_LSPipe]>],
470                              [2, 2, 3, 3, 2, 1]>,
471  //
472  // VLD1ln
473  InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
474                               InstrStage<3, [A8_NLSPipe], 0>,
475                               InstrStage<3, [A8_LSPipe]>],
476                              [3, 1, 1, 1]>,
477  //
478  // VLD1lnu
479  InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
480                               InstrStage<3, [A8_NLSPipe], 0>,
481                               InstrStage<3, [A8_LSPipe]>],
482                              [3, 2, 1, 1, 1, 1]>,
483  //
484  // VLD1dup
485  InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
486                               InstrStage<2, [A8_NLSPipe], 0>,
487                               InstrStage<2, [A8_LSPipe]>],
488                              [2, 1]>,
489  //
490  // VLD1dupu
491  InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
492                               InstrStage<2, [A8_NLSPipe], 0>,
493                               InstrStage<2, [A8_LSPipe]>],
494                              [2, 2, 1, 1]>,
495  //
496  // VLD2
497  InstrItinData<IIC_VLD2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
498                               InstrStage<2, [A8_NLSPipe], 0>,
499                               InstrStage<2, [A8_LSPipe]>],
500                              [2, 2, 1]>,
501  //
502  // VLD2x2
503  InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
504                               InstrStage<3, [A8_NLSPipe], 0>,
505                               InstrStage<3, [A8_LSPipe]>],
506                              [2, 2, 3, 3, 1]>,
507  //
508  // VLD2ln
509  InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
510                               InstrStage<3, [A8_NLSPipe], 0>,
511                               InstrStage<3, [A8_LSPipe]>],
512                              [3, 3, 1, 1, 1, 1]>,
513  //
514  // VLD2u
515  InstrItinData<IIC_VLD2u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
516                               InstrStage<2, [A8_NLSPipe], 0>,
517                               InstrStage<2, [A8_LSPipe]>],
518                              [2, 2, 2, 1, 1, 1]>,
519  //
520  // VLD2x2u
521  InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
522                               InstrStage<3, [A8_NLSPipe], 0>,
523                               InstrStage<3, [A8_LSPipe]>],
524                              [2, 2, 3, 3, 2, 1]>,
525  //
526  // VLD2lnu
527  InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
528                               InstrStage<3, [A8_NLSPipe], 0>,
529                               InstrStage<3, [A8_LSPipe]>],
530                              [3, 3, 2, 1, 1, 1, 1, 1]>,
531  //
532  // VLD2dup
533  InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
534                               InstrStage<2, [A8_NLSPipe], 0>,
535                               InstrStage<2, [A8_LSPipe]>],
536                              [2, 2, 1]>,
537  //
538  // VLD2dupu
539  InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
540                               InstrStage<2, [A8_NLSPipe], 0>,
541                               InstrStage<2, [A8_LSPipe]>],
542                              [2, 2, 2, 1, 1]>,
543  //
544  // VLD3
545  InstrItinData<IIC_VLD3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
546                               InstrStage<4, [A8_NLSPipe], 0>,
547                               InstrStage<4, [A8_LSPipe]>],
548                              [3, 3, 4, 1]>,
549  //
550  // VLD3ln
551  InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
552                               InstrStage<5, [A8_NLSPipe], 0>,
553                               InstrStage<5, [A8_LSPipe]>],
554                              [4, 4, 5, 1, 1, 1, 1, 2]>,
555  //
556  // VLD3u
557  InstrItinData<IIC_VLD3u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
558                               InstrStage<4, [A8_NLSPipe], 0>,
559                               InstrStage<4, [A8_LSPipe]>],
560                              [3, 3, 4, 2, 1]>,
561  //
562  // VLD3lnu
563  InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
564                               InstrStage<5, [A8_NLSPipe], 0>,
565                               InstrStage<5, [A8_LSPipe]>],
566                              [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
567  //
568  // VLD3dup
569  InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
570                               InstrStage<3, [A8_NLSPipe], 0>,
571                               InstrStage<3, [A8_LSPipe]>],
572                              [2, 2, 3, 1]>,
573  //
574  // VLD3dupu
575  InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
576                               InstrStage<3, [A8_NLSPipe], 0>,
577                               InstrStage<3, [A8_LSPipe]>],
578                              [2, 2, 3, 2, 1, 1]>,
579  //
580  // VLD4
581  InstrItinData<IIC_VLD4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
582                               InstrStage<4, [A8_NLSPipe], 0>,
583                               InstrStage<4, [A8_LSPipe]>],
584                              [3, 3, 4, 4, 1]>,
585  //
586  // VLD4ln
587  InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
588                               InstrStage<5, [A8_NLSPipe], 0>,
589                               InstrStage<5, [A8_LSPipe]>],
590                              [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
591  //
592  // VLD4u
593  InstrItinData<IIC_VLD4u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
594                               InstrStage<4, [A8_NLSPipe], 0>,
595                               InstrStage<4, [A8_LSPipe]>],
596                              [3, 3, 4, 4, 2, 1]>,
597  //
598  // VLD4lnu
599  InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
600                               InstrStage<5, [A8_NLSPipe], 0>,
601                               InstrStage<5, [A8_LSPipe]>],
602                              [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
603  //
604  // VLD4dup
605  InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
606                               InstrStage<3, [A8_NLSPipe], 0>,
607                               InstrStage<3, [A8_LSPipe]>],
608                              [2, 2, 3, 3, 1]>,
609  //
610  // VLD4dupu
611  InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
612                               InstrStage<3, [A8_NLSPipe], 0>,
613                               InstrStage<3, [A8_LSPipe]>],
614                              [2, 2, 3, 3, 2, 1, 1]>,
615  //
616  // VST1
617  InstrItinData<IIC_VST1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
618                               InstrStage<2, [A8_NLSPipe], 0>,
619                               InstrStage<2, [A8_LSPipe]>],
620                              [1, 1, 1]>,
621  //
622  // VST1x2
623  InstrItinData<IIC_VST1x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
624                               InstrStage<2, [A8_NLSPipe], 0>,
625                               InstrStage<2, [A8_LSPipe]>],
626                              [1, 1, 1, 1]>,
627  //
628  // VST1x3
629  InstrItinData<IIC_VST1x3,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
630                               InstrStage<3, [A8_NLSPipe], 0>,
631                               InstrStage<3, [A8_LSPipe]>],
632                              [1, 1, 1, 1, 2]>,
633  //
634  // VST1x4
635  InstrItinData<IIC_VST1x4,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
636                               InstrStage<3, [A8_NLSPipe], 0>,
637                               InstrStage<3, [A8_LSPipe]>],
638                              [1, 1, 1, 1, 2, 2]>,
639  //
640  // VST1u
641  InstrItinData<IIC_VST1u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
642                               InstrStage<2, [A8_NLSPipe], 0>,
643                               InstrStage<2, [A8_LSPipe]>],
644                              [2, 1, 1, 1, 1]>,
645  //
646  // VST1x2u
647  InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
648                               InstrStage<2, [A8_NLSPipe], 0>,
649                               InstrStage<2, [A8_LSPipe]>],
650                              [2, 1, 1, 1, 1, 1]>,
651  //
652  // VST1x3u
653  InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
654                               InstrStage<3, [A8_NLSPipe], 0>,
655                               InstrStage<3, [A8_LSPipe]>],
656                              [2, 1, 1, 1, 1, 1, 2]>,
657  //
658  // VST1x4u
659  InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
660                               InstrStage<3, [A8_NLSPipe], 0>,
661                               InstrStage<3, [A8_LSPipe]>],
662                              [2, 1, 1, 1, 1, 1, 2, 2]>,
663  //
664  // VST1ln
665  InstrItinData<IIC_VST1ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
666                               InstrStage<2, [A8_NLSPipe], 0>,
667                               InstrStage<2, [A8_LSPipe]>],
668                              [1, 1, 1]>,
669  //
670  // VST1lnu
671  InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
672                               InstrStage<2, [A8_NLSPipe], 0>,
673                               InstrStage<2, [A8_LSPipe]>],
674                              [2, 1, 1, 1, 1]>,
675  //
676  // VST2
677  InstrItinData<IIC_VST2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
678                               InstrStage<2, [A8_NLSPipe], 0>,
679                               InstrStage<2, [A8_LSPipe]>],
680                              [1, 1, 1, 1]>,
681  //
682  // VST2x2
683  InstrItinData<IIC_VST2x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
684                               InstrStage<4, [A8_NLSPipe], 0>,
685                               InstrStage<4, [A8_LSPipe]>],
686                              [1, 1, 1, 1, 2, 2]>,
687  //
688  // VST2u
689  InstrItinData<IIC_VST2u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
690                               InstrStage<2, [A8_NLSPipe], 0>,
691                               InstrStage<2, [A8_LSPipe]>],
692                              [2, 1, 1, 1, 1, 1]>,
693  //
694  // VST2x2u
695  InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
696                               InstrStage<4, [A8_NLSPipe], 0>,
697                               InstrStage<4, [A8_LSPipe]>],
698                              [2, 1, 1, 1, 1, 1, 2, 2]>,
699  //
700  // VST2ln
701  InstrItinData<IIC_VST2ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
702                               InstrStage<2, [A8_NLSPipe], 0>,
703                               InstrStage<2, [A8_LSPipe]>],
704                              [1, 1, 1, 1]>,
705  //
706  // VST2lnu
707  InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
708                               InstrStage<2, [A8_NLSPipe], 0>,
709                               InstrStage<2, [A8_LSPipe]>],
710                              [2, 1, 1, 1, 1, 1]>,
711  //
712  // VST3
713  InstrItinData<IIC_VST3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
714                               InstrStage<3, [A8_NLSPipe], 0>,
715                               InstrStage<3, [A8_LSPipe]>],
716                              [1, 1, 1, 1, 2]>,
717  //
718  // VST3u
719  InstrItinData<IIC_VST3u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
720                               InstrStage<3, [A8_NLSPipe], 0>,
721                               InstrStage<3, [A8_LSPipe]>],
722                              [2, 1, 1, 1, 1, 1, 2]>,
723  //
724  // VST3ln
725  InstrItinData<IIC_VST3ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
726                               InstrStage<3, [A8_NLSPipe], 0>,
727                               InstrStage<3, [A8_LSPipe]>],
728                              [1, 1, 1, 1, 2]>,
729  //
730  // VST3lnu
731  InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
732                               InstrStage<3, [A8_NLSPipe], 0>,
733                               InstrStage<3, [A8_LSPipe]>],
734                              [2, 1, 1, 1, 1, 1, 2]>,
735  //
736  // VST4
737  InstrItinData<IIC_VST4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
738                               InstrStage<4, [A8_NLSPipe], 0>,
739                               InstrStage<4, [A8_LSPipe]>],
740                              [1, 1, 1, 1, 2, 2]>,
741  //
742  // VST4u
743  InstrItinData<IIC_VST4u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
744                               InstrStage<4, [A8_NLSPipe], 0>,
745                               InstrStage<4, [A8_LSPipe]>],
746                              [2, 1, 1, 1, 1, 1, 2, 2]>,
747  //
748  // VST4ln
749  InstrItinData<IIC_VST4ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
750                               InstrStage<4, [A8_NLSPipe], 0>,
751                               InstrStage<4, [A8_LSPipe]>],
752                              [1, 1, 1, 1, 2, 2]>,
753  //
754  // VST4lnu
755  InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
756                               InstrStage<4, [A8_NLSPipe], 0>,
757                               InstrStage<4, [A8_LSPipe]>],
758                              [2, 1, 1, 1, 1, 1, 2, 2]>,
759  //
760  // Double-register FP Unary
761  InstrItinData<IIC_VUNAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
762                               InstrStage<1, [A8_NPipe]>], [5, 2]>,
763  //
764  // Quad-register FP Unary
765  // Result written in N5, but that is relative to the last cycle of multicycle,
766  // so we use 6 for those cases
767  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
768                               InstrStage<2, [A8_NPipe]>], [6, 2]>,
769  //
770  // Double-register FP Binary
771  InstrItinData<IIC_VBIND,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
772                               InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
773  //
774  // VPADD, etc.
775  InstrItinData<IIC_VPBIND,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
776                               InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
777  //
778  // Double-register FP VMUL
779  InstrItinData<IIC_VFMULD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
780                               InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
781
782  //
783  // Quad-register FP Binary
784  // Result written in N5, but that is relative to the last cycle of multicycle,
785  // so we use 6 for those cases
786  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
787                               InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
788  //
789  // Quad-register FP VMUL
790  InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
791                               InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
792  //
793  // Move
794  InstrItinData<IIC_VMOV,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
795                               InstrStage<1, [A8_NPipe]>], [1, 1]>,
796  //
797  // Move Immediate
798  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
799                               InstrStage<1, [A8_NPipe]>], [3]>,
800  //
801  // Double-register Permute Move
802  InstrItinData<IIC_VMOVD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
803                               InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
804  //
805  // Quad-register Permute Move
806  // Result written in N2, but that is relative to the last cycle of multicycle,
807  // so we use 3 for those cases
808  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
809                               InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
810  //
811  // Integer to Single-precision Move
812  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
813                               InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
814  //
815  // Integer to Double-precision Move
816  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
817                               InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
818  //
819  // Single-precision to Integer Move
820  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
821                               InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
822  //
823  // Double-precision to Integer Move
824  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
825                               InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
826  //
827  // Integer to Lane Move
828  InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
829                               InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
830  //
831  // Vector narrow move
832  InstrItinData<IIC_VMOVN   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
833                               InstrStage<1, [A8_NPipe]>], [2, 1]>,
834  //
835  // Double-register Permute
836  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
837                               InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
838  //
839  // Quad-register Permute
840  // Result written in N2, but that is relative to the last cycle of multicycle,
841  // so we use 3 for those cases
842  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
843                               InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
844  //
845  // Quad-register Permute (3 cycle issue)
846  // Result written in N2, but that is relative to the last cycle of multicycle,
847  // so we use 4 for those cases
848  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
849                               InstrStage<1, [A8_NLSPipe]>,
850                               InstrStage<1, [A8_NPipe], 0>,
851                               InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
852  //
  // Double-register FP Multiply-Accumulate
854  InstrItinData<IIC_VMACD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
855                               InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
856  //
  // Quad-register FP Multiply-Accumulate
858  // Result written in N9, but that is relative to the last cycle of multicycle,
859  // so we use 10 for those cases
860  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
861                               InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
862  //
  // Double-register Reciprocal Step
864  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
865                               InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
866  //
  // Quad-register Reciprocal Step
868  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
869                               InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
870  //
871  // Double-register Integer Count
872  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
873                               InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
874  //
875  // Quad-register Integer Count
876  // Result written in N3, but that is relative to the last cycle of multicycle,
877  // so we use 4 for those cases
878  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
879                               InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
880  //
881  // Double-register Integer Unary
882  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
883                               InstrStage<1, [A8_NPipe]>], [4, 2]>,
884  //
885  // Quad-register Integer Unary
886  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
887                               InstrStage<1, [A8_NPipe]>], [4, 2]>,
888  //
889  // Double-register Integer Q-Unary
890  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
891                               InstrStage<1, [A8_NPipe]>], [4, 1]>,
892  //
  // Quad-register Integer Q-Unary
894  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
895                               InstrStage<1, [A8_NPipe]>], [4, 1]>,
896  //
897  // Double-register Integer Binary
898  InstrItinData<IIC_VBINiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
899                               InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
900  //
901  // Quad-register Integer Binary
902  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
903                               InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
904  //
905  // Double-register Integer Binary (4 cycle)
906  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
907                               InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
908  //
909  // Quad-register Integer Binary (4 cycle)
910  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
911                               InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
912
913  //
914  // Double-register Integer Subtract
915  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
916                               InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
917  //
918  // Quad-register Integer Subtract
919  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
920                               InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
921  //
  // Double-register Integer Subtract (4 cycle)
923  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
924                               InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
925  //
  // Quad-register Integer Subtract (4 cycle)
927  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
928                               InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
929  //
930  // Double-register Integer Shift
931  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
932                               InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
933  //
934  // Quad-register Integer Shift
935  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
936                               InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
937  //
938  // Double-register Integer Shift (4 cycle)
939  InstrItinData<IIC_VSHLi4D,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
940                               InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
941  //
942  // Quad-register Integer Shift (4 cycle)
943  InstrItinData<IIC_VSHLi4Q,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
944                               InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
945  //
946  // Double-register Integer Pair Add Long
947  InstrItinData<IIC_VPALiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
948                               InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
949  //
950  // Quad-register Integer Pair Add Long
951  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
952                               InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
953  //
954  // Double-register Absolute Difference and Accumulate
955  InstrItinData<IIC_VABAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
956                               InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
957  //
958  // Quad-register Absolute Difference and Accumulate
959  InstrItinData<IIC_VABAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
960                               InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
961
962  //
963  // Double-register Integer Multiply (.8, .16)
964  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
965                               InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
966  //
967  // Double-register Integer Multiply (.32)
968  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
969                               InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
970  //
971  // Quad-register Integer Multiply (.8, .16)
972  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
973                               InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
974  //
  // Quad-register Integer Multiply (.32)
  // Multi-stage reservation: after the issue slot (A8_Pipe0 or A8_Pipe1),
  // A8_NPipe is held for 1 cycle, then A8_NLSPipe for 2 cycles; the ", 0" time
  // increment on the A8_NLSPipe stage makes the final 3-cycle A8_NPipe stage
  // start in the same cycle as it.
  // Operand cycles [9, 2, 1]: presumably the result first, then the two
  // sources -- confirm against the instruction's operand order.
  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                               InstrStage<1, [A8_NPipe]>,
                               InstrStage<2, [A8_NLSPipe], 0>,
                               InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
980  //
981  // Double-register Integer Multiply-Accumulate (.8, .16)
982  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
983                               InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
984  //
985  // Double-register Integer Multiply-Accumulate (.32)
986  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
987                               InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
988  //
989  // Quad-register Integer Multiply-Accumulate (.8, .16)
990  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
991                               InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
992  //
  // Quad-register Integer Multiply-Accumulate (.32)
  // Multi-stage reservation: after the issue slot (A8_Pipe0 or A8_Pipe1),
  // A8_NPipe is held for 1 cycle, then A8_NLSPipe for 2 cycles; the ", 0" time
  // increment on the A8_NLSPipe stage makes the final 3-cycle A8_NPipe stage
  // start in the same cycle as it.
  // Operand cycles [9, 3, 2, 1]: presumably the result first, then the
  // accumulator and the two multiplicands -- confirm against the
  // instruction's operand order.
  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                               InstrStage<1, [A8_NPipe]>,
                               InstrStage<2, [A8_NLSPipe], 0>,
                               InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
998  //
999  // Double-register VEXT
1000  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1001                               InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
1002  //
1003  // Quad-register VEXT
1004  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1005                               InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
1006  //
1007  // VTB
1008  InstrItinData<IIC_VTB1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1009                               InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
1010  InstrItinData<IIC_VTB2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1011                               InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
1012  InstrItinData<IIC_VTB3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1013                               InstrStage<1, [A8_NLSPipe]>,
1014                               InstrStage<1, [A8_NPipe], 0>,
1015                               InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
1016  InstrItinData<IIC_VTB4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1017                               InstrStage<1, [A8_NLSPipe]>,
1018                               InstrStage<1, [A8_NPipe], 0>,
1019                               InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
1020  //
1021  // VTBX
1022  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1023                               InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
1024  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1025                               InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
1026  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1027                               InstrStage<1, [A8_NLSPipe]>,
1028                               InstrStage<1, [A8_NPipe], 0>,
1029                               InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
1030  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1031                               InstrStage<1, [A8_NLSPipe]>,
1032                               InstrStage<1, [A8_NPipe], 0>,
1033                            InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
1034]>;
1035