//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Instruction scheduling annotations for out-of-order CPUs.
// These annotations are independent of the itinerary class defined below.
// Here we define the subtarget independent read/write per-operand resources.
// The subtarget schedule definitions will then map these to the subtarget's
// resource usages.
// For example:
// The instruction cycle timings table might contain an entry for an operation
// like the following:
// Rd <- ADD Rn, Rm, <shift> Rs
//  Uops | Latency from register | Uops - resource requirements - latency
//  2    | Rn: 1 Rm: 4 Rs: 4     | uop T0, Rm, Rs - P01 - 3
//       |                       | uopc Rd, Rn, T0 -  P01 - 1
// This is telling us that the result will be available in destination register
// Rd after a minimum of four cycles after the result in Rm and Rs is available
// (three cycles for the first micro-op plus one for the second) and one cycle
// after the result in Rn is available. The micro-ops can execute on resource
// P01.
// To model this, we need to express that we need to dispatch two micro-ops,
// that the resource P01 is needed and that the latency to Rn is different than
// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by
// three.
// We will do this by assigning (abstract) resources to register defs/uses.
// ARMSchedule.td:
//   def WriteALUsr : SchedWrite;
//   def ReadAdvanceALUsr : SchedRead;
//
// ARMInstrInfo.td:
//   def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
//                           ReadDefault]> { ...}
// ReadAdvance read resources allow us to define "pipeline by-passes" or
// shorter latencies to certain registers as needed in the example above.
// The "ReadDefault" can be omitted.
// Next, the subtarget td file assigns resources to the abstract resources
// defined here.
// ARMScheduleSubtarget.td:
//  // Resources.
//  def P01 : ProcResource<3>; // ALU unit (3 of them).
//  ...
//  // Resource usages.
//  def : WriteRes<WriteALUsr, [P01, P01]> {
//    Latency = 4; // Latency of 4.
//    NumMicroOps = 2; // Dispatch 2 micro-ops.
//    // The two instances of resource P01 are occupied for one cycle. It is one
//    // cycle because these resources happen to be pipelined.
//    ResourceCycles = [1, 1];
//  }
//  def : ReadAdvance<ReadAdvanceALUsr, 3>;

// Subtarget-independent scheduling resources. Each record below is an opaque
// SchedWrite (attached to a def operand) or SchedRead (attached to a use
// operand); this file assigns them no latency or resource data.

// Basic ALU operation.
def WriteALU : SchedWrite;
def ReadALU  : SchedRead;

// Basic ALU with shifts.
def WriteALUsi  : SchedWrite; // Shift by immediate.
def WriteALUsr  : SchedWrite; // Shift by register.
def WriteALUSsr : SchedWrite; // Shift by register (flag setting).
def ReadALUsr   : SchedRead;  // Some operands are read later.

// Compares.
// NOTE(review): the si/sr suffixes presumably mirror WriteALUsi/WriteALUsr
// (shift by immediate / shift by register) -- confirm against the users.
def WriteCMP   : SchedWrite;
def WriteCMPsi : SchedWrite;
def WriteCMPsr : SchedWrite;

// Division.
def WriteDiv : SchedWrite;

// Loads.
def WriteLd    : SchedWrite;
def WritePreLd : SchedWrite;

// Branches.
def WriteBr    : SchedWrite;
def WriteBrL   : SchedWrite;
def WriteBrTbl : SchedWrite;

// Fixed-point conversions.
def WriteCvtFP : SchedWrite;

// Noop.
def WriteNoop : SchedWrite;

// Define TII for use in SchedVariant Predicates.
// The prolog fragment declares a local TII by casting the instruction info
// obtained from SchedModel to ARMBaseInstrInfo. The (void) cast presumably
// keeps the variable from being reported as unused when no predicate
// references it.
def : PredicateProlog<[{
  const ARMBaseInstrInfo *TII =
    static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
  (void)TII;
}]>;

// Predicate that evaluates TII->isPredicated(*MI); it relies on the TII
// declared by the PredicateProlog above.
def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>;

//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
//
// Each record is a bare InstrItinClass tag; no stage or latency data is
// attached to it here.
//
// IIC_i* classes: ALU, bit, unary, extend, compare, test, move, conditional
// move, multiply / multiply-accumulate and divide (grouping as suggested by
// the record names).
def IIC_iALUx      : InstrItinClass;
def IIC_iALUi      : InstrItinClass;
def IIC_iALUr      : InstrItinClass;
def IIC_iALUsi     : InstrItinClass;
def IIC_iALUsir    : InstrItinClass;
def IIC_iALUsr     : InstrItinClass;
def IIC_iBITi      : InstrItinClass;
def IIC_iBITr      : InstrItinClass;
def IIC_iBITsi     : InstrItinClass;
def IIC_iBITsr     : InstrItinClass;
def IIC_iUNAr      : InstrItinClass;
def IIC_iUNAsi     : InstrItinClass;
def IIC_iEXTr      : InstrItinClass;
def IIC_iEXTAr     : InstrItinClass;
def IIC_iEXTAsr    : InstrItinClass;
def IIC_iCMPi      : InstrItinClass;
def IIC_iCMPr      : InstrItinClass;
def IIC_iCMPsi     : InstrItinClass;
def IIC_iCMPsr     : InstrItinClass;
def IIC_iTSTi      : InstrItinClass;
def IIC_iTSTr      : InstrItinClass;
def IIC_iTSTsi     : InstrItinClass;
def IIC_iTSTsr     : InstrItinClass;
def IIC_iMOVi      : InstrItinClass;
def IIC_iMOVr      : InstrItinClass;
def IIC_iMOVsi     : InstrItinClass;
def IIC_iMOVsr     : InstrItinClass;
def IIC_iMOVix2    : InstrItinClass;
def IIC_iMOVix2addpc : InstrItinClass;
def IIC_iMOVix2ld  : InstrItinClass;
def IIC_iMVNi      : InstrItinClass;
def IIC_iMVNr      : InstrItinClass;
def IIC_iMVNsi     : InstrItinClass;
def IIC_iMVNsr     : InstrItinClass;
def IIC_iCMOVi     : InstrItinClass;
def IIC_iCMOVr     : InstrItinClass;
def IIC_iCMOVsi    : InstrItinClass;
def IIC_iCMOVsr    : InstrItinClass;
def IIC_iCMOVix2   : InstrItinClass;
def IIC_iMUL16     : InstrItinClass;
def IIC_iMAC16     : InstrItinClass;
def IIC_iMUL32     : InstrItinClass;
def IIC_iMAC32     : InstrItinClass;
def IIC_iMUL64     : InstrItinClass;
def IIC_iMAC64     : InstrItinClass;
def IIC_iDIV       : InstrItinClass;
// IIC_iLoad* / IIC_iStore* classes, plus pop, preload and branch classes
// (grouping as suggested by the record names). Each record is a bare
// InstrItinClass tag.
def IIC_iLoad_i    : InstrItinClass;
def IIC_iLoad_r    : InstrItinClass;
def IIC_iLoad_si   : InstrItinClass;
def IIC_iLoad_iu   : InstrItinClass;
def IIC_iLoad_ru   : InstrItinClass;
def IIC_iLoad_siu  : InstrItinClass;
def IIC_iLoad_bh_i   : InstrItinClass;
def IIC_iLoad_bh_r   : InstrItinClass;
def IIC_iLoad_bh_si  : InstrItinClass;
def IIC_iLoad_bh_iu  : InstrItinClass;
def IIC_iLoad_bh_ru  : InstrItinClass;
def IIC_iLoad_bh_siu : InstrItinClass;
def IIC_iLoad_d_i  : InstrItinClass;
def IIC_iLoad_d_r  : InstrItinClass;
def IIC_iLoad_d_ru : InstrItinClass;
def IIC_iLoad_m    : InstrItinClass;
def IIC_iLoad_mu   : InstrItinClass;
def IIC_iLoad_mBr  : InstrItinClass;
def IIC_iPop       : InstrItinClass;
def IIC_iPop_Br    : InstrItinClass;
def IIC_iLoadiALU  : InstrItinClass;
def IIC_iStore_i   : InstrItinClass;
def IIC_iStore_r   : InstrItinClass;
def IIC_iStore_si  : InstrItinClass;
def IIC_iStore_iu  : InstrItinClass;
def IIC_iStore_ru  : InstrItinClass;
def IIC_iStore_siu : InstrItinClass;
def IIC_iStore_bh_i   : InstrItinClass;
def IIC_iStore_bh_r   : InstrItinClass;
def IIC_iStore_bh_si  : InstrItinClass;
def IIC_iStore_bh_iu  : InstrItinClass;
def IIC_iStore_bh_ru  : InstrItinClass;
def IIC_iStore_bh_siu : InstrItinClass;
def IIC_iStore_d_i   : InstrItinClass;
def IIC_iStore_d_r   : InstrItinClass;
def IIC_iStore_d_ru  : InstrItinClass;
def IIC_iStore_m   : InstrItinClass;
def IIC_iStore_mu  : InstrItinClass;
def IIC_Preload    : InstrItinClass;
def IIC_Br         : InstrItinClass;
// IIC_fp* classes: status, unary, compare, convert, move, ALU, multiply,
// multiply-accumulate, divide, square root, load and store (grouping as
// suggested by the record names). Each record is a bare InstrItinClass tag.
def IIC_fpSTAT     : InstrItinClass;
def IIC_fpUNA16    : InstrItinClass;
def IIC_fpUNA32    : InstrItinClass;
def IIC_fpUNA64    : InstrItinClass;
def IIC_fpCMP16    : InstrItinClass;
def IIC_fpCMP32    : InstrItinClass;
def IIC_fpCMP64    : InstrItinClass;
def IIC_fpCVTSD    : InstrItinClass;
def IIC_fpCVTDS    : InstrItinClass;
def IIC_fpCVTSH    : InstrItinClass;
def IIC_fpCVTHS    : InstrItinClass;
def IIC_fpCVTIH    : InstrItinClass;
def IIC_fpCVTIS    : InstrItinClass;
def IIC_fpCVTID    : InstrItinClass;
def IIC_fpCVTHI    : InstrItinClass;
def IIC_fpCVTSI    : InstrItinClass;
def IIC_fpCVTDI    : InstrItinClass;
def IIC_fpMOVIS    : InstrItinClass;
def IIC_fpMOVID    : InstrItinClass;
def IIC_fpMOVSI    : InstrItinClass;
def IIC_fpMOVDI    : InstrItinClass;
def IIC_fpALU16    : InstrItinClass;
def IIC_fpALU32    : InstrItinClass;
def IIC_fpALU64    : InstrItinClass;
def IIC_fpMUL16    : InstrItinClass;
def IIC_fpMUL32    : InstrItinClass;
def IIC_fpMUL64    : InstrItinClass;
def IIC_fpMAC16    : InstrItinClass;
def IIC_fpMAC32    : InstrItinClass;
def IIC_fpMAC64    : InstrItinClass;
def IIC_fpFMAC16   : InstrItinClass;
def IIC_fpFMAC32   : InstrItinClass;
def IIC_fpFMAC64   : InstrItinClass;
def IIC_fpDIV16    : InstrItinClass;
def IIC_fpDIV32    : InstrItinClass;
def IIC_fpDIV64    : InstrItinClass;
def IIC_fpSQRT16   : InstrItinClass;
def IIC_fpSQRT32   : InstrItinClass;
def IIC_fpSQRT64   : InstrItinClass;
def IIC_fpLoad16   : InstrItinClass;
def IIC_fpLoad32   : InstrItinClass;
def IIC_fpLoad64   : InstrItinClass;
def IIC_fpLoad_m   : InstrItinClass;
def IIC_fpLoad_mu  : InstrItinClass;
def IIC_fpStore16  : InstrItinClass;
def IIC_fpStore32  : InstrItinClass;
def IIC_fpStore64  : InstrItinClass;
def IIC_fpStore_m  : InstrItinClass;
def IIC_fpStore_mu : InstrItinClass;
// IIC_VLD* / IIC_VST* classes (vector load/store, as suggested by the record
// names). Each record is a bare InstrItinClass tag.
def IIC_VLD1       : InstrItinClass;
def IIC_VLD1x2     : InstrItinClass;
def IIC_VLD1x3     : InstrItinClass;
def IIC_VLD1x4     : InstrItinClass;
def IIC_VLD1u      : InstrItinClass;
def IIC_VLD1x2u    : InstrItinClass;
def IIC_VLD1x3u    : InstrItinClass;
def IIC_VLD1x4u    : InstrItinClass;
def IIC_VLD1ln     : InstrItinClass;
def IIC_VLD1lnu    : InstrItinClass;
def IIC_VLD1dup    : InstrItinClass;
def IIC_VLD1dupu   : InstrItinClass;
def IIC_VLD2       : InstrItinClass;
def IIC_VLD2x2     : InstrItinClass;
def IIC_VLD2u      : InstrItinClass;
def IIC_VLD2x2u    : InstrItinClass;
def IIC_VLD2ln     : InstrItinClass;
def IIC_VLD2lnu    : InstrItinClass;
def IIC_VLD2dup    : InstrItinClass;
def IIC_VLD2dupu   : InstrItinClass;
def IIC_VLD3       : InstrItinClass;
def IIC_VLD3ln     : InstrItinClass;
def IIC_VLD3u      : InstrItinClass;
def IIC_VLD3lnu    : InstrItinClass;
def IIC_VLD3dup    : InstrItinClass;
def IIC_VLD3dupu   : InstrItinClass;
def IIC_VLD4       : InstrItinClass;
def IIC_VLD4ln     : InstrItinClass;
def IIC_VLD4u      : InstrItinClass;
def IIC_VLD4lnu    : InstrItinClass;
def IIC_VLD4dup    : InstrItinClass;
def IIC_VLD4dupu   : InstrItinClass;
def IIC_VST1       : InstrItinClass;
def IIC_VST1x2     : InstrItinClass;
def IIC_VST1x3     : InstrItinClass;
def IIC_VST1x4     : InstrItinClass;
def IIC_VST1u      : InstrItinClass;
def IIC_VST1x2u    : InstrItinClass;
def IIC_VST1x3u    : InstrItinClass;
def IIC_VST1x4u    : InstrItinClass;
def IIC_VST1ln     : InstrItinClass;
def IIC_VST1lnu    : InstrItinClass;
def IIC_VST2       : InstrItinClass;
def IIC_VST2x2     : InstrItinClass;
def IIC_VST2u      : InstrItinClass;
def IIC_VST2x2u    : InstrItinClass;
def IIC_VST2ln     : InstrItinClass;
def IIC_VST2lnu    : InstrItinClass;
def IIC_VST3       : InstrItinClass;
def IIC_VST3u      : InstrItinClass;
def IIC_VST3ln     : InstrItinClass;
def IIC_VST3lnu    : InstrItinClass;
def IIC_VST4       : InstrItinClass;
def IIC_VST4u      : InstrItinClass;
def IIC_VST4ln     : InstrItinClass;
def IIC_VST4lnu    : InstrItinClass;
// Remaining IIC_V* classes (vector data-processing, moves, permutes and table
// lookups, as suggested by the record names). Each record is a bare
// InstrItinClass tag.
def IIC_VUNAD      : InstrItinClass;
def IIC_VUNAQ      : InstrItinClass;
def IIC_VBIND      : InstrItinClass;
def IIC_VBINQ      : InstrItinClass;
def IIC_VPBIND     : InstrItinClass;
def IIC_VFMULD     : InstrItinClass;
def IIC_VFMULQ     : InstrItinClass;
def IIC_VMOV       : InstrItinClass;
def IIC_VMOVImm    : InstrItinClass;
def IIC_VMOVD      : InstrItinClass;
def IIC_VMOVQ      : InstrItinClass;
def IIC_VMOVIS     : InstrItinClass;
def IIC_VMOVID     : InstrItinClass;
def IIC_VMOVISL    : InstrItinClass;
def IIC_VMOVSI     : InstrItinClass;
def IIC_VMOVDI     : InstrItinClass;
def IIC_VMOVN      : InstrItinClass;
def IIC_VPERMD     : InstrItinClass;
def IIC_VPERMQ     : InstrItinClass;
def IIC_VPERMQ3    : InstrItinClass;
def IIC_VMACD      : InstrItinClass;
def IIC_VMACQ      : InstrItinClass;
def IIC_VFMACD     : InstrItinClass;
def IIC_VFMACQ     : InstrItinClass;
def IIC_VRECSD     : InstrItinClass;
def IIC_VRECSQ     : InstrItinClass;
def IIC_VCNTiD     : InstrItinClass;
def IIC_VCNTiQ     : InstrItinClass;
def IIC_VUNAiD     : InstrItinClass;
def IIC_VUNAiQ     : InstrItinClass;
def IIC_VQUNAiD    : InstrItinClass;
def IIC_VQUNAiQ    : InstrItinClass;
def IIC_VBINiD     : InstrItinClass;
def IIC_VBINiQ     : InstrItinClass;
def IIC_VSUBiD     : InstrItinClass;
def IIC_VSUBiQ     : InstrItinClass;
def IIC_VBINi4D    : InstrItinClass;
def IIC_VBINi4Q    : InstrItinClass;
def IIC_VSUBi4D    : InstrItinClass;
def IIC_VSUBi4Q    : InstrItinClass;
def IIC_VABAD      : InstrItinClass;
def IIC_VABAQ      : InstrItinClass;
def IIC_VSHLiD     : InstrItinClass;
def IIC_VSHLiQ     : InstrItinClass;
def IIC_VSHLi4D    : InstrItinClass;
def IIC_VSHLi4Q    : InstrItinClass;
def IIC_VPALiD     : InstrItinClass;
def IIC_VPALiQ     : InstrItinClass;
def IIC_VMULi16D   : InstrItinClass;
def IIC_VMULi32D   : InstrItinClass;
def IIC_VMULi16Q   : InstrItinClass;
def IIC_VMULi32Q   : InstrItinClass;
def IIC_VMACi16D   : InstrItinClass;
def IIC_VMACi32D   : InstrItinClass;
def IIC_VMACi16Q   : InstrItinClass;
def IIC_VMACi32Q   : InstrItinClass;
def IIC_VEXTD      : InstrItinClass;
def IIC_VEXTQ      : InstrItinClass;
def IIC_VTB1       : InstrItinClass;
def IIC_VTB2       : InstrItinClass;
def IIC_VTB3       : InstrItinClass;
def IIC_VTB4       : InstrItinClass;
def IIC_VTBX1      : InstrItinClass;
def IIC_VTBX2      : InstrItinClass;
def IIC_VTBX3      : InstrItinClass;
def IIC_VTBX4      : InstrItinClass;

//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
//
// The per-processor schedule files are included last, after every record
// defined earlier in this file.

include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
include "ARMScheduleSwift.td"