//===-- CIInstructions.td - CI Instruction Definitions --------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// Instruction definitions for CI and newer.
10//===----------------------------------------------------------------------===//
11// Remaining instructions:
12// S_CBRANCH_CDBGUSER
13// S_CBRANCH_CDBGSYS
14// S_CBRANCH_CDBGSYS_OR_USER
15// S_CBRANCH_CDBGSYS_AND_USER
16// DS_NOP
17// DS_GWS_SEMA_RELEASE_ALL
18// DS_WRAP_RTN_B32
// DS_CONDXCHG32_RTN_B64
20// DS_WRITE_B96
21// DS_WRITE_B128
22// DS_CONDXCHG32_RTN_B128
23// DS_READ_B96
24// DS_READ_B128
25// BUFFER_LOAD_DWORDX3
26// BUFFER_STORE_DWORDX3
27
28//===----------------------------------------------------------------------===//
29// VOP1 Instructions
30//===----------------------------------------------------------------------===//
31
32let SubtargetPredicate = isCIVI in {
33
// f64 rounding operations (trunc / ceil / floor / rndne). Every definition in
// this scope gets SchedRW = [WriteDoubleAdd] and is tied to the matching
// generic rounding node (ftrunc, fceil, ffloor, frint).
let SchedRW = [WriteDoubleAdd] in {
defm V_TRUNC_F64 :
  VOP1Inst <vop1<0x17>, "v_trunc_f64", VOP_F64_F64, ftrunc>;
defm V_CEIL_F64 :
  VOP1Inst <vop1<0x18>, "v_ceil_f64", VOP_F64_F64, fceil>;
// Note: floor uses opcode 0x1A and rndne 0x19, so the definitions below are
// intentionally not listed in opcode order.
defm V_FLOOR_F64 :
  VOP1Inst <vop1<0x1A>, "v_floor_f64", VOP_F64_F64, ffloor>;
defm V_RNDNE_F64 :
  VOP1Inst <vop1<0x19>, "v_rndne_f64", VOP_F64_F64, frint>;
} // End SchedRW = [WriteDoubleAdd]
48
// Legacy f32 log / exp. Both pass two opcode values to vop1<> and supply no
// pattern operator here.
// NOTE(review): presumably the two vop1<> values are the SI/CI and VI
// encodings -- confirm against the vop1 class definition.
let SchedRW = [WriteQuarterRate32] in {
defm V_LOG_LEGACY_F32 :
  VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32", VOP_F32_F32>;
defm V_EXP_LEGACY_F32 :
  VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32", VOP_F32_F32>;
} // End SchedRW = [WriteQuarterRate32]
57
58//===----------------------------------------------------------------------===//
59// VOP3 Instructions
60//===----------------------------------------------------------------------===//
61
// QSAD / MQSAD instructions. All three use the VOP_I32_I32_I32 profile and
// are declared with opcode and mnemonic only; no pattern operator is given.
defm V_QSAD_PK_U16_U8 :
  VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8", VOP_I32_I32_I32>;
defm V_MQSAD_U16_U8 :
  VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8", VOP_I32_I32_I32>;
defm V_MQSAD_U32_U8 :
  VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8", VOP_I32_I32_I32>;
71
// 64-bit multiply-add with 32-bit multiplicands (unsigned and signed forms),
// both using the VOP_I64_I32_I32_I64 profile and marked commutable.
let isCommutable = 1 in {
defm V_MAD_U64_U32 :
  VOP3Inst <vop3<0x176>, "v_mad_u64_u32", VOP_I64_I32_I32_I64>;

// XXX - Does this set VCC?
defm V_MAD_I64_I32 :
  VOP3Inst <vop3<0x177>, "v_mad_i64_i32", VOP_I64_I32_I32_I64>;
} // End isCommutable = 1
82
83
84//===----------------------------------------------------------------------===//
85// DS Instructions
86//===----------------------------------------------------------------------===//
// Returning DS wrap instruction on a 32-bit VGPR. The trailing string names
// the "ds_wrap_f32" counterpart.
// NOTE(review): presumably that is the non-returning form -- confirm against
// DS_1A1D_RET.
defm DS_WRAP_RTN_F32 :
  DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
88
89// DS_CONDXCHG32_RTN_B64
90// DS_CONDXCHG32_RTN_B128
91
92//===----------------------------------------------------------------------===//
93// SMRD Instructions
94//===----------------------------------------------------------------------===//
95
// Scalar data-cache invalidate (volatile variant), bound to the
// int_amdgcn_s_dcache_inv_vol intrinsic.
defm S_DCACHE_INV_VOL :
  SMRD_Inval <smrd<0x1d, 0x22>, "s_dcache_inv_vol",
              int_amdgcn_s_dcache_inv_vol>;
98
99//===----------------------------------------------------------------------===//
100// MUBUF Instructions
101//===----------------------------------------------------------------------===//
102
// Buffer write-back/invalidate (volatile variant), bound to the
// int_amdgcn_buffer_wbinvl1_vol intrinsic. DisableSIDecoder is set for the
// definition inside this scope.
let DisableSIDecoder = 1 in {
defm BUFFER_WBINVL1_VOL :
  MUBUF_Invalidate <mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol",
                    int_amdgcn_buffer_wbinvl1_vol>;
} // End DisableSIDecoder = 1
108
109//===----------------------------------------------------------------------===//
110// Flat Instructions
111//===----------------------------------------------------------------------===//
112
// FLAT loads. The last template argument is the destination register class:
// VGPR_32 for byte/short/dword, VReg_64 / VReg_128 / VReg_96 for the
// x2 / x4 / x3 forms. Each passes two opcode values to flat<>.
// NOTE(review): presumably (SI/CI opcode, VI opcode) -- confirm against the
// flat class definition.
defm FLAT_LOAD_UBYTE :
  FLAT_Load_Helper <flat<0x8, 0x10>, "flat_load_ubyte", VGPR_32>;
defm FLAT_LOAD_SBYTE :
  FLAT_Load_Helper <flat<0x9, 0x11>, "flat_load_sbyte", VGPR_32>;
defm FLAT_LOAD_USHORT :
  FLAT_Load_Helper <flat<0xa, 0x12>, "flat_load_ushort", VGPR_32>;
defm FLAT_LOAD_SSHORT :
  FLAT_Load_Helper <flat<0xb, 0x13>, "flat_load_sshort", VGPR_32>;
defm FLAT_LOAD_DWORD :
  FLAT_Load_Helper <flat<0xc, 0x14>, "flat_load_dword", VGPR_32>;
defm FLAT_LOAD_DWORDX2 :
  FLAT_Load_Helper <flat<0xd, 0x15>, "flat_load_dwordx2", VReg_64>;
// DWORDX4 precedes DWORDX3 here, following the first opcode value (0xe, 0xf).
defm FLAT_LOAD_DWORDX4 :
  FLAT_Load_Helper <flat<0xe, 0x17>, "flat_load_dwordx4", VReg_128>;
defm FLAT_LOAD_DWORDX3 :
  FLAT_Load_Helper <flat<0xf, 0x16>, "flat_load_dwordx3", VReg_96>;
// FLAT stores. The last template argument is the register class of the data
// being stored. The byte/short/dword/dwordx2 forms pass a single opcode to
// flat<>; the x4 and x3 forms pass two, with the pair swapped between them.
defm FLAT_STORE_BYTE :
  FLAT_Store_Helper <flat<0x18>, "flat_store_byte", VGPR_32>;
defm FLAT_STORE_SHORT :
  FLAT_Store_Helper <flat<0x1a>, "flat_store_short", VGPR_32>;
defm FLAT_STORE_DWORD :
  FLAT_Store_Helper <flat<0x1c>, "flat_store_dword", VGPR_32>;
defm FLAT_STORE_DWORDX2 :
  FLAT_Store_Helper <flat<0x1d>, "flat_store_dwordx2", VReg_64>;
defm FLAT_STORE_DWORDX4 :
  FLAT_Store_Helper <flat<0x1e, 0x1f>, "flat_store_dwordx4", VReg_128>;
defm FLAT_STORE_DWORDX3 :
  FLAT_Store_Helper <flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96>;
// 32-bit FLAT atomics: VGPR_32 / i32 result, each tied to the matching
// atomic_*_flat pattern operator. CMPSWAP additionally overrides the data
// type and data register class (v2i32 / VReg_64).
// NOTE(review): presumably the wider data operand packs the swap and compare
// values -- confirm against FLAT_ATOMIC.
defm FLAT_ATOMIC_SWAP :
  FLAT_ATOMIC <flat<0x30, 0x40>, "flat_atomic_swap",
               VGPR_32, i32, atomic_swap_flat>;
defm FLAT_ATOMIC_CMPSWAP :
  FLAT_ATOMIC <flat<0x31, 0x41>, "flat_atomic_cmpswap",
               VGPR_32, i32, atomic_cmp_swap_flat, v2i32, VReg_64>;
defm FLAT_ATOMIC_ADD :
  FLAT_ATOMIC <flat<0x32, 0x42>, "flat_atomic_add",
               VGPR_32, i32, atomic_add_flat>;
defm FLAT_ATOMIC_SUB :
  FLAT_ATOMIC <flat<0x33, 0x43>, "flat_atomic_sub",
               VGPR_32, i32, atomic_sub_flat>;
// The first opcode value skips 0x34 here while the second stays contiguous.
defm FLAT_ATOMIC_SMIN :
  FLAT_ATOMIC <flat<0x35, 0x44>, "flat_atomic_smin",
               VGPR_32, i32, atomic_min_flat>;
defm FLAT_ATOMIC_UMIN :
  FLAT_ATOMIC <flat<0x36, 0x45>, "flat_atomic_umin",
               VGPR_32, i32, atomic_umin_flat>;
defm FLAT_ATOMIC_SMAX :
  FLAT_ATOMIC <flat<0x37, 0x46>, "flat_atomic_smax",
               VGPR_32, i32, atomic_max_flat>;
defm FLAT_ATOMIC_UMAX :
  FLAT_ATOMIC <flat<0x38, 0x47>, "flat_atomic_umax",
               VGPR_32, i32, atomic_umax_flat>;
defm FLAT_ATOMIC_AND :
  FLAT_ATOMIC <flat<0x39, 0x48>, "flat_atomic_and",
               VGPR_32, i32, atomic_and_flat>;
defm FLAT_ATOMIC_OR :
  FLAT_ATOMIC <flat<0x3a, 0x49>, "flat_atomic_or",
               VGPR_32, i32, atomic_or_flat>;
defm FLAT_ATOMIC_XOR :
  FLAT_ATOMIC <flat<0x3b, 0x4a>, "flat_atomic_xor",
               VGPR_32, i32, atomic_xor_flat>;
defm FLAT_ATOMIC_INC :
  FLAT_ATOMIC <flat<0x3c, 0x4b>, "flat_atomic_inc",
               VGPR_32, i32, atomic_inc_flat>;
defm FLAT_ATOMIC_DEC :
  FLAT_ATOMIC <flat<0x3d, 0x4c>, "flat_atomic_dec",
               VGPR_32, i32, atomic_dec_flat>;
// 64-bit (_X2) FLAT atomics: VReg_64 / i64 result, reusing the same
// atomic_*_flat pattern operators as the 32-bit forms. CMPSWAP_X2 overrides
// the data type and data register class (v2i64 / VReg_128).
defm FLAT_ATOMIC_SWAP_X2 :
  FLAT_ATOMIC <flat<0x50, 0x60>, "flat_atomic_swap_x2",
               VReg_64, i64, atomic_swap_flat>;
defm FLAT_ATOMIC_CMPSWAP_X2 :
  FLAT_ATOMIC <flat<0x51, 0x61>, "flat_atomic_cmpswap_x2",
               VReg_64, i64, atomic_cmp_swap_flat, v2i64, VReg_128>;
defm FLAT_ATOMIC_ADD_X2 :
  FLAT_ATOMIC <flat<0x52, 0x62>, "flat_atomic_add_x2",
               VReg_64, i64, atomic_add_flat>;
defm FLAT_ATOMIC_SUB_X2 :
  FLAT_ATOMIC <flat<0x53, 0x63>, "flat_atomic_sub_x2",
               VReg_64, i64, atomic_sub_flat>;
// The first opcode value skips 0x54 here while the second stays contiguous.
defm FLAT_ATOMIC_SMIN_X2 :
  FLAT_ATOMIC <flat<0x55, 0x64>, "flat_atomic_smin_x2",
               VReg_64, i64, atomic_min_flat>;
defm FLAT_ATOMIC_UMIN_X2 :
  FLAT_ATOMIC <flat<0x56, 0x65>, "flat_atomic_umin_x2",
               VReg_64, i64, atomic_umin_flat>;
defm FLAT_ATOMIC_SMAX_X2 :
  FLAT_ATOMIC <flat<0x57, 0x66>, "flat_atomic_smax_x2",
               VReg_64, i64, atomic_max_flat>;
defm FLAT_ATOMIC_UMAX_X2 :
  FLAT_ATOMIC <flat<0x58, 0x67>, "flat_atomic_umax_x2",
               VReg_64, i64, atomic_umax_flat>;
defm FLAT_ATOMIC_AND_X2 :
  FLAT_ATOMIC <flat<0x59, 0x68>, "flat_atomic_and_x2",
               VReg_64, i64, atomic_and_flat>;
defm FLAT_ATOMIC_OR_X2 :
  FLAT_ATOMIC <flat<0x5a, 0x69>, "flat_atomic_or_x2",
               VReg_64, i64, atomic_or_flat>;
defm FLAT_ATOMIC_XOR_X2 :
  FLAT_ATOMIC <flat<0x5b, 0x6a>, "flat_atomic_xor_x2",
               VReg_64, i64, atomic_xor_flat>;
defm FLAT_ATOMIC_INC_X2 :
  FLAT_ATOMIC <flat<0x5c, 0x6b>, "flat_atomic_inc_x2",
               VReg_64, i64, atomic_inc_flat>;
defm FLAT_ATOMIC_DEC_X2 :
  FLAT_ATOMIC <flat<0x5d, 0x6c>, "flat_atomic_dec_x2",
               VReg_64, i64, atomic_dec_flat>;
235
236} // End SubtargetPredicate = isCIVI
237
238// CI Only flat instructions
239
// Floating-point FLAT atomics, restricted to CI: the VI assembler predicate
// is DisableInst and the VI decoder is disabled for everything in this scope.
// Each passes a single opcode to flat<>. The FCMPSWAP forms pass null_frag as
// the pattern operator and override the data type / register class; the
// FMIN / FMAX forms leave those arguments at their defaults.
let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst,
    DisableVIDecoder = 1 in {

defm FLAT_ATOMIC_FCMPSWAP :
  FLAT_ATOMIC <flat<0x3e>, "flat_atomic_fcmpswap",
               VGPR_32, f32, null_frag, v2f32, VReg_64>;
defm FLAT_ATOMIC_FMIN :
  FLAT_ATOMIC <flat<0x3f>, "flat_atomic_fmin", VGPR_32, f32>;
defm FLAT_ATOMIC_FMAX :
  FLAT_ATOMIC <flat<0x40>, "flat_atomic_fmax", VGPR_32, f32>;
defm FLAT_ATOMIC_FCMPSWAP_X2 :
  FLAT_ATOMIC <flat<0x5e>, "flat_atomic_fcmpswap_x2",
               VReg_64, f64, null_frag, v2f64, VReg_128>;
defm FLAT_ATOMIC_FMIN_X2 :
  FLAT_ATOMIC <flat<0x5f>, "flat_atomic_fmin_x2", VReg_64, f64>;
defm FLAT_ATOMIC_FMAX_X2 :
  FLAT_ATOMIC <flat<0x60>, "flat_atomic_fmax_x2", VReg_64, f64>;

} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst,
  //     DisableVIDecoder = 1
264
265//===----------------------------------------------------------------------===//
266// Flat Patterns
267//===----------------------------------------------------------------------===//
268
269let Predicates = [isCIVI] in {
270
271// Patterns for global loads with no offset.
// Matches `node` applied to a 64-bit address and producing `vt`, and selects
// `inst` with that address followed by three zero immediates.
// NOTE(review): presumably the immediates are glc / slc / tfe -- confirm
// against FLAT_Load_Helper.
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt>
  : Pat <(vt (node i64:$addr)),
         (inst $addr, 0, 0, 0)>;
276
// Same source pattern as FlatLoadPat, but the first immediate operand passed
// to `inst` is 1 instead of 0.
// NOTE(review): presumably this sets glc for atomic loads -- confirm against
// FLAT_Load_Helper.
class FlatLoadAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt>
  : Pat <(vt (node i64:$addr)),
         (inst $addr, 1, 0, 0)>;
281
// Extending byte / short loads all produce i32.
def : FlatLoadPat <FLAT_LOAD_UBYTE,   flat_az_extloadi8,  i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE,   flat_sextloadi8,    i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT,  flat_az_extloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_SSHORT,  flat_sextloadi16,   i32>;
// Full-width loads: dword, and the 2- and 4-element i32 vectors.
def : FlatLoadPat <FLAT_LOAD_DWORD,   flat_load,          i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load,          v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load,          v4i32>;

// Atomic loads exist only for i32 and i64.
def : FlatLoadAtomicPat <FLAT_LOAD_DWORD,   atomic_flat_load, i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;
292
293
// Matches `node` with the stored value first and the 64-bit address second,
// and selects `inst` with (address, data) followed by three zero immediates.
class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt>
  : Pat <(node vt:$data, i64:$addr),
         (inst $addr, $data, 0, 0, 0)>;
298
// Atomic counterpart of FlatStorePat. An atomic store follows the atomic
// binop convention, so in the source pattern the address comes first and the
// data second. The first immediate operand passed to `inst` is 1.
class FlatStoreAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt>
  : Pat <(node i64:$addr, vt:$data),
         (inst $addr, $data, 1, 0, 0)>;
305
// Truncating byte / short stores take an i32 source value.
def : FlatStorePat <FLAT_STORE_BYTE,    flat_truncstorei8,  i32>;
def : FlatStorePat <FLAT_STORE_SHORT,   flat_truncstorei16, i32>;
// Full-width stores: dword, and the 2- and 4-element i32 vectors.
def : FlatStorePat <FLAT_STORE_DWORD,   flat_store,         i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store,         v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store,         v4i32>;

// Atomic stores exist only for i32 and i64.
def : FlatStoreAtomicPat <FLAT_STORE_DWORD,   atomic_flat_store, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;
314
// Matches an atomic `node` taking a 64-bit address and a `data_vt` operand
// and producing `vt`; selects `inst` with (address, data) followed by two
// zero immediates. `data_vt` defaults to `vt` and is only overridden when
// the data operand type differs from the result type (see the cmpswap uses).
class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt>
  : Pat <(vt (node i64:$addr, data_vt:$data)),
         (inst $addr, $data, 0, 0)>;
320
// 32-bit global atomics selected to the returning (_RTN) FLAT atomics.
// NOTE(review): the _RTN records are presumably produced by the FLAT_ATOMIC
// multiclass -- confirm there.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN,  atomic_add_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN,  atomic_sub_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN,  atomic_inc_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN,  atomic_dec_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN,  atomic_and_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN,   atomic_or_global,   i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
// cmpswap is the only pattern whose data type (v2i32) differs from its
// result type.
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_global,
                     i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN,  atomic_xor_global,  i32>;

// 64-bit (_X2) equivalents, in the same order as the 32-bit list.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN,  atomic_add_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN,  atomic_sub_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN,  atomic_inc_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN,  atomic_dec_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN,  atomic_and_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN,   atomic_or_global,   i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_global,
                     i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN,  atomic_xor_global,  i64>;
348
349} // End Predicates = [isCIVI]
350