1; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN-ISEL                %s
2
3; RUN: llc -march=amdgcn -mcpu=verde   -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CISI    %s
4; RUN: llc -march=amdgcn -mcpu=fiji    -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI      %s
5; RUN: llc -march=amdgcn -mcpu=gfx900  -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9    %s
6; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s
7
8; GCN-ISEL-LABEL: name:   sadd64rr
9; GCN-ISEL-LABEL: body:
10; GCN-ISEL-LABEL: bb.0.entry:
11; GCN-ISEL: S_ADD_U64_PSEUDO
12
13; GCN-LABEL: @sadd64rr
14; GCN:       s_add_u32
15; GCN:       s_addc_u32
; Kernel sadd64rr ("rr" = both operands are values, no immediate): adds the two
; i64 kernel arguments %a and %b and stores the 64-bit sum through the
; addrspace(1) pointer %out. The checks above expect this add to be selected as
; S_ADD_U64_PSEUDO and emitted as an s_add_u32 / s_addc_u32 pair.
16define amdgpu_kernel void @sadd64rr(i64 addrspace(1)* %out, i64 %a, i64 %b) {
17entry:
18  %add = add i64 %a, %b
19  store i64 %add, i64 addrspace(1)* %out
20  ret void
21}
22
23; GCN-ISEL-LABEL: name:   sadd64ri
24; GCN-ISEL-LABEL: body:
25; GCN-ISEL-LABEL: bb.0.entry:
26; GCN-ISEL: S_ADD_U64_PSEUDO
27
28; GCN-LABEL: @sadd64ri
29; GCN:       s_add_u32  s{{[0-9]+}}, s{{[0-9]+}}, 0x56789876
30; GCN:       s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1234
; Kernel sadd64ri ("ri" = one value operand, one immediate): adds a 64-bit
; constant to the i64 kernel argument %a and stores the sum through %out.
31define amdgpu_kernel void @sadd64ri(i64 addrspace(1)* %out, i64 %a) {
32entry:
; 20015998343286 == 0x0000123456789876, i.e. low dword 0x56789876 and high
; dword 0x1234 -- the two literals the s_add_u32 / s_addc_u32 checks look for.
33  %add = add i64 20015998343286, %a
34  store i64 %add, i64 addrspace(1)* %out
35  ret void
36}
37
38; GCN-ISEL-LABEL: name:   vadd64rr
39; GCN-ISEL-LABEL: body:
40; GCN-ISEL-LABEL: bb.0.entry:
41; GCN-ISEL: V_ADD_U64_PSEUDO
42
43; GCN-LABEL: @vadd64rr
44;
45; CISI:	v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
46; CISI:	v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
47;
48; VI:	v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
49; VI:	v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
50;
51; GFX9:	v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
52; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
53;
54; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
55; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
; Kernel vadd64rr: adds the i64 kernel argument %a to the sign-extended
; workitem id (x) and stores the 64-bit sum through %out. The checks above
; expect V_ADD_U64_PSEUDO and a VALU add / add-with-carry pair.
56define amdgpu_kernel void @vadd64rr(i64 addrspace(1)* %out, i64 %a) {
57entry:
; Presumably the workitem id is used so that one operand is per-work-item,
; which is what steers selection to the vector form (TODO confirm).
58  %tid = call i32 @llvm.amdgcn.workitem.id.x()
59  %tid.ext = sext i32 %tid to i64
60  %add = add i64 %a, %tid.ext
61  store i64 %add, i64 addrspace(1)* %out
62  ret void
63}
64
65; GCN-ISEL-LABEL: name:   vadd64ri
66; GCN-ISEL-LABEL: body:
67; GCN-ISEL-LABEL: bb.0.entry:
68; GCN-ISEL: V_ADD_U64_PSEUDO
69
70; GCN-LABEL: @vadd64ri
71;
72; CISI:	v_add_i32_e32 v0, vcc, 0x56789876, v0
73; CISI:	v_mov_b32_e32 v1, 0x1234
74; CISI: v_addc_u32_e32 v1, vcc, 0, v1, vcc
75;
76; VI: v_add_u32_e32 v0, vcc, 0x56789876, v0
77; VI: v_mov_b32_e32 v1, 0x1234
78; VI: v_addc_u32_e32 v1, vcc, 0, v1, vcc
79;
80; GFX9:	v_add_co_u32_e32 v0, vcc, 0x56789876, v0
81; GFX9: v_mov_b32_e32 v1, 0x1234
82; GFX9: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
83;
84; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
85; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0, 0x1234, [[CARRY]]
; Kernel vadd64ri: adds a 64-bit constant to the sign-extended workitem id (x)
; and stores the sum through %out. The checks above expect V_ADD_U64_PSEUDO.
86define amdgpu_kernel void @vadd64ri(i64 addrspace(1)* %out) {
87entry:
88  %tid = call i32 @llvm.amdgcn.workitem.id.x()
89  %tid.ext = sext i32 %tid to i64
; 20015998343286 == 0x0000123456789876: low dword 0x56789876, high dword
; 0x1234, matching the literals in the per-target checks above.
90  %add = add i64 20015998343286, %tid.ext
91  store i64 %add, i64 addrspace(1)* %out
92  ret void
93}
94
95; GCN-ISEL-LABEL: name:   suaddo32
96; GCN-ISEL-LABEL: body:
97; GCN-ISEL-LABEL: bb.0
98; GCN-ISEL: S_ADD_I32
; Kernel suaddo32: performs a 32-bit unsigned add-with-overflow on the kernel
; arguments %a and %b but stores only the sum. %carry is extracted yet never
; used and %carryout is never written, so the overflow bit has no consumer; the
; ISEL check above expects a plain S_ADD_I32 in this case.
99define amdgpu_kernel void @suaddo32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
100  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
101  %val = extractvalue { i32, i1 } %uadd, 0
102  %carry = extractvalue { i32, i1 } %uadd, 1
103  store i32 %val, i32 addrspace(1)* %out, align 4
104  ret void
105}
106
107
108; GCN-ISEL-LABEL: name:   uaddo32_vcc_user
109; GCN-ISEL-LABEL: body:
110; GCN-ISEL-LABEL: bb.0
111; GCN-ISEL: V_ADD_CO_U32_e64
112
113; Below we check selection to v_add/addc,
114; because the only user of VCC produced by the UADDO is v_cndmask.
115; We select the VALU form to avoid an unnecessary s_cselect to copy SCC to VCC.
116
117; GCN-LABEL: @uaddo32_vcc_user
118;
119; CISI:	v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
120; CISI:	v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
121;
122; VI:	v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
123; VI:	v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
124;
125; GFX9:	v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
126; GFX9:	v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
127;
128; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
129; GFX1010: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]]
; Kernel uaddo32_vcc_user: 32-bit unsigned add-with-overflow on the kernel
; arguments %a and %b where BOTH results are live: the sum is stored through
; %out and the i1 overflow flag through %carryout. The checks above expect the
; add to be selected as V_ADD_CO_U32_e64 with the carry consumed by
; v_cndmask_b32.
130define amdgpu_kernel void @uaddo32_vcc_user(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
131  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
132  %val = extractvalue { i32, i1 } %uadd, 0
133  %carry = extractvalue { i32, i1 } %uadd, 1
134  store i32 %val, i32 addrspace(1)* %out, align 4
; Storing the overflow bit is what gives the carry-out a user.
135  store i1 %carry, i1 addrspace(1)* %carryout
136  ret void
137}
138
139; GCN-ISEL-LABEL: name:   suaddo64
140; GCN-ISEL-LABEL: body:
141; GCN-ISEL-LABEL: bb.0
142; GCN-ISEL: S_ADD_U64_PSEUDO
143
144; GCN-LABEL: @suaddo64
145;
146; GCN: s_add_u32
147; GCN: s_addc_u32
; Kernel suaddo64: 64-bit unsigned add-with-overflow on the kernel arguments %a
; and %b. The sum is stored through %out and the i1 overflow flag through
; %carryout. The checks above expect S_ADD_U64_PSEUDO and an s_add_u32 /
; s_addc_u32 pair.
148define amdgpu_kernel void @suaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) #0 {
149  %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
150  %val = extractvalue { i64, i1 } %uadd, 0
151  %carry = extractvalue { i64, i1 } %uadd, 1
152  store i64 %val, i64 addrspace(1)* %out, align 8
153  store i1 %carry, i1 addrspace(1)* %carryout
154  ret void
155}
156
157; GCN-ISEL-LABEL: name:   vuaddo64
158; GCN-ISEL-LABEL: body:
159; GCN-ISEL-LABEL: bb.0
160; GCN-ISEL: V_ADD_U64_PSEUDO
161
162; GCN-LABEL: @vuaddo64
163;
164; CISI:	v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
165; CISI:	v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
166;
167; VI:	v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
168; VI:	v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
169;
170; GFX9:	v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
171; GFX9:	v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
172;
173; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
174; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
; Kernel vuaddo64: 64-bit unsigned add-with-overflow of the kernel argument %a
; and the sign-extended workitem id (x). The sum is stored through %out and the
; i1 overflow flag through %carryout. The checks above expect V_ADD_U64_PSEUDO
; and a VALU add / add-with-carry pair.
175define amdgpu_kernel void @vuaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 {
176  %tid = call i32 @llvm.amdgcn.workitem.id.x()
177  %tid.ext = sext i32 %tid to i64
178  %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %tid.ext)
179  %val = extractvalue { i64, i1 } %uadd, 0
180  %carry = extractvalue { i64, i1 } %uadd, 1
181  store i64 %val, i64 addrspace(1)* %out, align 8
182  store i1 %carry, i1 addrspace(1)* %carryout
183  ret void
184}
185
186; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN-ISEL                %s
187
188; RUN: llc -march=amdgcn -mcpu=verde   -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CISI    %s
189; RUN: llc -march=amdgcn -mcpu=fiji    -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI      %s
190; RUN: llc -march=amdgcn -mcpu=gfx900  -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9    %s
191; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s
192
193; GCN-ISEL-LABEL: name:   ssub64rr
194; GCN-ISEL-LABEL: body:
195; GCN-ISEL-LABEL: bb.0.entry:
196; GCN-ISEL: S_SUB_U64_PSEUDO
197
198; GCN-LABEL: @ssub64rr
199; GCN:       s_sub_u32
200; GCN:       s_subb_u32
; Kernel ssub64rr ("rr" = both operands are values, no immediate): computes
; %a - %b on the two i64 kernel arguments and stores the 64-bit difference
; through the addrspace(1) pointer %out. The checks above expect
; S_SUB_U64_PSEUDO and an s_sub_u32 / s_subb_u32 pair.
201define amdgpu_kernel void @ssub64rr(i64 addrspace(1)* %out, i64 %a, i64 %b) {
202entry:
203  %sub = sub i64 %a, %b
204  store i64 %sub, i64 addrspace(1)* %out
205  ret void
206}
207
208; GCN-ISEL-LABEL: name:   ssub64ri
209; GCN-ISEL-LABEL: body:
210; GCN-ISEL-LABEL: bb.0.entry:
211; GCN-ISEL: S_SUB_U64_PSEUDO
212
213; GCN-LABEL: @ssub64ri
214; GCN:       s_sub_u32  s{{[0-9]+}}, 0x56789876, s{{[0-9]+}}
215; GCN:       s_subb_u32 s{{[0-9]+}}, 0x1234, s{{[0-9]+}}
; Kernel ssub64ri ("ri" = one value operand, one immediate): subtracts the i64
; kernel argument %a FROM a 64-bit constant and stores the difference through
; %out.
216define amdgpu_kernel void @ssub64ri(i64 addrspace(1)* %out, i64 %a) {
217entry:
; 20015998343286 == 0x0000123456789876 is the minuend: low dword 0x56789876,
; high dword 0x1234, which is why the checks above place the literal in the
; first source operand of s_sub_u32 / s_subb_u32.
218  %sub = sub i64 20015998343286, %a
219  store i64 %sub, i64 addrspace(1)* %out
220  ret void
221}
222
223; GCN-ISEL-LABEL: name:   vsub64rr
224; GCN-ISEL-LABEL: body:
225; GCN-ISEL-LABEL: bb.0.entry:
226; GCN-ISEL: V_SUB_U64_PSEUDO
227
228; GCN-LABEL: @vsub64rr
229;
230; CISI:	v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
231; CISI:	v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
232;
233; VI:	v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
234; VI:	v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
235;
236; GFX9:	v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
237; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
238;
239; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
240; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
; Kernel vsub64rr: subtracts the sign-extended workitem id (x) from the i64
; kernel argument %a and stores the 64-bit difference through %out. The checks
; above expect V_SUB_U64_PSEUDO and a VALU sub / sub-with-borrow pair.
241define amdgpu_kernel void @vsub64rr(i64 addrspace(1)* %out, i64 %a) {
242entry:
; Presumably the workitem id is used so that one operand is per-work-item,
; which is what steers selection to the vector form (TODO confirm).
243  %tid = call i32 @llvm.amdgcn.workitem.id.x()
244  %tid.ext = sext i32 %tid to i64
245  %sub = sub i64 %a, %tid.ext
246  store i64 %sub, i64 addrspace(1)* %out
247  ret void
248}
249
250; GCN-ISEL-LABEL: name:   vsub64ri
251; GCN-ISEL-LABEL: body:
252; GCN-ISEL-LABEL: bb.0.entry:
253; GCN-ISEL: V_SUB_U64_PSEUDO
254
255; GCN-LABEL: @vsub64ri
256;
257; CISI:	v_sub_i32_e32 v0, vcc, 0x56789876, v0
258; CISI:	v_mov_b32_e32 v1, 0x1234
259; CISI: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
260;
261; VI: v_sub_u32_e32 v0, vcc, 0x56789876, v0
262; VI: v_mov_b32_e32 v1, 0x1234
263; VI: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
264;
265; GFX9:	v_sub_co_u32_e32 v0, vcc, 0x56789876, v0
266; GFX9: v_mov_b32_e32 v1, 0x1234
267; GFX9: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc
268;
269; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
270; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0x1234, 0, [[CARRY]]
; Kernel vsub64ri: subtracts the sign-extended workitem id (x) FROM a 64-bit
; constant and stores the difference through %out. The checks above expect
; V_SUB_U64_PSEUDO.
271define amdgpu_kernel void @vsub64ri(i64 addrspace(1)* %out) {
272entry:
273  %tid = call i32 @llvm.amdgcn.workitem.id.x()
274  %tid.ext = sext i32 %tid to i64
; 20015998343286 == 0x0000123456789876: low dword 0x56789876, high dword
; 0x1234, matching the literals in the per-target checks above.
275  %sub = sub i64 20015998343286, %tid.ext
276  store i64 %sub, i64 addrspace(1)* %out
277  ret void
278}
279
280; GCN-ISEL-LABEL: name:   susubo32
281; GCN-ISEL-LABEL: body:
282; GCN-ISEL-LABEL: bb.0
283; GCN-ISEL: S_SUB_I32
; Kernel susubo32: performs a 32-bit unsigned subtract-with-overflow on the
; kernel arguments %a and %b but stores only the difference. %carry is
; extracted yet never used and %carryout is never written, so the overflow bit
; has no consumer; the ISEL check above expects a plain S_SUB_I32 in this case.
284define amdgpu_kernel void @susubo32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
285  %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
286  %val = extractvalue { i32, i1 } %usub, 0
287  %carry = extractvalue { i32, i1 } %usub, 1
288  store i32 %val, i32 addrspace(1)* %out, align 4
289  ret void
290}
291
292
293; GCN-ISEL-LABEL: name:   usubo32_vcc_user
294; GCN-ISEL-LABEL: body:
295; GCN-ISEL-LABEL: bb.0
296; GCN-ISEL: V_SUB_CO_U32_e64
297
298; Below we check selection to v_sub/subb,
299; because the only user of VCC produced by the USUBO is v_cndmask.
300; We select the VALU form to avoid an unnecessary s_cselect to copy SCC to VCC.
301
302; GCN-LABEL: @usubo32_vcc_user
303;
304; CISI:	v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
305; CISI:	v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
306;
307; VI:	v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
308; VI:	v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
309;
310; GFX9:	v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
311; GFX9:	v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
312;
313; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
314; GFX1010: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]]
; Kernel usubo32_vcc_user: 32-bit unsigned subtract-with-overflow on the kernel
; arguments %a and %b where BOTH results are live: the difference is stored
; through %out and the i1 overflow flag through %carryout. The checks above
; expect the subtract to be selected as V_SUB_CO_U32_e64 with the borrow
; consumed by v_cndmask_b32.
315define amdgpu_kernel void @usubo32_vcc_user(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
316  %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
317  %val = extractvalue { i32, i1 } %usub, 0
318  %carry = extractvalue { i32, i1 } %usub, 1
319  store i32 %val, i32 addrspace(1)* %out, align 4
; Storing the overflow bit is what gives the borrow-out a user.
320  store i1 %carry, i1 addrspace(1)* %carryout
321  ret void
322}
323
324; GCN-ISEL-LABEL: name:   susubo64
325; GCN-ISEL-LABEL: body:
326; GCN-ISEL-LABEL: bb.0
327; GCN-ISEL: S_SUB_U64_PSEUDO
328
329; GCN-LABEL: @susubo64
330;
331; GCN: s_sub_u32
332; GCN: s_subb_u32
; Kernel susubo64: 64-bit unsigned subtract-with-overflow on the kernel
; arguments %a and %b. The difference is stored through %out and the i1
; overflow flag through %carryout. The checks above expect S_SUB_U64_PSEUDO and
; an s_sub_u32 / s_subb_u32 pair.
333define amdgpu_kernel void @susubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) #0 {
334  %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
335  %val = extractvalue { i64, i1 } %usub, 0
336  %carry = extractvalue { i64, i1 } %usub, 1
337  store i64 %val, i64 addrspace(1)* %out, align 8
338  store i1 %carry, i1 addrspace(1)* %carryout
339  ret void
340}
341
342; GCN-ISEL-LABEL: name:   vusubo64
343; GCN-ISEL-LABEL: body:
344; GCN-ISEL-LABEL: bb.0
345; GCN-ISEL: V_SUB_U64_PSEUDO
346
347; GCN-LABEL: @vusubo64
348;
349; CISI:	v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
350; CISI:	v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
351;
352; VI:	v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
353; VI:	v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
354;
355; GFX9:	v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
356; GFX9:	v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
357;
358; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
359; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
; Kernel vusubo64: 64-bit unsigned subtract-with-overflow computing
; %a - sext(workitem id x). The difference is stored through %out and the i1
; overflow flag through %carryout. The checks above expect V_SUB_U64_PSEUDO and
; a VALU sub / sub-with-borrow pair.
360define amdgpu_kernel void @vusubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 {
361  %tid = call i32 @llvm.amdgcn.workitem.id.x()
362  %tid.ext = sext i32 %tid to i64
363  %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %tid.ext)
364  %val = extractvalue { i64, i1 } %usub, 0
365  %carry = extractvalue { i64, i1 } %usub, 1
366  store i64 %val, i64 addrspace(1)* %out, align 8
367  store i1 %carry, i1 addrspace(1)* %carryout
368  ret void
369}
370
371; GCN-ISEL-LABEL: name:   sudiv64
372; GCN-ISEL-LABEL: body:
373; GCN-ISEL-LABEL: bb.3
374; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64
375; GCN-ISEL: S_ADD_CO_PSEUDO %{{[0-9]+}}, %{{[0-9]+}}, %[[CARRY]]
376; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64
377; GCN-ISEL: S_SUB_CO_PSEUDO %{{[0-9]+}}, %{{[0-9]+}}, %[[CARRY]]
; Kernel sudiv64: stores the unsigned 64-bit quotient %x / %y through %out.
; The IR contains no add or sub; the V_ADD_CO_U32_e64 -> S_ADD_CO_PSEUDO and
; V_SUB_CO_U32_e64 -> S_SUB_CO_PSEUDO carry chains checked above (in bb.3)
; presumably come from the backend's expansion of the 64-bit udiv (TODO
; confirm against the AMDGPU lowering).
378define amdgpu_kernel void @sudiv64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
379  %result = udiv i64 %x, %y
380  store i64 %result, i64 addrspace(1)* %out
381  ret void
382}
383
384
385
; Intrinsics called by the kernels in this file: unsigned add/sub with
; overflow at i32 and i64 width (each returns a { value, i1 overflow } pair),
; and the workitem id (x) query. All are declared with attribute group #1.
386declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1
387
388declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
389
390declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) #1
391
392declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1
393
394declare i32 @llvm.amdgcn.workitem.id.x() #1
395
; #0 is attached to the kernels that call the *.with.overflow intrinsics;
; #1 is attached to the intrinsic declarations above.
396attributes #0 = { nounwind }
397attributes #1 = { nounwind readnone }
398
399