; Codegen test for integer and floating-point compares (setcc). The same IR is
; compiled once for amdgcn and once for r600 (redwood); each FileCheck
; invocation selects its target-specific check lines plus the shared FUNC ones.
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s

; Intrinsic called by the v3i32_eq and v3i8_eq kernels to obtain %tid, the
; index they feed to their getelementptr instructions.
declare i32 @llvm.r600.read.tidig.x() nounwind readnone

; Equality compare of two <2 x i32> kernel arguments; the <2 x i1> result is
; sign-extended to <2 x i32> and stored to %out. One compare per element is
; expected on both targets.
; FUNC-LABEL: {{^}}setcc_v2i32:
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y

; GCN: v_cmp_eq_u32_e32
; GCN: v_cmp_eq_u32_e32
define amdgpu_kernel void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) #0 {
  %result = icmp eq <2 x i32> %a, %b
  %sext = sext <2 x i1> %result to <2 x i32>
  store <2 x i32> %sext, <2 x i32> addrspace(1)* %out
  ret void
}

; Equality compare of two <4 x i32> vectors loaded from consecutive elements
; of %in (index 0 and index 1); the result is sign-extended and stored to
; %out. Four per-element compares are expected on both targets.
; FUNC-LABEL: {{^}}setcc_v4i32:
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; GCN: v_cmp_eq_u32_e32
; GCN: v_cmp_eq_u32_e32
; GCN: v_cmp_eq_u32_e32
; GCN: v_cmp_eq_u32_e32
define amdgpu_kernel void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = icmp eq <4 x i32> %a, %b
  %sext = sext <4 x i1> %result to <4 x i32>
  store <4 x i32> %sext, <4 x i32> addrspace(1)* %out
  ret void
}

;;;==========================================================================;;;
;; Float comparisons
;;;==========================================================================;;;

; fcmp oeq on two float kernel arguments; the i1 result is sign-extended to
; i32 and stored to %out.
; FUNC-LABEL: {{^}}f32_oeq:
; R600: SETE_DX10
; GCN: v_cmp_eq_f32
define amdgpu_kernel void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp oeq float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp ogt on two float kernel arguments; the i1 result is sign-extended to
; i32 and stored to %out.
; FUNC-LABEL: {{^}}f32_ogt:
; R600: SETGT_DX10
; GCN: v_cmp_gt_f32
define amdgpu_kernel void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp ogt float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp oge on two float kernel arguments; the i1 result is sign-extended to
; i32 and stored to %out.
; FUNC-LABEL: {{^}}f32_oge:
; R600: SETGE_DX10
; GCN: v_cmp_ge_f32
define amdgpu_kernel void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp oge float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp olt on two float kernel arguments; the i1 result is sign-extended to
; i32 and stored to %out.
; NOTE(review): R600 is checked for SETGT_DX10, presumably selected with the
; operands swapped; the check does not pin operand order.
; FUNC-LABEL: {{^}}f32_olt:
; R600: SETGT_DX10
; GCN: v_cmp_lt_f32
define amdgpu_kernel void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp olt float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp ole on two float kernel arguments; the i1 result is sign-extended to
; i32 and stored to %out.
; NOTE(review): R600 is checked for SETGE_DX10, presumably selected with the
; operands swapped; the check does not pin operand order.
; FUNC-LABEL: {{^}}f32_ole:
; R600: SETGE_DX10
; GCN: v_cmp_le_f32
define amdgpu_kernel void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp ole float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp one (ordered and not equal) on two float kernel arguments; the i1
; result is sign-extended to i32 and stored to %out. R600 is expected to
; build the predicate from several compare and AND operations, while GCN is
; expected to use a single v_cmp_lg followed by a select of 0 / -1.
; FUNC-LABEL: {{^}}f32_one:
; R600-DAG: SETE_DX10
; R600-DAG: SETE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT

; GCN: v_cmp_lg_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_one(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp one float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp ord (neither operand is NaN) on two float kernel arguments; the i1
; result is sign-extended to i32 and stored to %out.
; FUNC-LABEL: {{^}}f32_ord:
; R600-DAG: SETE_DX10
; R600-DAG: SETE_DX10
; R600-DAG: AND_INT
; R600-DAG: SETNE_INT
; GCN: v_cmp_o_f32
define amdgpu_kernel void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp ord float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp ueq (unordered or equal) on two float kernel arguments; the i1 result
; is sign-extended to i32 and stored to %out. R600 is expected to build the
; predicate from several compare and OR operations, while GCN is expected to
; use a single v_cmp_nlg followed by a select of 0 / -1.
; FUNC-LABEL: {{^}}f32_ueq:
; R600-DAG: SETNE_DX10
; R600-DAG: SETNE_DX10
; R600-DAG: OR_INT
; R600-DAG: SETE_DX10
; R600-DAG: OR_INT
; R600-DAG: SETNE_INT

; GCN: v_cmp_nlg_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp ueq float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp ugt (unordered or greater than) on two float kernel arguments; the i1
; result is sign-extended to i32 and stored to %out. GCN is expected to use
; the negated compare v_cmp_nle followed by a select of 0 / -1.
; FUNC-LABEL: {{^}}f32_ugt:
; R600: SETGE
; R600: SETE_DX10
; GCN: v_cmp_nle_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp ugt float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp uge (unordered or greater than or equal) on two float kernel
; arguments; the i1 result is sign-extended to i32 and stored to %out. GCN is
; expected to use the negated compare v_cmp_nlt followed by a select of 0 / -1.
; FUNC-LABEL: {{^}}f32_uge:
; R600: SETGT
; R600: SETE_DX10

; GCN: v_cmp_nlt_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp uge float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp ult (unordered or less than) on two float kernel arguments; the i1
; result is sign-extended to i32 and stored to %out. GCN is expected to use
; the negated compare v_cmp_nge followed by a select of 0 / -1.
; FUNC-LABEL: {{^}}f32_ult:
; R600: SETGE
; R600: SETE_DX10

; GCN: v_cmp_nge_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp ult float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp ule (unordered or less than or equal) on two float kernel arguments;
; the i1 result is sign-extended to i32 and stored to %out. GCN is expected
; to use the negated compare v_cmp_ngt followed by a select of 0 / -1.
; FUNC-LABEL: {{^}}f32_ule:
; R600: SETGT
; R600: SETE_DX10

; GCN: v_cmp_ngt_f32_e32 vcc
; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
define amdgpu_kernel void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp ule float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp une (unordered or not equal) on two float kernel arguments; the i1
; result is sign-extended to i32 and stored to %out.
; FUNC-LABEL: {{^}}f32_une:
; R600: SETNE_DX10
; GCN: v_cmp_neq_f32
define amdgpu_kernel void @f32_une(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp une float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; fcmp uno (either operand is NaN) on two float kernel arguments; the i1
; result is sign-extended to i32 and stored to %out.
; FUNC-LABEL: {{^}}f32_uno:
; R600: SETNE_DX10
; R600: SETNE_DX10
; R600: OR_INT
; R600: SETNE_INT
; GCN: v_cmp_u_f32
define amdgpu_kernel void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %cmp = fcmp uno float %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

;;;==========================================================================;;;
;; 32-bit integer comparisons
;;;==========================================================================;;;

; icmp eq on two i32 kernel arguments; the i1 result is sign-extended to i32
; and stored to %out.
; FUNC-LABEL: {{^}}i32_eq:
; R600: SETE_INT
; GCN: v_cmp_eq_u32
define amdgpu_kernel void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %cmp = icmp eq i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; icmp ne on two i32 kernel arguments; the i1 result is sign-extended to i32
; and stored to %out.
; FUNC-LABEL: {{^}}i32_ne:
; R600: SETNE_INT
; GCN: v_cmp_ne_u32
define amdgpu_kernel void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %cmp = icmp ne i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unsigned icmp ugt on two i32 kernel arguments; the i1 result is
; sign-extended to i32 and stored to %out.
; FUNC-LABEL: {{^}}i32_ugt:
; R600: SETGT_UINT
; GCN: v_cmp_gt_u32
define amdgpu_kernel void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %cmp = icmp ugt i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unsigned icmp uge on two i32 kernel arguments; the i1 result is
; sign-extended to i32 and stored to %out.
; FUNC-LABEL: {{^}}i32_uge:
; R600: SETGE_UINT
; GCN: v_cmp_ge_u32
define amdgpu_kernel void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %cmp = icmp uge i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unsigned icmp ult on two i32 kernel arguments; the i1 result is
; sign-extended to i32 and stored to %out.
; NOTE(review): R600 is checked for SETGT_UINT, presumably selected with the
; operands swapped; the check does not pin operand order.
; FUNC-LABEL: {{^}}i32_ult:
; R600: SETGT_UINT
; GCN: v_cmp_lt_u32
define amdgpu_kernel void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %cmp = icmp ult i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Unsigned icmp ule on two i32 kernel arguments; the i1 result is
; sign-extended to i32 and stored to %out.
; NOTE(review): R600 is checked for SETGE_UINT, presumably selected with the
; operands swapped; the check does not pin operand order.
; FUNC-LABEL: {{^}}i32_ule:
; R600: SETGE_UINT
; GCN: v_cmp_le_u32
define amdgpu_kernel void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %cmp = icmp ule i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Signed icmp sgt on two i32 kernel arguments; the i1 result is sign-extended
; to i32 and stored to %out.
; FUNC-LABEL: {{^}}i32_sgt:
; R600: SETGT_INT
; GCN: v_cmp_gt_i32
define amdgpu_kernel void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %cmp = icmp sgt i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Signed icmp sge on two i32 kernel arguments; the i1 result is sign-extended
; to i32 and stored to %out.
; FUNC-LABEL: {{^}}i32_sge:
; R600: SETGE_INT
; GCN: v_cmp_ge_i32
define amdgpu_kernel void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %cmp = icmp sge i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Signed icmp slt on two i32 kernel arguments; the i1 result is sign-extended
; to i32 and stored to %out.
; NOTE(review): R600 is checked for SETGT_INT, presumably selected with the
; operands swapped; the check does not pin operand order.
; FUNC-LABEL: {{^}}i32_slt:
; R600: SETGT_INT
; GCN: v_cmp_lt_i32
define amdgpu_kernel void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %cmp = icmp slt i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Signed icmp sle on two i32 kernel arguments; the i1 result is sign-extended
; to i32 and stored to %out.
; NOTE(review): R600 is checked for SETGE_INT, presumably selected with the
; operands swapped; the check does not pin operand order.
; FUNC-LABEL: {{^}}i32_sle:
; R600: SETGE_INT
; GCN: v_cmp_le_i32
define amdgpu_kernel void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
entry:
  %cmp = icmp sle i32 %a, %b
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Equality compare of two <3 x i32> vectors, each loaded from its pointer
; argument at index %tid; the result is sign-extended and stored to %out at
; the same index. Only the amdgcn run has checks here: three compare / select
; pairs in any order, then the end of the program.
; FIXME: This does 4 compares
; FUNC-LABEL: {{^}}v3i32_eq:
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm
define amdgpu_kernel void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid
  %gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid
  %gep.out = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
  %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep.a
  %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep.b
  %cmp = icmp eq <3 x i32> %a, %b
  %ext = sext <3 x i1> %cmp to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %gep.out
  ret void
}

; Equality compare of two <3 x i8> vectors, each loaded from its pointer
; argument at index %tid; the result is sign-extended to <3 x i8> and stored
; to %out at the same index. Only the amdgcn run has checks here: three
; 32-bit compare / select pairs in any order, then the end of the program.
; FUNC-LABEL: {{^}}v3i8_eq:
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN-DAG: v_cmp_eq_u32
; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
; GCN: s_endpgm
define amdgpu_kernel void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid
  %gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid
  %gep.out = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %tid
  %a = load <3 x i8>, <3 x i8> addrspace(1)* %gep.a
  %b = load <3 x i8>, <3 x i8> addrspace(1)* %gep.b
  %cmp = icmp eq <3 x i8> %a, %b
  %ext = sext <3 x i1> %cmp to <3 x i8>
  store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out
  ret void
}

; Make sure we don't try to emit i1 setcc ops
; The kernel masks %in down to its low bit, compares that against 0 and
; branches on the result; the checks expect a 32-bit scalar AND followed by a
; 32-bit scalar compare of the AND result.
; NOTE(review): the label below is left unanchored because the symbol name
; contains '-' and may be printed quoted in the assembly output - confirm
; before tightening it.
; FUNC-LABEL: setcc-i1
; GCN: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 1
; GCN: s_cmp_eq_u32 [[AND]], 0
define amdgpu_kernel void @setcc-i1(i32 %in) #0 {
  %and = and i32 %in, 1
  %cmp = icmp eq i32 %and, 0
  br i1 %cmp, label %endif, label %if
if:
  unreachable
endif:
  ret void
}

; Branches on NOT((%cond >= 0.0) AND (%cond <= 1.0)). The checks expect the
; negation to be folded into the compares: a "not >=" compare against 0 and a
; "not <=" compare against 1.0, combined with a 64-bit scalar OR.
; The OR destination is matched with a pattern instead of a fixed register
; pair, because which pair the allocator picks is incidental to this test.
; FUNC-LABEL: setcc-i1-and-xor
; GCN-DAG: v_cmp_nge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: v_cmp_nle_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
; GCN: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, [[A]], [[B]]
define amdgpu_kernel void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 {
bb0:
  %tmp5 = fcmp oge float %cond, 0.000000e+00
  %tmp7 = fcmp ole float %cond, 1.000000e+00
  %tmp9 = and i1 %tmp5, %tmp7
  %tmp11 = xor i1 %tmp9, 1
  br i1 %tmp11, label %bb2, label %bb1

bb1:
  store i32 0, i32 addrspace(1)* %out
  br label %bb2

bb2:
  ret void
}

; Two signed greater-than compares on <2 x i32> vectors loaded from memory.
; The first compare (%a.val > 1) is zero-extended, shifted into bit 31 and
; XORed into %b.val, which is then reinterpreted as <2 x float>. The second
; compare (%c.val > 1199570944, i.e. 0x47800000) selects between 1.0 and that
; value. The amdgcn checks expect one signed compare per element (two here).
; FUNC-LABEL: {{^}}setcc_v2i32_expand:
; GCN: v_cmp_gt_i32
; GCN: v_cmp_gt_i32
define amdgpu_kernel void @setcc_v2i32_expand(
  <2 x i32> addrspace(1)* %a,
  <2 x i32> addrspace(1)* %b,
  <2 x i32> addrspace(1)* %c,
  <2 x float> addrspace(1)* %r) {
entry:
  %a.val = load <2 x i32>, <2 x i32> addrspace(1)* %a
  %b.val = load <2 x i32>, <2 x i32> addrspace(1)* %b
  %c.val = load <2 x i32>, <2 x i32> addrspace(1)* %c

  %icmp.val.1 = icmp sgt <2 x i32> %a.val, <i32 1, i32 1>
  %zext.val.1 = zext <2 x i1> %icmp.val.1 to <2 x i32>
  %shl.val.1 = shl nuw <2 x i32> %zext.val.1, <i32 31, i32 31>
  %xor.val.1 = xor <2 x i32> %shl.val.1, %b.val
  %bitcast.val.1 = bitcast <2 x i32> %xor.val.1 to <2 x float>
  %icmp.val.2 = icmp sgt <2 x i32> %c.val, <i32 1199570944, i32 1199570944>
  %select.val.1 = select <2 x i1> %icmp.val.2, <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> %bitcast.val.1

  store <2 x float> %select.val.1, <2 x float> addrspace(1)* %r
  ret void
}

; Two signed greater-than compares on <4 x i32> vectors loaded from memory.
; The first compare (%a.val > 1) is zero-extended, shifted into bit 31 and
; XORed into %b.val, which is then reinterpreted as <4 x float>. The second
; compare (%c.val > 1199570944, i.e. 0x47800000) selects between 1.0 and that
; value. The amdgcn checks expect four signed compares.
; FUNC-LABEL: {{^}}setcc_v4i32_expand:
; GCN: v_cmp_gt_i32
; GCN: v_cmp_gt_i32
; GCN: v_cmp_gt_i32
; GCN: v_cmp_gt_i32
define amdgpu_kernel void @setcc_v4i32_expand(
  <4 x i32> addrspace(1)* %a,
  <4 x i32> addrspace(1)* %b,
  <4 x i32> addrspace(1)* %c,
  <4 x float> addrspace(1)* %r) {
entry:
  %a.val = load <4 x i32>, <4 x i32> addrspace(1)* %a
  %b.val = load <4 x i32>, <4 x i32> addrspace(1)* %b
  %c.val = load <4 x i32>, <4 x i32> addrspace(1)* %c

  %icmp.val.1 = icmp sgt <4 x i32> %a.val, <i32 1, i32 1, i32 1, i32 1>
  %zext.val.1 = zext <4 x i1> %icmp.val.1 to <4 x i32>
  %shl.val.1 = shl nuw <4 x i32> %zext.val.1, <i32 31, i32 31, i32 31, i32 31>
  %xor.val.1 = xor <4 x i32> %shl.val.1, %b.val
  %bitcast.val.1 = bitcast <4 x i32> %xor.val.1 to <4 x float>
  %icmp.val.2 = icmp sgt <4 x i32> %c.val, <i32 1199570944, i32 1199570944, i32 1199570944, i32 1199570944>
  %select.val.1 = select <4 x i1> %icmp.val.2, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %bitcast.val.1

  store <4 x float> %select.val.1, <4 x float> addrspace(1)* %r
  ret void
}

; Attribute group #0, referenced by most kernels in this file.
attributes #0 = { nounwind }
