; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s

declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0

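; The third operand of llvm.amdgcn.icmp is the comparison code, encoded as an
; llvm::CmpInst::Predicate value: ICMP_EQ = 32, ICMP_NE = 33, ICMP_UGT = 34,
; ICMP_UGE = 35, ICMP_ULT = 36, ICMP_ULE = 37, ICMP_SGT = 38, ICMP_SGE = 39,
; ICMP_SLT = 40, ICMP_SLE = 41. Codes outside the icmp range (e.g. 30 below)
; are not valid icmp predicates and must not select a VOPC compare, which the
; GCN-NOT checks verify.
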
; GCN-LABEL: {{^}}v_icmp_i32_eq:
; GCN: v_cmp_eq_u32_e64
define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i32:
; GCN-NOT: v_cmp_eq_u32_e64
define amdgpu_kernel void @v_icmp_i32(i64 addrspace(1)* %out, i32 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i32_ne:
; GCN: v_cmp_ne_u32_e64
define amdgpu_kernel void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i32_ugt:
; GCN: v_cmp_gt_u32_e64
define amdgpu_kernel void @v_icmp_i32_ugt(i64 addrspace(1)* %out, i32 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i32_uge:
; GCN: v_cmp_ge_u32_e64
define amdgpu_kernel void @v_icmp_i32_uge(i64 addrspace(1)* %out, i32 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i32_ult:
; GCN: v_cmp_lt_u32_e64
define amdgpu_kernel void @v_icmp_i32_ult(i64 addrspace(1)* %out, i32 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i32_ule:
; GCN: v_cmp_le_u32_e64
define amdgpu_kernel void @v_icmp_i32_ule(i64 addrspace(1)* %out, i32 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i32_sgt:
; GCN: v_cmp_gt_i32_e64
define amdgpu_kernel void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 {
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i32_sge:
; GCN: v_cmp_ge_i32_e64
define amdgpu_kernel void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i32_slt:
; GCN: v_cmp_lt_i32_e64
define amdgpu_kernel void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_icmp_i32_sle:
; GCN: v_cmp_le_i32_e64
define amdgpu_kernel void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i64_eq:
; GCN: v_cmp_eq_u64_e64
define amdgpu_kernel void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i64_ne:
; GCN: v_cmp_ne_u64_e64
define amdgpu_kernel void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_u64_ugt:
; GCN: v_cmp_gt_u64_e64
define amdgpu_kernel void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_u64_uge:
; GCN: v_cmp_ge_u64_e64
define amdgpu_kernel void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_u64_ult:
; GCN: v_cmp_lt_u64_e64
define amdgpu_kernel void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_u64_ule:
; GCN: v_cmp_le_u64_e64
define amdgpu_kernel void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i64_sgt:
; GCN: v_cmp_gt_i64_e64
define amdgpu_kernel void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i64_sge:
; GCN: v_cmp_ge_i64_e64
define amdgpu_kernel void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i64_slt:
; GCN: v_cmp_lt_i64_e64
define amdgpu_kernel void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_icmp_i64_sle:
; GCN: v_cmp_le_i64_e64
define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i16_eq:
; VI: v_cmp_eq_u16_e64

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
; SI-DAG: s_and_b32 [[CVT:s[0-9]+]], s{{[0-9]+}}, 0xffff{{$}}
; SI: v_cmp_eq_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
define amdgpu_kernel void @v_icmp_i16_eq(i64 addrspace(1)* %out, i16 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 32)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i16:
; GCN-NOT: v_cmp_eq_
define amdgpu_kernel void @v_icmp_i16(i64 addrspace(1)* %out, i16 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 30)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_icmp_i16_ne:
; VI: v_cmp_ne_u16_e64

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
; SI-DAG: s_and_b32 [[CVT:s[0-9]+]], s{{[0-9]+}}, 0xffff{{$}}
; SI: v_cmp_ne_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
define amdgpu_kernel void @v_icmp_i16_ne(i64 addrspace(1)* %out, i16 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 33)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i16_ugt:
; VI: v_cmp_gt_u16_e64

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
; SI-DAG: s_and_b32 [[CVT:s[0-9]+]], s{{[0-9]+}}, 0xffff{{$}}
; SI: v_cmp_gt_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
define amdgpu_kernel void @v_icmp_i16_ugt(i64 addrspace(1)* %out, i16 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 34)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i16_uge:
; VI: v_cmp_ge_u16_e64

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
; SI-DAG: s_and_b32 [[CVT:s[0-9]+]], s{{[0-9]+}}, 0xffff{{$}}
; SI: v_cmp_ge_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
define amdgpu_kernel void @v_icmp_i16_uge(i64 addrspace(1)* %out, i16 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 35)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i16_ult:
; VI: v_cmp_lt_u16_e64

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
; SI-DAG: s_and_b32 [[CVT:s[0-9]+]], s{{[0-9]+}}, 0xffff{{$}}
; SI: v_cmp_lt_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
define amdgpu_kernel void @v_icmp_i16_ult(i64 addrspace(1)* %out, i16 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 36)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i16_ule:
; VI: v_cmp_le_u16_e64

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
; SI-DAG: s_and_b32 [[CVT:s[0-9]+]], s{{[0-9]+}}, 0xffff{{$}}
; SI: v_cmp_le_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
define amdgpu_kernel void @v_icmp_i16_ule(i64 addrspace(1)* %out, i16 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 37)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i16_sgt:
; VI: v_cmp_gt_i16_e64

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
; SI-DAG: s_sext_i32_i16 [[CVT:s[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_gt_i32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
define amdgpu_kernel void @v_icmp_i16_sgt(i64 addrspace(1)* %out, i16 %src) #1 {
  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 38)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i16_sge:
; VI: v_cmp_ge_i16_e64

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
; SI-DAG: s_sext_i32_i16 [[CVT:s[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_ge_i32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
define amdgpu_kernel void @v_icmp_i16_sge(i64 addrspace(1)* %out, i16 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 39)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i16_slt:
; VI: v_cmp_lt_i16_e64

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
; SI-DAG: s_sext_i32_i16 [[CVT:s[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_lt_i32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
define amdgpu_kernel void @v_icmp_i16_slt(i64 addrspace(1)* %out, i16 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 40)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}v_icmp_i16_sle:
; VI: v_cmp_le_i16_e64

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
; SI-DAG: s_sext_i32_i16 [[CVT:s[0-9]+]], s{{[0-9]+}}
; SI: v_cmp_le_i32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) {
  %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 41)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_icmp_i1_ne0:
; GCN: v_cmp_gt_u32_e64 s[[C0:\[[0-9]+:[0-9]+\]]],
; GCN: v_cmp_gt_u32_e64 s[[C1:\[[0-9]+:[0-9]+\]]],
; GCN: s_and_b64 s[[SRC:\[[0-9]+:[0-9]+\]]], s[[C0]], s[[C1]]
; SI-NEXT: s_mov_b32 s{{[0-9]+}}, -1
; GCN-NEXT: v_mov_b32_e32
; GCN-NEXT: v_mov_b32_e32
; GCN: {{global|flat|buffer}}_store_dwordx2
define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) {
  %c0 = icmp ugt i32 %a, 1
  %c1 = icmp ugt i32 %b, 2
  %src = and i1 %c0, %c1
  %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33)
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind readnone convergent }