1; RUN: llc -march=amdgcn -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2
; Workitem-id (x dimension) intrinsic; every compare kernel below uses its
; result as the element index into %in and %out.
declare i32 @llvm.amdgcn.workitem.id.x() #0
4
5; --------------------------------------------------------------------------------
6; i32 compares
7; --------------------------------------------------------------------------------
8
; icmp eq %x, 64: expects the e32 compare with the constant 64 as the first
; source operand.
; GCN-LABEL: {{^}}commute_eq_64_i32:
; GCN: v_cmp_eq_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i32, i32 addrspace(1)* %src.ptr
  %pred = icmp eq i32 %x, 64
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
21
; icmp ne %x, 64: expects the e32 compare with the constant 64 as the first
; source operand.
; GCN-LABEL: {{^}}commute_ne_64_i32:
; GCN: v_cmp_ne_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i32, i32 addrspace(1)* %src.ptr
  %pred = icmp ne i32 %x, 64
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
34
35; FIXME: Why isn't this being folded as a constant?
; icmp ne %x, 12345 (0x3039): expects the constant to be placed in an SGPR by
; s_movk_i32 and that SGPR to be the first operand of the e32 compare.
; GCN-LABEL: {{^}}commute_ne_litk_i32:
; GCN: s_movk_i32 [[K:s[0-9]+]], 0x3039
; GCN: v_cmp_ne_u32_e32 vcc, [[K]], v{{[0-9]+}}
define amdgpu_kernel void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i32, i32 addrspace(1)* %src.ptr
  %pred = icmp ne i32 %x, 12345
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
49
; icmp ugt %x, 64: expects the swapped form 64 < %x (v_cmp_lt_u32) with the
; constant as the first operand.
; GCN-LABEL: {{^}}commute_ugt_64_i32:
; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i32, i32 addrspace(1)* %src.ptr
  %pred = icmp ugt i32 %x, 64
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
62
; icmp uge %x, 64: expects the form 63 < %x (v_cmp_lt_u32) with the constant
; as the first operand.
; GCN-LABEL: {{^}}commute_uge_64_i32:
; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}}
define amdgpu_kernel void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i32, i32 addrspace(1)* %src.ptr
  %pred = icmp uge i32 %x, 64
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
75
; icmp ult %x, 64: expects the swapped form 64 > %x (v_cmp_gt_u32) with the
; constant as the first operand.
; GCN-LABEL: {{^}}commute_ult_64_i32:
; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i32, i32 addrspace(1)* %src.ptr
  %pred = icmp ult i32 %x, 64
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
88
; icmp ule %x, 63: expects the form 64 > %x (v_cmp_gt_u32) with the constant
; as the first operand.
; GCN-LABEL: {{^}}commute_ule_63_i32:
; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i32, i32 addrspace(1)* %src.ptr
  %pred = icmp ule i32 %x, 63
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
101
; icmp ule %x, 64: expects 0x41 (65) to be placed in an SGPR by s_movk_i32 and
; the compare emitted as 65 > %x (v_cmp_gt_u32) with that SGPR first.
; GCN-LABEL: {{^}}commute_ule_64_i32:
; GCN: s_movk_i32 [[K:s[0-9]+]], 0x41{{$}}
; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}}
define amdgpu_kernel void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i32, i32 addrspace(1)* %src.ptr
  %pred = icmp ule i32 %x, 64
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
115
; sext(icmp sgt %val, -1): the expected output here is an arithmetic shift
; right by 31 rather than a v_cmp. The registers are matched with patterns so
; the check does not depend on a particular register assignment.
; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
define amdgpu_kernel void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in
  %cmp = icmp sgt i32 %val, -1
  %ext = sext i1 %cmp to i32
  store i32 %ext, i32 addrspace(1)* %gep.out
  ret void
}
128
; icmp sge %x, -2: expects the form -3 < %x (v_cmp_lt_i32) with the constant
; as the first operand.
; GCN-LABEL: {{^}}commute_sge_neg2_i32:
; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}}
define amdgpu_kernel void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i32, i32 addrspace(1)* %src.ptr
  %pred = icmp sge i32 %x, -2
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
141
; icmp slt %x, -16: expects the swapped form -16 > %x (v_cmp_gt_i32) with the
; constant as the first operand.
; GCN-LABEL: {{^}}commute_slt_neg16_i32:
; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}}
define amdgpu_kernel void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i32, i32 addrspace(1)* %src.ptr
  %pred = icmp slt i32 %x, -16
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
154
; icmp sle %x, 5: expects the form 6 > %x (v_cmp_gt_i32) with the constant as
; the first operand.
; GCN-LABEL: {{^}}commute_sle_5_i32:
; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}}
define amdgpu_kernel void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i32, i32 addrspace(1)* %src.ptr
  %pred = icmp sle i32 %x, 5
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
167
168; --------------------------------------------------------------------------------
169; i64 compares
170; --------------------------------------------------------------------------------
171
; icmp eq i64 %x, 64: expects the 64-bit e32 compare with the constant 64 as
; the first source operand.
; GCN-LABEL: {{^}}commute_eq_64_i64:
; GCN: v_cmp_eq_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i64, i64 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i64, i64 addrspace(1)* %src.ptr
  %pred = icmp eq i64 %x, 64
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
184
; icmp ne i64 %x, 64: expects the 64-bit e32 compare with the constant 64 as
; the first source operand.
; GCN-LABEL: {{^}}commute_ne_64_i64:
; GCN: v_cmp_ne_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i64, i64 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i64, i64 addrspace(1)* %src.ptr
  %pred = icmp ne i64 %x, 64
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
197
; icmp ugt i64 %x, 64: expects the swapped form 64 < %x (v_cmp_lt_u64) with
; the constant as the first operand.
; GCN-LABEL: {{^}}commute_ugt_64_i64:
; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i64, i64 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i64, i64 addrspace(1)* %src.ptr
  %pred = icmp ugt i64 %x, 64
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
210
; icmp uge i64 %x, 64: expects the form 63 < %x (v_cmp_lt_u64) with the
; constant as the first operand.
; GCN-LABEL: {{^}}commute_uge_64_i64:
; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i64, i64 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i64, i64 addrspace(1)* %src.ptr
  %pred = icmp uge i64 %x, 64
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
223
; icmp ult i64 %x, 64: expects the swapped form 64 > %x (v_cmp_gt_u64) with
; the constant as the first operand.
; GCN-LABEL: {{^}}commute_ult_64_i64:
; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i64, i64 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i64, i64 addrspace(1)* %src.ptr
  %pred = icmp ult i64 %x, 64
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
236
; icmp ule i64 %x, 63: expects the form 64 > %x (v_cmp_gt_u64) with the
; constant as the first operand.
; GCN-LABEL: {{^}}commute_ule_63_i64:
; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i64, i64 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i64, i64 addrspace(1)* %src.ptr
  %pred = icmp ule i64 %x, 63
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
249
250; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm
251
; icmp ule i64 %x, 64: expects the low half of the constant, 0x41 (65), to be
; set by s_movk_i32 and the compare emitted as 65 > %x (v_cmp_gt_u64) with the
; SGPR pair starting at that register as the first operand.
; GCN-LABEL: {{^}}commute_ule_64_i64:
; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}}
; GCN: v_cmp_gt_u64_e32 vcc, s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i64, i64 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i64, i64 addrspace(1)* %src.ptr
  %pred = icmp ule i64 %x, 64
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
265
; icmp sgt i64 %x, -1: expects the swapped form -1 < %x (v_cmp_lt_i64) with
; the constant as the first operand.
; GCN-LABEL: {{^}}commute_sgt_neg1_i64:
; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i64, i64 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i64, i64 addrspace(1)* %src.ptr
  %pred = icmp sgt i64 %x, -1
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
278
; icmp sge i64 %x, -2: expects the form -3 < %x (v_cmp_lt_i64) with the
; constant as the first operand.
; GCN-LABEL: {{^}}commute_sge_neg2_i64:
; GCN: v_cmp_lt_i64_e32 vcc, -3, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i64, i64 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i64, i64 addrspace(1)* %src.ptr
  %pred = icmp sge i64 %x, -2
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
291
; icmp slt i64 %x, -16: expects the swapped form -16 > %x (v_cmp_gt_i64) with
; the constant as the first operand.
; GCN-LABEL: {{^}}commute_slt_neg16_i64:
; GCN: v_cmp_gt_i64_e32 vcc, -16, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i64, i64 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i64, i64 addrspace(1)* %src.ptr
  %pred = icmp slt i64 %x, -16
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
304
; icmp sle i64 %x, 5: expects the form 6 > %x (v_cmp_gt_i64) with the constant
; as the first operand.
; GCN-LABEL: {{^}}commute_sle_5_i64:
; GCN: v_cmp_gt_i64_e32 vcc, 6, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr i64, i64 addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load i64, i64 addrspace(1)* %src.ptr
  %pred = icmp sle i64 %x, 5
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
317
318; --------------------------------------------------------------------------------
319; f32 compares
320; --------------------------------------------------------------------------------
321
322
; fcmp oeq %x, 2.0: expects v_cmp_eq_f32 with the constant 2.0 as the first
; source operand.
; GCN-LABEL: {{^}}commute_oeq_2.0_f32:
; GCN: v_cmp_eq_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp oeq float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
335
336
; fcmp ogt %x, 2.0: expects the swapped form v_cmp_lt_f32 with 2.0 as the
; first operand.
; GCN-LABEL: {{^}}commute_ogt_2.0_f32:
; GCN: v_cmp_lt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp ogt float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
349
; fcmp oge %x, 2.0: expects the swapped form v_cmp_le_f32 with 2.0 as the
; first operand.
; GCN-LABEL: {{^}}commute_oge_2.0_f32:
; GCN: v_cmp_le_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp oge float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
362
; fcmp olt %x, 2.0: expects the swapped form v_cmp_gt_f32 with 2.0 as the
; first operand.
; GCN-LABEL: {{^}}commute_olt_2.0_f32:
; GCN: v_cmp_gt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp olt float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
375
; fcmp ole %x, 2.0: expects the swapped form v_cmp_ge_f32 with 2.0 as the
; first operand.
; GCN-LABEL: {{^}}commute_ole_2.0_f32:
; GCN: v_cmp_ge_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp ole float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
388
; fcmp one %x, 2.0: expects v_cmp_lg_f32 with 2.0 as the first operand.
; GCN-LABEL: {{^}}commute_one_2.0_f32:
; GCN: v_cmp_lg_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp one float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
401
; fcmp ord %x, 2.0: expects v_cmp_o_f32 with the same VGPR as both operands;
; the constant does not appear in the checked instruction.
; GCN-LABEL: {{^}}commute_ord_2.0_f32:
; GCN: v_cmp_o_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
define amdgpu_kernel void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp ord float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
414
; fcmp ueq %x, 2.0: expects v_cmp_nlg_f32 with 2.0 as the first operand.
; GCN-LABEL: {{^}}commute_ueq_2.0_f32:
; GCN: v_cmp_nlg_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp ueq float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
427
; fcmp ugt %x, 2.0: expects the swapped form v_cmp_nge_f32 with 2.0 as the
; first operand.
; GCN-LABEL: {{^}}commute_ugt_2.0_f32:
; GCN: v_cmp_nge_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp ugt float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
440
; fcmp uge %x, 2.0: expects the swapped form v_cmp_ngt_f32 with 2.0 as the
; first operand.
; GCN-LABEL: {{^}}commute_uge_2.0_f32:
; GCN: v_cmp_ngt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp uge float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
453
; fcmp ult %x, 2.0: expects the swapped form v_cmp_nle_f32 with 2.0 as the
; first operand.
; GCN-LABEL: {{^}}commute_ult_2.0_f32:
; GCN: v_cmp_nle_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp ult float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
466
; fcmp ule %x, 2.0: expects the swapped form v_cmp_nlt_f32 with 2.0 as the
; first operand.
; GCN-LABEL: {{^}}commute_ule_2.0_f32:
; GCN: v_cmp_nlt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp ule float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
479
; fcmp une %x, 2.0: expects v_cmp_neq_f32 with 2.0 as the first operand.
; GCN-LABEL: {{^}}commute_une_2.0_f32:
; GCN: v_cmp_neq_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp une float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
492
; fcmp uno %x, 2.0: expects v_cmp_u_f32 with the same VGPR as both operands;
; the constant does not appear in the checked instruction.
; GCN-LABEL: {{^}}commute_uno_2.0_f32:
; GCN: v_cmp_u_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
define amdgpu_kernel void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr float, float addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load float, float addrspace(1)* %src.ptr
  %pred = fcmp uno float %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
505
506; --------------------------------------------------------------------------------
507; f64 compares
508; --------------------------------------------------------------------------------
509
510
; fcmp oeq double %x, 2.0: expects v_cmp_eq_f64 with the constant 2.0 as the
; first source operand.
; GCN-LABEL: {{^}}commute_oeq_2.0_f64:
; GCN: v_cmp_eq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp oeq double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
523
524
; fcmp ogt double %x, 2.0: expects the swapped form v_cmp_lt_f64 with 2.0 as
; the first operand.
; GCN-LABEL: {{^}}commute_ogt_2.0_f64:
; GCN: v_cmp_lt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp ogt double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
537
; fcmp oge double %x, 2.0: expects the swapped form v_cmp_le_f64 with 2.0 as
; the first operand.
; GCN-LABEL: {{^}}commute_oge_2.0_f64:
; GCN: v_cmp_le_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp oge double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
550
; fcmp olt double %x, 2.0: expects the swapped form v_cmp_gt_f64 with 2.0 as
; the first operand.
; GCN-LABEL: {{^}}commute_olt_2.0_f64:
; GCN: v_cmp_gt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp olt double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
563
; fcmp ole double %x, 2.0: expects the swapped form v_cmp_ge_f64 with 2.0 as
; the first operand.
; GCN-LABEL: {{^}}commute_ole_2.0_f64:
; GCN: v_cmp_ge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp ole double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
576
; fcmp one double %x, 2.0: expects v_cmp_lg_f64 with 2.0 as the first operand.
; GCN-LABEL: {{^}}commute_one_2.0_f64:
; GCN: v_cmp_lg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp one double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
589
; fcmp ord double %x, 2.0: expects v_cmp_o_f64 with the same VGPR pair as both
; operands; the constant does not appear in the checked instruction.
; GCN-LABEL: {{^}}commute_ord_2.0_f64:
; GCN: v_cmp_o_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
define amdgpu_kernel void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp ord double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
602
; fcmp ueq double %x, 2.0: expects v_cmp_nlg_f64 with 2.0 as the first operand.
; GCN-LABEL: {{^}}commute_ueq_2.0_f64:
; GCN: v_cmp_nlg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp ueq double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
615
; fcmp ugt double %x, 2.0: expects the swapped form v_cmp_nge_f64 with 2.0 as
; the first operand.
; GCN-LABEL: {{^}}commute_ugt_2.0_f64:
; GCN: v_cmp_nge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp ugt double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
628
; fcmp uge double %x, 2.0: expects the swapped form v_cmp_ngt_f64 with 2.0 as
; the first operand.
; GCN-LABEL: {{^}}commute_uge_2.0_f64:
; GCN: v_cmp_ngt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp uge double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
641
; fcmp ult double %x, 2.0: expects the swapped form v_cmp_nle_f64 with 2.0 as
; the first operand.
; GCN-LABEL: {{^}}commute_ult_2.0_f64:
; GCN: v_cmp_nle_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp ult double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
654
; fcmp ule double %x, 2.0: expects the swapped form v_cmp_nlt_f64 with 2.0 as
; the first operand.
; GCN-LABEL: {{^}}commute_ule_2.0_f64:
; GCN: v_cmp_nlt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp ule double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
667
; fcmp une double %x, 2.0: expects v_cmp_neq_f64 with 2.0 as the first operand.
; GCN-LABEL: {{^}}commute_une_2.0_f64:
; GCN: v_cmp_neq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp une double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
680
; fcmp uno double %x, 2.0: expects v_cmp_u_f64 with the same VGPR pair as both
; operands; the constant does not appear in the checked instruction.
; GCN-LABEL: {{^}}commute_uno_2.0_f64:
; GCN: v_cmp_u_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
define amdgpu_kernel void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
  %lane = call i32 @llvm.amdgcn.workitem.id.x() #0
  %src.ptr = getelementptr double, double addrspace(1)* %in, i32 %lane
  %dst.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
  %x = load double, double addrspace(1)* %src.ptr
  %pred = fcmp uno double %x, 2.0
  %mask = sext i1 %pred to i32
  store i32 %mask, i32 addrspace(1)* %dst.ptr
  ret void
}
693
694
695; FIXME: Should be able to fold this frameindex
696; Without commuting the frame index in the pre-regalloc run of
697; SIShrinkInstructions, this was using the VOP3 compare.
698
; Compares a pointer loaded (volatile) from an undef address against the
; address of a stack object. The active checks expect the frame index to be
; materialized into a VGPR (value 4) and used as the second operand of the e32
; compare; the XGCN line is a disabled check for the folded form.
; The kernel uses attribute group #1 (nounwind): its body performs a volatile
; load and a volatile store, which contradicts the readnone in group #0.
; GCN-LABEL: {{^}}commute_frameindex:
; XGCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}

; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]]
define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #1 {
entry:
  %stack0 = alloca i32, addrspace(5)
  %ptr0 = load volatile i32 addrspace(5)*, i32 addrspace(5)* addrspace(1)* undef
  %eq = icmp eq i32 addrspace(5)* %ptr0, %stack0
  %ext = zext i1 %eq to i32
  store volatile i32 %ext, i32 addrspace(1)* %out
  ret void
}
713
; Attribute groups referenced as #0 and #1 by the declaration and the
; definitions in this file.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
716