1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
2; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
3
4declare i32 @llvm.amdgcn.workitem.id.x() #1
5
6; GCN-LABEL: {{^}}v_cnd_nan_nosgpr:
7; GCN: v_cmp_eq_u32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
8; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]}}, -1, v{{[0-9]+}}, [[COND]]
9; GCN-DAG: v{{[0-9]}}
10; All nan values are converted to 0xffffffff
11; GCN: s_endpgm
; Kernel under test: loads a float from %fptr at the index given by the
; workitem id x, then stores either a NaN constant (when %c is non-zero) or
; the loaded value to %out. The constant 0xFFFFFFFFE0000000 is written in
; LLVM's double-format hexadecimal notation for float literals.
12define amdgpu_kernel void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
13  %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
14  %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
15  %f = load float, float addrspace(1)* %f.gep
16  %setcc = icmp ne i32 %c, 0
17  %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
18  store float %select, float addrspace(1)* %out
19  ret void
20}
21
22
23; This requires slightly trickier SGPR operand legalization since the
24; single constant bus SGPR usage is the last operand, and it should
25; never be moved.
26
27; GCN-LABEL: {{^}}v_cnd_nan:
28; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0
29; GCN: v_cndmask_b32_e32 v{{[0-9]}}, -1, v{{[0-9]}}, vcc
30; GCN-DAG: v{{[0-9]}}
31; All nan values are converted to 0xffffffff
32; GCN: s_endpgm
; Kernel under test: stores a NaN constant to %out when %c is non-zero,
; otherwise stores the kernel argument %f. Unlike v_cnd_nan_nosgpr, the
; non-constant select operand here is passed directly as an argument rather
; than loaded from memory.
33define amdgpu_kernel void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 {
34  %setcc = icmp ne i32 %c, 0
35  %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
36  store float %select, float addrspace(1)* %out
37  ret void
38}
39
40; Test different compare and select operand types for optimal code
41; shrinking.
42; (select (cmp (sgprX, constant)), constant, sgprZ)
43
44; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprZ_f32:
45; GCN: s_load_dwordx2
46; GCN: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Z:[0-9]+]]{{\]}}
47; GCN-DAG: v_cmp_nlg_f32_e64 vcc, s[[X]], 0
48; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]]
49; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc
; Kernel under test: compares the argument %x against 0.0 with an ordered
; not-equal fcmp and stores 1.0 (condition true) or the argument %z
; (condition false) to %out at the workitem-id-x index. The unnamed
; [8 x i32] parameter only occupies space between %out and %x in the
; argument list.
50define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, [8 x i32], float %x, float %z) #0 {
51  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
52  %tid.ext = sext i32 %tid to i64
53  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
54  %setcc = fcmp one float %x, 0.0
55  %select = select i1 %setcc, float 1.0, float %z
56  store float %select, float addrspace(1)* %out.gep
57  ret void
58}
59
60; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprX_f32:
61; GCN: s_load_dword [[X:s[0-9]+]]
62; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
63; GCN-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]]
64; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VX]], vcc
; Kernel under test: same shape as the k1_sgprZ variant, except the value
; being compared (%x) is also the false operand of the select, so a single
; argument feeds both the compare and the select.
65define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* %out, float %x) #0 {
66  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
67  %tid.ext = sext i32 %tid to i64
68  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
69  %setcc = fcmp one float %x, 0.0
70  %select = select i1 %setcc, float 1.0, float %x
71  store float %select, float addrspace(1)* %out.gep
72  ret void
73}
74
75; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprZ_f32:
76; GCN-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Z:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
77; GCN-DAG: v_cmp_nlg_f32_e64 vcc, s[[X]], 0
78; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]]
79; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], vcc
; Kernel under test: compares the argument %x against 0.0 (ordered
; not-equal) and stores 0.0 or the argument %z to %out at the
; workitem-id-x index. Here the select constant is the same 0.0 that the
; compare uses.
80define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)* %out, [8 x i32], float %x, float %z) #0 {
81  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
82  %tid.ext = sext i32 %tid to i64
83  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
84  %setcc = fcmp one float %x, 0.0
85  %select = select i1 %setcc, float 0.0, float %z
86  store float %select, float addrspace(1)* %out.gep
87  ret void
88}
89
90; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprX_f32:
91; GCN: s_load_dword [[X:s[0-9]+]]
92; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
93; GCN-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]]
94; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VX]], vcc
; Kernel under test: the argument %x is compared against 0.0 and is also
; the false operand of the select; the true operand is the constant 0.0.
; The result is stored to %out at the workitem-id-x index.
95define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %x) #0 {
96  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
97  %tid.ext = sext i32 %tid to i64
98  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
99  %setcc = fcmp one float %x, 0.0
100  %select = select i1 %setcc, float 0.0, float %x
101  store float %select, float addrspace(1)* %out.gep
102  ret void
103}
104
105; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_vgprZ_f32:
106; GCN-DAG: s_load_dword [[X:s[0-9]+]]
107; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
108; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
109; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 0, [[Z]], [[COND]]
; Kernel under test: the compared value %x is a kernel argument, while the
; false operand %z is loaded from %z.ptr at the workitem-id-x index. The
; select yields 0.0 when %x is ordered and not equal to 0.0, otherwise %z;
; the result is stored to %out at the same index.
110define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
111  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
112  %tid.ext = sext i32 %tid to i64
113  %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
114  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
115  %z = load float, float addrspace(1)* %z.gep
116  %setcc = fcmp one float %x, 0.0
117  %select = select i1 %setcc, float 0.0, float %z
118  store float %select, float addrspace(1)* %out.gep
119  ret void
120}
121
122; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_vgprZ_f32:
123; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
124; GCN-DAG: s_load_dword [[X:s[0-9]+]]
125; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
126; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0, [[Z]], [[COND]]
; Kernel under test: identical to the k0_vgprZ variant except that the
; select's constant operand is 1.0, which differs from the 0.0 used by the
; compare. %x is a kernel argument; %z is loaded per workitem.
127define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
128  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
129  %tid.ext = sext i32 %tid to i64
130  %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
131  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
132  %z = load float, float addrspace(1)* %z.gep
133  %setcc = fcmp one float %x, 0.0
134  %select = select i1 %setcc, float 1.0, float %z
135  store float %select, float addrspace(1)* %out.gep
136  ret void
137}
138
139; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_sgprZ_f32:
140; GCN-DAG: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
141; GCN-DAG: s_load_dword [[Z:s[0-9]+]]
142; GCN-DAG: v_cmp_ngt_f32_e32 vcc, 0, [[X]]
143; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
144; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc
; Kernel under test: the compared value %x is loaded from %x.ptr at the
; workitem-id-x index, while the false operand %z is a kernel argument.
; The compare is an ordered less-than against 0.0; the select yields 1.0
; when it holds, otherwise %z.
145define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float %z) #0 {
146  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
147  %tid.ext = sext i32 %tid to i64
148  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
149  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
150  %x = load float, float addrspace(1)* %x.gep
151  %setcc = fcmp olt float %x, 0.0
152  %select = select i1 %setcc, float 1.0, float %z
153  store float %select, float addrspace(1)* %out.gep
154  ret void
155}
156
157; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_f32:
158; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
159; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
160; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]]
161; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc
; Kernel under test: both the compared value %x and the false operand %z
; are loaded (volatile) per workitem. The compare is an unordered
; less-than against 0.0; the select yields 1.0 when it holds, otherwise %z.
162define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
163  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
164  %tid.ext = sext i32 %tid to i64
165  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
166  %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
167  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
168  %x = load volatile float, float addrspace(1)* %x.gep
169  %z = load volatile float, float addrspace(1)* %z.gep
170  %setcc = fcmp ult float %x, 0.0
171  %select = select i1 %setcc, float 1.0, float %z
172  store float %select, float addrspace(1)* %out.gep
173  ret void
174}
175
176; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i32:
177; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
178; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
179; GCN: v_cmp_lt_i32_e32 vcc, -1, [[X]]
180; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[Z]], vcc
; Kernel under test: integer counterpart of the loaded-X / loaded-Z case.
; Both i32 values are loaded (volatile) per workitem; the select yields the
; constant 2 when %x is signed-less-than 0, otherwise %z.
181define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i32 addrspace(1)* %z.ptr) #0 {
182  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
183  %tid.ext = sext i32 %tid to i64
184  %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
185  %z.gep = getelementptr inbounds i32, i32 addrspace(1)* %z.ptr, i64 %tid.ext
186  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
187  %x = load volatile i32, i32 addrspace(1)* %x.gep
188  %z = load volatile i32, i32 addrspace(1)* %z.gep
189  %setcc = icmp slt i32 %x, 0
190  %select = select i1 %setcc, i32 2, i32 %z
191  store i32 %select, i32 addrspace(1)* %out.gep
192  ret void
193}
194
195; FIXME: Why does VI make the wrong regalloc choice? The SI and VI checks below are currently identical, so this note may be stale.
196; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i64:
197; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[X_LO:[0-9]+]]:[[X_HI:[0-9]+]]{{\]}}
198; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[Z_LO:[0-9]+]]:[[Z_HI:[0-9]+]]{{\]}}
199; SI-DAG: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}}
200; SI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc
201; SI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc
202
203; VI-DAG: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}}
204; VI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc
205; VI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc
; Kernel under test: 64-bit version of the i32 case. Both i64 values are
; loaded (volatile) per workitem; the select yields the constant 2 when %x
; is signed-less-than 0, otherwise %z. The checks above expect the 64-bit
; select to appear as two 32-bit conditional moves (constants 0 and 2).
206define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 {
207  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
208  %tid.ext = sext i32 %tid to i64
209  %x.gep = getelementptr inbounds i64, i64 addrspace(1)* %x.ptr, i64 %tid.ext
210  %z.gep = getelementptr inbounds i64, i64 addrspace(1)* %z.ptr, i64 %tid.ext
211  %out.gep = getelementptr inbounds i64, i64 addrspace(1)* %out, i64 %tid.ext
212  %x = load volatile i64, i64 addrspace(1)* %x.gep
213  %z = load volatile i64, i64 addrspace(1)* %z.gep
214  %setcc = icmp slt i64 %x, 0
215  %select = select i1 %setcc, i64 2, i64 %z
216  store i64 %select, i64 addrspace(1)* %out.gep
217  ret void
218}
219
220; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_vgprZ_k1_v4f32:
221; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
222; GCN: {{buffer|flat}}_load_dwordx4
223
224; GCN: v_cmp_nge_f32_e32 vcc, 4.0, [[X]]
225; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
226; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
227; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
228; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
; Kernel under test: a single scalar compare (%x unordered-greater-than
; 4.0) drives a select over <4 x float>. In this variant the loaded vector
; %z is the TRUE operand and the constant vector <1.0, 2.0, -0.5, 4.0> is
; the FALSE operand. Both loads are volatile.
229define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
230  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
231  %tid.ext = sext i32 %tid to i64
232  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
233  %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
234  %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
235  %x = load volatile float, float addrspace(1)* %x.gep
236  %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
237  %setcc = fcmp ugt float %x, 4.0
238  %select = select i1 %setcc, <4 x float> %z, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>
239  store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
240  ret void
241}
242
243; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_v4f32:
244; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
245; GCN: {{buffer|flat}}_load_dwordx4
246
247; GCN: v_cmp_ge_f32_e32 vcc, 4.0, [[X]]
248; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
249; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
250; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
251; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
; Kernel under test: same compare as the select_vgprZ_k1 variant (%x
; unordered-greater-than 4.0), but with the select operands swapped: the
; constant vector <1.0, 2.0, -0.5, 4.0> is the TRUE operand and the loaded
; vector %z is the FALSE operand.
252define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
253  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
254  %tid.ext = sext i32 %tid to i64
255  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
256  %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
257  %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
258  %x = load volatile float, float addrspace(1)* %x.gep
259  %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
260  %setcc = fcmp ugt float %x, 4.0
261  %select = select i1 %setcc, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>, <4 x float> %z
262  store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
263  ret void
264}
265
266; This must be swapped as a vector type before the condition has
267; multiple uses.
268
269; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_v4f32:
270; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
271; GCN: {{buffer|flat}}_load_dwordx4
272
273; GCN: v_cmp_le_f32_e32 vcc, 4.0, [[X]]
274; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
275; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
276; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
277; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
; Kernel under test: like the k1_vgprZ vector variant, but the compare has
; its operands in the opposite order: the constant 4.0 is on the left and
; the loaded %x is on the right (4.0 unordered-greater-than %x). The
; constant vector is the TRUE operand of the <4 x float> select.
278define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
279  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
280  %tid.ext = sext i32 %tid to i64
281  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
282  %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
283  %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
284  %x = load volatile float, float addrspace(1)* %x.gep
285  %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
286  %setcc = fcmp ugt float 4.0, %x
287  %select = select i1 %setcc, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>, <4 x float> %z
288  store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
289  ret void
290}
291
292; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i1:
293; GCN: load_dword
294; GCN: load_ubyte
295; GCN-DAG: v_cmp_gt_i32_e32 vcc, 0, v
296; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 1,
297; GCN-DAG: v_cmp_eq_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, v
298; GCN-DAG: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, s{{\[[0-9]+:[0-9]+\]}}
299; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s
300; GCN: store_byte
; Kernel under test: the selected type is i1. An i32 %x and an i1 %z are
; loaded (volatile) per workitem; the select yields true when %x is
; signed-less-than 0, otherwise %z, and the i1 result is stored to %out.
; The checks above expect this to be lowered as an OR of two conditions
; rather than a conditional move between the two values.
301define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i1 addrspace(1)* %z.ptr) #0 {
302  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
303  %tid.ext = sext i32 %tid to i64
304  %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
305  %z.gep = getelementptr inbounds i1, i1 addrspace(1)* %z.ptr, i64 %tid.ext
306  %out.gep = getelementptr inbounds i1, i1 addrspace(1)* %out, i64 %tid.ext
307  %x = load volatile i32, i32 addrspace(1)* %x.gep
308  %z = load volatile i1, i1 addrspace(1)* %z.gep
309  %setcc = icmp slt i32 %x, 0
310  %select = select i1 %setcc, i1 true, i1 %z
311  store i1 %select, i1 addrspace(1)* %out.gep
312  ret void
313}
314
315; Different types compared vs. selected
316; GCN-LABEL: {{^}}fcmp_vgprX_k0_selectf64_k1_vgprZ_f32:
317; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3ff00000
318; GCN-DAG: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
319; GCN-DAG: {{buffer|flat}}_load_dwordx2
320
321; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]]
322; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, vcc
323; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
; Kernel under test: the compare is on f32 (%x unordered-less-than 0.0)
; while the select operates on f64, yielding the constant 1.0 or the loaded
; double %z. Both loads are volatile; the result is stored to %out at the
; workitem-id-x index.
324define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(double addrspace(1)* %out, float addrspace(1)* %x.ptr, double addrspace(1)* %z.ptr) #0 {
325  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
326  %tid.ext = sext i32 %tid to i64
327  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
328  %z.gep = getelementptr inbounds double, double addrspace(1)* %z.ptr, i64 %tid.ext
329  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
330  %x = load volatile float, float addrspace(1)* %x.gep
331  %z = load volatile double, double addrspace(1)* %z.gep
332  %setcc = fcmp ult float %x, 0.0
333  %select = select i1 %setcc, double 1.0, double %z
334  store double %select, double addrspace(1)* %out.gep
335  ret void
336}
337
338; Different types compared vs. selected
339; GCN-LABEL: {{^}}fcmp_vgprX_k0_selecti64_k1_vgprZ_f32:
340; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
341; GCN: {{buffer|flat}}_load_dwordx2
342
343; GCN: v_cmp_nlg_f32_e32 vcc, 0, [[X]]
344; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}, vcc
345; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
; Kernel under test: the compare is on f32 (%x ordered and not equal to
; 0.0) while the select operates on i64, yielding the constant 3 or the
; loaded %z. Both loads are volatile.
346define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(i64 addrspace(1)* %out, float addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 {
347  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
348  %tid.ext = sext i32 %tid to i64
349  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
350  %z.gep = getelementptr inbounds i64, i64 addrspace(1)* %z.ptr, i64 %tid.ext
351  %out.gep = getelementptr inbounds i64, i64 addrspace(1)* %out, i64 %tid.ext
352  %x = load volatile float, float addrspace(1)* %x.gep
353  %z = load volatile i64, i64 addrspace(1)* %z.gep
354  %setcc = fcmp one float %x, 0.0
355  %select = select i1 %setcc, i64 3, i64 %z
356  store i64 %select, i64 addrspace(1)* %out.gep
357  ret void
358}
359
360; Different types compared vs. selected
361; GCN-LABEL: {{^}}icmp_vgprX_k0_selectf32_k1_vgprZ_i32:
362; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
363; GCN: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
364
365; GCN: v_cmp_gt_u32_e32 vcc, 2, [[X]]
366; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[Z]], vcc
; Kernel under test: the compare is on i32 (%x unsigned-greater-than 1)
; while the select operates on f32, yielding the constant 4.0 or the loaded
; float %z. Both loads are volatile.
367define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(float addrspace(1)* %out, i32 addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
368  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
369  %tid.ext = sext i32 %tid to i64
370  %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
371  %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
372  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
373  %x = load volatile i32, i32 addrspace(1)* %x.gep
374  %z = load volatile float, float addrspace(1)* %z.gep
375  %setcc = icmp ugt i32 %x, 1
376  %select = select i1 %setcc, float 4.0, float %z
377  store float %select, float addrspace(1)* %out.gep
378  ret void
379}
380
381; FIXME: Should be able to handle multiple uses
382
383; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2:
384; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
385
386; GCN: v_cmp_nle_f32_e32 vcc, 4.0, [[X]]
387; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -1.0, vcc
388; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -2.0, vcc
; Kernel under test: one compare result (%setcc, 4.0 unordered-greater-than
; %x) feeds two selects that share the false operand %z but use different
; constants (-1.0 and -2.0). Both results are written with volatile stores
; to the same address, %out at the workitem-id-x index.
389define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
390  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
391  %tid.ext = sext i32 %tid to i64
392  %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
393  %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
394  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
395  %x = load volatile float, float addrspace(1)* %x.gep
396  %z = load volatile float, float addrspace(1)* %z.gep
397  %setcc = fcmp ugt float 4.0, %x
398  %select0 = select i1 %setcc, float -1.0, float %z
399  %select1 = select i1 %setcc, float -2.0, float %z
400  store volatile float %select0, float addrspace(1)* %out.gep
401  store volatile float %select1, float addrspace(1)* %out.gep
402  ret void
403}
404
405attributes #0 = { nounwind }
406attributes #1 = { nounwind readnone }
407