1; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2
3declare i32 @llvm.r600.read.tidig.x() nounwind readnone
4
5; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
6; SI: v_min_i32_e32
; Signed "sle" compare feeding a select over two i32 values loaded from %aptr
; and %bptr at the index returned by @llvm.r600.read.tidig.x; the chosen value
; is stored to the same index of %out.
define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %a.addr = getelementptr i32, i32 addrspace(1)* %aptr, i32 %idx
  %b.addr = getelementptr i32, i32 addrspace(1)* %bptr, i32 %idx
  %out.addr = getelementptr i32, i32 addrspace(1)* %out, i32 %idx
  %lhs = load i32, i32 addrspace(1)* %a.addr, align 4
  %rhs = load i32, i32 addrspace(1)* %b.addr, align 4
  ; select(lhs <= rhs, lhs, rhs) is the signed-min idiom under test.
  %lhs.le.rhs = icmp sle i32 %lhs, %rhs
  %min = select i1 %lhs.le.rhs, i32 %lhs, i32 %rhs
  store i32 %min, i32 addrspace(1)* %out.addr, align 4
  ret void
}
19
20; FUNC-LABEL: {{^}}s_test_imin_sle_i32:
21; SI: s_min_i32
; Signed min of the two i32 function arguments, expressed as an "sle" compare
; feeding a select; the result is stored through %out.
define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %a.le.b = icmp sle i32 %a, %b
  %min = select i1 %a.le.b, i32 %a, i32 %b
  store i32 %min, i32 addrspace(1)* %out, align 4
  ret void
}
28
29; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32:
30; SI: s_min_i32
; Single-element vector form of the signed "sle" min on function arguments;
; the check above expects one scalar s_min_i32.
define void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %le.mask = icmp sle <1 x i32> %a, %b
  %min = select <1 x i1> %le.mask, <1 x i32> %a, <1 x i32> %b
  store <1 x i32> %min, <1 x i32> addrspace(1)* %out
  ret void
}
37
38; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32:
39; SI: s_min_i32
40; SI: s_min_i32
41; SI: s_min_i32
42; SI: s_min_i32
; Four-element vector form of the signed "sle" min on function arguments; the
; checks above expect one s_min_i32 per element.
define void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %le.mask = icmp sle <4 x i32> %a, %b
  %min = select <4 x i1> %le.mask, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %min, <4 x i32> addrspace(1)* %out
  ret void
}
49
50; FUNC-LABEL: {{^}}s_test_imin_sle_i8:
51; SI: s_load_dword
52; SI: s_load_dword
53; SI: s_sext_i32_i8
54; SI: s_sext_i32_i8
55; SI: s_min_i32
; Signed "sle" min on i8 function arguments. The checks above expect both
; operands to be sign-extended (s_sext_i32_i8) before a 32-bit s_min_i32.
define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind {
  %a.le.b = icmp sle i8 %a, %b
  %min = select i1 %a.le.b, i8 %a, i8 %b
  store i8 %min, i8 addrspace(1)* %out
  ret void
}
62
63; XXX - should be able to use s_min if we stop unnecessarily doing
64; extloads with mubuf instructions.
65
66; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
67; SI: buffer_load_sbyte
68; SI: buffer_load_sbyte
69; SI: buffer_load_sbyte
70; SI: buffer_load_sbyte
71; SI: buffer_load_sbyte
72; SI: buffer_load_sbyte
73; SI: buffer_load_sbyte
74; SI: buffer_load_sbyte
75
76; SI: v_min_i32
77; SI: v_min_i32
78; SI: v_min_i32
79; SI: v_min_i32
80
81; SI: s_endpgm
; Signed "sle" min on <4 x i8> function arguments. The checks above currently
; expect eight buffer_load_sbyte loads followed by four vector v_min_i32.
define void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b) nounwind {
  %le.mask = icmp sle <4 x i8> %a, %b
  %min = select <4 x i1> %le.mask, <4 x i8> %a, <4 x i8> %b
  store <4 x i8> %min, <4 x i8> addrspace(1)* %out
  ret void
}
88
89; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16:
90; SI: v_min_i32
91; SI: v_min_i32
92; SI: v_min_i32
93; SI: v_min_i32
; Signed "sle" min on <4 x i16> function arguments; the checks above expect
; four v_min_i32, one per element.
define void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
  %le.mask = icmp sle <4 x i16> %a, %b
  %min = select <4 x i1> %le.mask, <4 x i16> %a, <4 x i16> %b
  store <4 x i16> %min, <4 x i16> addrspace(1)* %out
  ret void
}
100
; Signed "slt" compare feeding a select over two i32 values loaded at the
; index returned by @llvm.r600.read.tidig.x; expected to become a single
; v_min_i32. The label is anchored to the start of the line so it can only
; match this function's own symbol definition.
; FUNC-LABEL: {{^}}v_test_imin_slt_i32:
; SI: v_min_i32_e32
define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  ; select(a < b, a, b) is the signed-min idiom under test.
  %cmp = icmp slt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep, align 4
  ret void
}
115
; Signed "slt" min of the two i32 function arguments; expected to become a
; single s_min_i32. The label is anchored to the start of the line so it can
; only match this function's own symbol definition.
; FUNC-LABEL: {{^}}s_test_imin_slt_i32:
; SI: s_min_i32
define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp slt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}
124
125; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32:
126; SI: s_min_i32
127; SI: s_min_i32
; Two-element vector form of the signed "slt" min on function arguments; the
; checks above expect one s_min_i32 per element.
define void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %lt.mask = icmp slt <2 x i32> %a, %b
  %min = select <2 x i1> %lt.mask, <2 x i32> %a, <2 x i32> %b
  store <2 x i32> %min, <2 x i32> addrspace(1)* %out
  ret void
}
134
135; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
136; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
; Signed "slt" min of %a against the constant 8; the check above expects the
; constant to appear as the last operand of s_min_i32.
define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %a.lt.8 = icmp slt i32 %a, 8
  %min = select i1 %a.lt.8, i32 %a, i32 8
  store i32 %min, i32 addrspace(1)* %out, align 4
  ret void
}
143
144; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32:
145; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
; Signed "sle" min of %a against the constant 8; the check above expects the
; same s_min_i32 with an immediate 8 as the "slt" variant.
define void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %a.le.8 = icmp sle i32 %a, 8
  %min = select i1 %a.le.8, i32 %a, i32 8
  store i32 %min, i32 addrspace(1)* %out, align 4
  ret void
}
152
; Unsigned "ule" compare feeding a select over two i32 values loaded at the
; index returned by @llvm.r600.read.tidig.x; expected to become a single
; v_min_u32. The label is anchored to the start of the line so it can only
; match this function's own symbol definition.
; FUNC-LABEL: {{^}}v_test_umin_ule_i32:
; SI: v_min_u32_e32
define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  ; select(a <=u b, a, b) is the unsigned-min idiom under test.
  %cmp = icmp ule i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep, align 4
  ret void
}
167
; Unsigned "ule" min on <3 x i32> values loaded at the index returned by
; @llvm.r600.read.tidig.x. Exactly three v_min_u32 are expected before the
; end of the program; the negative check rejects a fourth one (presumably
; guarding against the vector being widened to four elements).
; The label is anchored to the start of the line so it can only match this
; function's own symbol definition.
; FUNC-LABEL: {{^}}v_test_umin_ule_v3i32:
; SI: v_min_u32_e32
; SI: v_min_u32_e32
; SI: v_min_u32_e32
; SI-NOT: v_min_u32_e32
; SI: s_endpgm
define void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %aptr, <3 x i32> addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
  %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep0
  %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep1
  %cmp = icmp ule <3 x i32> %a, %b
  %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
  store <3 x i32> %val, <3 x i32> addrspace(1)* %outgep
  ret void
}
; Unsigned "ule" min of the two i32 function arguments; expected to become a
; single s_min_u32. The label is anchored to the start of the line so it can
; only match this function's own symbol definition.
; FUNC-LABEL: {{^}}s_test_umin_ule_i32:
; SI: s_min_u32
define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp ule i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}
194
; Unsigned "ult" compare feeding a select over two i32 values loaded at the
; index returned by @llvm.r600.read.tidig.x; expected to become a single
; v_min_u32. The label is anchored and ends in ':' so it cannot also match
; @v_test_umin_ult_i32_multi_use, whose name starts with this function's name.
; FUNC-LABEL: {{^}}v_test_umin_ult_i32:
; SI: v_min_u32_e32
define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  ; select(a <u b, a, b) is the unsigned-min idiom under test.
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep, align 4
  ret void
}
209
210; FUNC-LABEL: {{^}}v_test_umin_ult_i8:
211; SI: buffer_load_ubyte
212; SI: buffer_load_ubyte
213; SI: v_min_u32_e32
; Unsigned "ult" min on two i8 values loaded at the index returned by
; @llvm.r600.read.tidig.x. The checks above expect two buffer_load_ubyte
; loads followed by a 32-bit v_min_u32.
define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
  %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %a.addr = getelementptr i8, i8 addrspace(1)* %aptr, i32 %idx
  %b.addr = getelementptr i8, i8 addrspace(1)* %bptr, i32 %idx
  %out.addr = getelementptr i8, i8 addrspace(1)* %out, i32 %idx
  %lhs = load i8, i8 addrspace(1)* %a.addr, align 1
  %rhs = load i8, i8 addrspace(1)* %b.addr, align 1
  %lhs.lt.rhs = icmp ult i8 %lhs, %rhs
  %min = select i1 %lhs.lt.rhs, i8 %lhs, i8 %rhs
  store i8 %min, i8 addrspace(1)* %out.addr, align 1
  ret void
}
226
; Unsigned "ult" min of the two i32 function arguments; expected to become a
; single s_min_u32. The label is anchored to the start of the line so it can
; only match this function's own symbol definition.
; FUNC-LABEL: {{^}}s_test_umin_ult_i32:
; SI: s_min_u32
define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}
235
; Unsigned "ult" compare whose i1 result has two uses: it feeds the select and
; is also stored through %out1. The checks require that no min instruction is
; formed here; a v_cmp_lt_u32 immediately followed by v_cndmask_b32 is
; expected instead.
; The label is anchored to the start of the line so it can only match this
; function's own symbol definition.
; FUNC-LABEL: {{^}}v_test_umin_ult_i32_multi_use:
; SI-NOT: v_min
; SI: v_cmp_lt_u32
; SI-NEXT: v_cndmask_b32
; SI-NOT: v_min
; SI: s_endpgm
define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep0 = getelementptr i32, i32 addrspace(1)* %out0, i32 %tid
  %outgep1 = getelementptr i1, i1 addrspace(1)* %out1, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  %cmp = icmp ult i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep0, align 4
  ; Second use of %cmp: the compare result itself is written out.
  store i1 %cmp, i1 addrspace(1)* %outgep1
  ret void
}
256
257
; Single-element vector form of the unsigned "ult" min on function arguments;
; expected to become one s_min_u32. The label is anchored to the start of the
; line so it can only match this function's own symbol definition.
; FUNC-LABEL: {{^}}s_test_umin_ult_v1i32:
; SI: s_min_u32
define void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %cmp = icmp ult <1 x i32> %a, %b
  %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
  store <1 x i32> %val, <1 x i32> addrspace(1)* %out
  ret void
}
266
267; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32:
268; SI: s_min_u32
269; SI: s_min_u32
270; SI: s_min_u32
271; SI: s_min_u32
272; SI: s_min_u32
273; SI: s_min_u32
274; SI: s_min_u32
275; SI: s_min_u32
; Eight-element vector form of the unsigned "ult" min on function arguments;
; the checks above expect one s_min_u32 per element.
define void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind {
  %lt.mask = icmp ult <8 x i32> %a, %b
  %min = select <8 x i1> %lt.mask, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %min, <8 x i32> addrspace(1)* %out
  ret void
}
282
283; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
284; SI: v_min_u32
285; SI: v_min_u32
286; SI: v_min_u32
287; SI: v_min_u32
288; SI: v_min_u32
289; SI: v_min_u32
290; SI: v_min_u32
291; SI: v_min_u32
; Unsigned "ult" min on <8 x i16> function arguments; the checks above expect
; eight v_min_u32, one per element.
define void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
  %lt.mask = icmp ult <8 x i16> %a, %b
  %min = select <8 x i1> %lt.mask, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %min, <8 x i16> addrspace(1)* %out
  ret void
}
298
; Make sure the redundant 'and' is removed.
300; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
301; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
302; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
303; SI: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
304; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
305; SI-NEXT: buffer_store_dword [[VMIN]]
; Unsigned min of two zero-extended i16 arguments followed by a mask with
; 65535. Both inputs already have their upper 16 bits clear, so the mask cannot
; change the result; the checks above expect s_min_u32 to feed the store
; directly.
define void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
  %a.wide = zext i16 %a to i32
  %b.wide = zext i16 %b to i32
  %a.lt.b = icmp ult i32 %a.wide, %b.wide
  %min = select i1 %a.lt.b, i32 %a.wide, i32 %b.wide
  ; Keeps only the low 16 bits, which is all %min can contain.
  %low16 = and i32 %min, 65535
  store i32 %low16, i32 addrspace(1)* %out
  ret void
}
315
; Make sure the redundant sign_extend_inreg is removed.
317
318; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
319; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
320; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
321; SI: s_min_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
322; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
323; SI-NEXT: buffer_store_dword [[VMIN]]
; Signed min of two sign-extended i16 arguments followed by a shl/ashr-by-16
; pair that re-sign-extends the low 16 bits. Both inputs are already sign
; extended from i16, so the pair cannot change the result; the checks above
; expect s_min_i32 to feed the store directly.
define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
  %a.wide = sext i16 %a to i32
  %b.wide = sext i16 %b to i32
  %a.lt.b = icmp slt i32 %a.wide, %b.wide
  %min = select i1 %a.lt.b, i32 %a.wide, i32 %b.wide
  ; Shift left then arithmetic shift right by 16: sign-extend from bit 15.
  %hi = shl i32 %min, 16
  %resext = ashr i32 %hi, 16
  store i32 %resext, i32 addrspace(1)* %out
  ret void
}
334
335; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
336; SI: s_min_i32
; Signed "sle" min on i16 function arguments; the check above expects a
; 32-bit s_min_i32.
define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
  %a.le.b = icmp sle i16 %a, %b
  %min = select i1 %a.le.b, i16 %a, i16 %b
  store i16 %min, i16 addrspace(1)* %out
  ret void
}
343