; RUN:  llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN:  llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN:  llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN:  llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s

; s_abs_i32 -- absolute value of a kernel argument, written as
; select(val > 0 - val, val, 0 - val), followed by an add of 2.
; The amdgcn runs expect one s_abs_i32 and one s_add_i32; the r600 run
; expects MAX_INT.
; FUNC-LABEL: {{^}}s_abs_i32:
; GCN: s_abs_i32
; GCN: s_add_i32

; EG: MAX_INT
define amdgpu_kernel void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
  ; neg = 0 - val
  %neg = sub i32 0, %val
  ; res = (val > neg) ? val : neg
  %cond = icmp sgt i32 %val, %neg
  %res = select i1 %cond, i32 %val, i32 %neg
  ; Extra user of the result; this is the add the checks look for.
  %res2 = add i32 %res, 2
  store i32 %res2, i32 addrspace(1)* %out, align 4
  ret void
}

; v_abs_i32 -- same select(val > 0 - val, val, 0 - val) + 2 pattern as
; s_abs_i32, but the input is loaded from %src at the workitem id.
; The amdgcn runs expect a subtract from 0 feeding v_max_i32, then an add.
; The verde/tonga patterns carry a vcc operand on the subtract and the add;
; the gfx900 patterns do not. The r600 run expects MAX_INT.
; FUNC-LABEL: {{^}}v_abs_i32:
; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]

; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]

; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2

; EG: MAX_INT
define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
  ; Index the input by workitem id x.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %src, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in, align 4
  ; res = (val > 0 - val) ? val : 0 - val
  %neg = sub i32 0, %val
  %cond = icmp sgt i32 %val, %neg
  %res = select i1 %cond, i32 %val, i32 %neg
  ; Extra user of the result; this is the add the checks look for.
  %res2 = add i32 %res, 2
  store i32 %res2, i32 addrspace(1)* %out, align 4
  ret void
}

; v_abs_i32_repeat_user -- the abs result is used twice, as both operands of
; a multiply. The amdgcn runs expect a single subtract + v_max_i32 whose
; result register appears as both sources of v_mul_lo_u32.
; The label below uses the prefix shared by all four runs, like every other
; function in this file, so the r600 run also splits its checks here.
; FUNC-LABEL: {{^}}v_abs_i32_repeat_user:
; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]]
; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]]
define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
  ; Index the input by workitem id x.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %src, i32 %tid
  %val = load i32, i32 addrspace(1)* %gep.in, align 4
  ; res = (val > 0 - val) ? val : 0 - val
  %neg = sub i32 0, %val
  %cond = icmp sgt i32 %val, %neg
  %res = select i1 %cond, i32 %val, i32 %neg
  ; Both multiply operands are the same abs result.
  %mul = mul i32 %res, %res
  store i32 %mul, i32 addrspace(1)* %out, align 4
  ret void
}

; s_abs_v2i32 -- two-element version of the kernel-argument abs + 2 test.
; The amdgcn runs expect two s_abs_i32 followed by two s_add_i32; the r600
; run expects two MAX_INT.
; FUNC-LABEL: {{^}}s_abs_v2i32:
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_add_i32
; GCN: s_add_i32

; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
  ; <0, 0>, built one element at a time.
  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
  ; <2, 2>, the addend for the final add.
  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
  ; Per element: res = (val > 0 - val) ? val : 0 - val
  %neg = sub <2 x i32> %z1, %val
  %cond = icmp sgt <2 x i32> %val, %neg
  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
  %res2 = add <2 x i32> %res, %t1
  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; v_abs_v2i32 -- two-element version of the loaded-value abs + 2 test.
; For each element the amdgcn runs expect a subtract from 0 paired with a
; v_max_i32 of the source and the negated value (the two pairs may appear in
; either order), followed by two adds. The verde/tonga patterns carry a vcc
; operand on the subtract and add; the gfx900 patterns do not. The r600 run
; expects two MAX_INT.
; FUNC-LABEL: {{^}}v_abs_v2i32:
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]

; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]

; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]

; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc

; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,

; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
  ; <0, 0>, built one element at a time.
  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
  ; <2, 2>, the addend for the final add.
  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
  ; Index the input by workitem id x.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %src, i32 %tid
  %val = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in, align 4
  ; Per element: res = (val > 0 - val) ? val : 0 - val
  %neg = sub <2 x i32> %z1, %val
  %cond = icmp sgt <2 x i32> %val, %neg
  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
  %res2 = add <2 x i32> %res, %t1
  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; s_abs_v4i32 -- four-element version of the kernel-argument abs + 2 test.
; The amdgcn runs expect s_abs_i32 for each of the four elements, followed
; by four s_add_i32; the r600 run expects four MAX_INT.
; FUNC-LABEL: {{^}}s_abs_v4i32:
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_abs_i32
; GCN: s_abs_i32

; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32

; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
  ; <0, 0, 0, 0>, built one element at a time.
  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
  ; <2, 2, 2, 2>, the addend for the final add.
  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
  ; Per element: res = (val > 0 - val) ? val : 0 - val
  %neg = sub <4 x i32> %z3, %val
  %cond = icmp sgt <4 x i32> %val, %neg
  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
  %res2 = add <4 x i32> %res, %t3
  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; v_abs_v4i32 -- four-element version of the loaded-value abs + 2 test.
; For each element the amdgcn runs expect a subtract from 0 paired with a
; v_max_i32 of the source and the negated value (pairs may appear in any
; order), followed by four adds. The verde/tonga patterns carry a vcc
; operand on the subtract and add; the gfx900 patterns do not. The r600 run
; expects four MAX_INT.
; FUNC-LABEL: {{^}}v_abs_v4i32:

; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]

; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG2:v[0-9]+]], 0, [[SRC2:v[0-9]+]]
; GFX9-DAG: v_sub_u32_e32 [[NEG3:v[0-9]+]], 0, [[SRC3:v[0-9]+]]

; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]]
; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]]

; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc,

; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2,

; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %src) nounwind {
  ; <0, 0, 0, 0>, built one element at a time.
  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
  ; <2, 2, 2, 2>, the addend for the final add.
  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
  ; Index the input by workitem id x.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %src, i32 %tid
  %val = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in, align 4
  ; Per element: res = (val > 0 - val) ? val : 0 - val
  %neg = sub <4 x i32> %z3, %val
  %cond = icmp sgt <4 x i32> %val, %neg
  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
  %res2 = add <4 x i32> %res, %t3
  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; s_min_max_i32 -- one signed compare of two kernel arguments drives two
; selects with swapped operands, giving the larger and the smaller value.
; The amdgcn runs expect two s_load_dword results feeding one s_min_i32 and
; one s_max_i32 (in either order). There are no r600 patterns for this
; function beyond the label.
; NOTE(review): the unnamed [8 x i32] parameters presumably exist to space
; %val0 and %val1 apart so each is fetched by its own s_load_dword -- confirm.
; FUNC-LABEL: {{^}}s_min_max_i32:
; GCN: s_load_dword [[VAL0:s[0-9]+]]
; GCN: s_load_dword [[VAL1:s[0-9]+]]

; GCN-DAG: s_min_i32 s{{[0-9]+}}, [[VAL0]], [[VAL1]]
; GCN-DAG: s_max_i32 s{{[0-9]+}}, [[VAL0]], [[VAL1]]
define amdgpu_kernel void @s_min_max_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, [8 x i32], i32 %val0, [8 x i32], i32 %val1) nounwind {
  %cond0 = icmp sgt i32 %val0, %val1
  ; sel0 is the larger value, sel1 the smaller; both reuse the same compare.
  %sel0 = select i1 %cond0, i32 %val0, i32 %val1
  %sel1 = select i1 %cond0, i32 %val1, i32 %val0

  store volatile i32 %sel0, i32 addrspace(1)* %out0, align 4
  store volatile i32 %sel1, i32 addrspace(1)* %out1, align 4
  ret void
}

; v_min_max_i32 -- same shared-compare min/max pattern as s_min_max_i32, but
; both inputs are volatile loads from memory. The amdgcn runs expect two
; dword loads (buffer, flat or global) feeding one v_min_i32 and one
; v_max_i32 (in either order). There are no r600 patterns for this function
; beyond the label.
; FUNC-LABEL: {{^}}v_min_max_i32:
; GCN: {{buffer|flat|global}}_load_dword [[VAL0:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[VAL1:v[0-9]+]]

; GCN-DAG: v_min_i32_e32 v{{[0-9]+}}, [[VAL0]], [[VAL1]]
; GCN-DAG: v_max_i32_e32 v{{[0-9]+}}, [[VAL0]], [[VAL1]]
define amdgpu_kernel void @v_min_max_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr0, i32 addrspace(1)* %ptr1) nounwind {
  %val0 = load volatile i32, i32 addrspace(1)* %ptr0
  %val1 = load volatile i32, i32 addrspace(1)* %ptr1

  %cond0 = icmp sgt i32 %val0, %val1
  ; sel0 is the larger value, sel1 the smaller; both reuse the same compare.
  %sel0 = select i1 %cond0, i32 %val0, i32 %val1
  %sel1 = select i1 %cond0, i32 %val1, i32 %val0

  store volatile i32 %sel0, i32 addrspace(1)* %out0, align 4
  store volatile i32 %sel1, i32 addrspace(1)* %out1, align 4
  ret void
}

; s_min_max_v4i32 -- four-element version of the kernel-argument min/max
; test: one vector compare drives two vector selects with swapped operands.
; The amdgcn runs expect four s_min_i32 and four s_max_i32, in any order.
; There are no r600 patterns for this function beyond the label.
; FUNC-LABEL: {{^}}s_min_max_v4i32:
; GCN-DAG: s_min_i32
; GCN-DAG: s_min_i32
; GCN-DAG: s_min_i32
; GCN-DAG: s_min_i32
; GCN-DAG: s_max_i32
; GCN-DAG: s_max_i32
; GCN-DAG: s_max_i32
; GCN-DAG: s_max_i32
define amdgpu_kernel void @s_min_max_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, <4 x i32> %val0, <4 x i32> %val1) nounwind {
  %cond0 = icmp sgt <4 x i32> %val0, %val1
  ; Per element: sel0 is the larger value, sel1 the smaller.
  %sel0 = select <4 x i1> %cond0, <4 x i32> %val0, <4 x i32> %val1
  %sel1 = select <4 x i1> %cond0, <4 x i32> %val1, <4 x i32> %val0

  store volatile <4 x i32> %sel0, <4 x i32> addrspace(1)* %out0, align 4
  store volatile <4 x i32> %sel1, <4 x i32> addrspace(1)* %out1, align 4
  ret void
}

; v_min_max_i32_user -- same loads, compare and selects as v_min_max_i32,
; but the i1 compare result has a third user: it is stored as well.
; Here the amdgcn runs expect an explicit v_cmp_gt_i32 with two
; v_cndmask_b32_e32 selects, plus a v_cndmask_b32_e64 producing 0 or 1 from
; vcc, rather than v_min_i32 / v_max_i32. There are no r600 patterns for
; this function beyond the label.
; FUNC-LABEL: {{^}}v_min_max_i32_user:
; GCN: v_cmp_gt_i32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e32
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
define amdgpu_kernel void @v_min_max_i32_user(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr0, i32 addrspace(1)* %ptr1) nounwind {
  %val0 = load volatile i32, i32 addrspace(1)* %ptr0
  %val1 = load volatile i32, i32 addrspace(1)* %ptr1

  %cond0 = icmp sgt i32 %val0, %val1
  ; sel0 is the larger value, sel1 the smaller; both reuse the same compare.
  %sel0 = select i1 %cond0, i32 %val0, i32 %val1
  %sel1 = select i1 %cond0, i32 %val1, i32 %val0

  store volatile i32 %sel0, i32 addrspace(1)* %out0, align 4
  store volatile i32 %sel1, i32 addrspace(1)* %out1, align 4
  ; Additional user of the compare itself; the destination pointer is undef.
  store volatile i1 %cond0, i1 addrspace(1)* undef
  ret void
}

; Intrinsic called by the v_abs_* kernels to obtain the workitem id used as
; the load index.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; #0 is attached to the intrinsic declaration above.
attributes #0 = { nounwind readnone }
; #1 is not referenced by any visible line in this file; the kernels spell
; out nounwind directly.
attributes #1 = { nounwind }
