; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Signed divide of two i8 values: %num is the byte at %in and %den is the
; byte immediately after it. The checks require two int-to-float conversions,
; a reciprocal and a float-to-int conversion to appear in the output.
; FUNC-LABEL: {{^}}sdiv24_i8:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %num = load i8, i8 addrspace(1)* %in
  %den = load i8, i8 addrspace(1)* %den_ptr
  %result = sdiv i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ret void
}

; Signed divide of two i16 values: %num is element 0 and %den is element 1 of
; the i16 array at %in. The checks require two int-to-float conversions, a
; reciprocal and a float-to-int conversion to appear in the output.
; FUNC-LABEL: {{^}}sdiv24_i16:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %num = load i16, i16 addrspace(1)* %in, align 2
  %den = load i16, i16 addrspace(1)* %den_ptr, align 2
  %result = sdiv i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ret void
}

; Signed i32 divide where both operands are first sign-extended from their low
; 24 bits (shl 8 followed by ashr 8). The checks require two int-to-float
; conversions, a reciprocal and a float-to-int conversion in the output.
; FUNC-LABEL: {{^}}sdiv24_i32:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  ; shl/ashr by 8 keeps the low 24 bits and replicates bit 23 upward.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = sdiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: both operands are sign-extended from their low 25 bits
; (shl 7 followed by ashr 7), one bit wider than in sdiv24_i32. The checks
; require that the listed conversion and reciprocal mnemonics do not appear.
; NOTE(review): the positive tests in this file match v_rcp_iflag_f32, and the
; pattern v_rcp_f32 is not a substring of that mnemonic - confirm the negative
; reciprocal pattern guards what it is meant to guard.
; FUNC-LABEL: {{^}}sdiv25_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  ; shl/ashr by 7 keeps the low 25 bits and replicates bit 24 upward.
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 7
  %den.i24 = ashr i32 %den.i24.0, 7
  %result = sdiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: the numerator is sign-extended from 24 bits (shift by 8) but
; the denominator from 25 bits (shift by 7). The checks require that the
; listed conversion and reciprocal mnemonics do not appear in the output.
; FUNC-LABEL: {{^}}test_no_sdiv24_i32_1:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i24 = ashr i32 %den.i24.0, 7
  %result = sdiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: the numerator is sign-extended from 25 bits (shift by 7) and
; the denominator from 24 bits (shift by 8). The checks require that the
; listed conversion and reciprocal mnemonics do not appear in the output.
; FUNC-LABEL: {{^}}test_no_sdiv24_i32_2:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = ashr i32 %num.i24.0, 7
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = sdiv i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed remainder of two i8 values: %num is the byte at %in and %den is the
; byte immediately after it. The checks require two int-to-float conversions,
; a reciprocal and a float-to-int conversion to appear in the output.
; FUNC-LABEL: {{^}}srem24_i8:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
  %num = load i8, i8 addrspace(1)* %in
  %den = load i8, i8 addrspace(1)* %den_ptr
  %result = srem i8 %num, %den
  store i8 %result, i8 addrspace(1)* %out
  ret void
}

; Signed remainder of two i16 values: %num is element 0 and %den is element 1
; of the i16 array at %in. The checks require two int-to-float conversions, a
; reciprocal and a float-to-int conversion to appear in the output.
; FUNC-LABEL: {{^}}srem24_i16:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %num = load i16, i16 addrspace(1)* %in, align 2
  %den = load i16, i16 addrspace(1)* %den_ptr, align 2
  %result = srem i16 %num, %den
  store i16 %result, i16 addrspace(1)* %out, align 2
  ret void
}

; Signed i32 remainder where both operands are first sign-extended from their
; low 24 bits (shl 8 followed by ashr 8). The checks require two int-to-float
; conversions, a reciprocal and a float-to-int conversion in the output.
; FUNC-LABEL: {{^}}srem24_i32:
; SI: v_cvt_f32_i32
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_cvt_i32_f32

; EG: INT_TO_FLT
; EG-DAG: INT_TO_FLT
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define amdgpu_kernel void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  ; shl/ashr by 8 keeps the low 24 bits and replicates bit 23 upward.
  %num.i24.0 = shl i32 %num, 8
  %den.i24.0 = shl i32 %den, 8
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = srem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: both operands of the signed remainder are sign-extended from
; their low 25 bits (shl 7 followed by ashr 7). The checks require that the
; listed conversion and reciprocal mnemonics do not appear in the output.
; FUNC-LABEL: {{^}}no_srem25_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  ; shl/ashr by 7 keeps the low 25 bits and replicates bit 24 upward.
  %num.i24.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 7
  %den.i24 = ashr i32 %den.i24.0, 7
  %result = srem i32 %num.i24, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: signed divide of a 24-bit sign-extended numerator (shift by 8)
; by a 25-bit sign-extended denominator (shift by 7). The checks require that
; the listed conversion and reciprocal mnemonics do not appear in the output.
; FUNC-LABEL: {{^}}no_sdiv25_i24_i25_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_sdiv25_i24_i25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i25.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i25 = ashr i32 %den.i25.0, 7
  %result = sdiv i32 %num.i24, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: signed divide of a 25-bit sign-extended numerator (shift by 7)
; by a 24-bit sign-extended denominator (shift by 8). The checks require that
; the listed conversion and reciprocal mnemonics do not appear in the output.
; FUNC-LABEL: {{^}}no_sdiv25_i25_i24_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_sdiv25_i25_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i25.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i25 = ashr i32 %num.i25.0, 7
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = sdiv i32 %num.i25, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: signed remainder of a 24-bit sign-extended numerator (shift by
; 8) by a 25-bit sign-extended denominator (shift by 7). The checks require
; that the listed conversion and reciprocal mnemonics do not appear.
; FUNC-LABEL: {{^}}no_srem25_i24_i25_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_srem25_i24_i25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i25.0 = shl i32 %den, 7
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i25 = ashr i32 %den.i25.0, 7
  %result = srem i32 %num.i24, %den.i25
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Negative test: signed remainder of a 25-bit sign-extended numerator (shift by
; 7) by a 24-bit sign-extended denominator (shift by 8). The checks require
; that the listed conversion and reciprocal mnemonics do not appear.
; FUNC-LABEL: {{^}}no_srem25_i25_i24_i32:
; SI-NOT: v_cvt_f32_i32
; SI-NOT: v_rcp_f32

; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_srem25_i25_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i25.0 = shl i32 %num, 7
  %den.i24.0 = shl i32 %den, 8
  %num.i25 = ashr i32 %num.i25.0, 7
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = srem i32 %num.i25, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed remainder of a 24-bit sign-extended numerator (shift by 8) by an
; 11-bit sign-extended denominator (shift by 21). The checks require an
; int-to-float conversion, a reciprocal, and then a 24-bit-wide signed
; bitfield extract starting at bit 0.
; FUNC-LABEL: {{^}}srem25_i24_i11_i32:
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 24

; EG: INT_TO_FLT
; EG: RECIP_IEEE
define amdgpu_kernel void @srem25_i24_i11_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i24.0 = shl i32 %num, 8
  %den.i11.0 = shl i32 %den, 21
  %num.i24 = ashr i32 %num.i24.0, 8
  %den.i11 = ashr i32 %den.i11.0, 21
  %result = srem i32 %num.i24, %den.i11
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed remainder of an 11-bit sign-extended numerator (shift by 21) by a
; 24-bit sign-extended denominator (shift by 8). The checks require an
; int-to-float conversion, a reciprocal, and then a 24-bit-wide signed
; bitfield extract starting at bit 0.
; FUNC-LABEL: {{^}}srem25_i11_i24_i32:
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 24

; EG: INT_TO_FLT
; EG: RECIP_IEEE
define amdgpu_kernel void @srem25_i11_i24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i11.0 = shl i32 %num, 21
  %den.i24.0 = shl i32 %den, 8
  %num.i11 = ashr i32 %num.i11.0, 21
  %den.i24 = ashr i32 %den.i24.0, 8
  %result = srem i32 %num.i11, %den.i24
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed remainder of a 17-bit sign-extended numerator (shift by 15) by a
; 12-bit sign-extended denominator (shift by 20). The checks require an
; int-to-float conversion, a reciprocal, and then a 17-bit-wide signed
; bitfield extract starting at bit 0.
; FUNC-LABEL: {{^}}srem25_i17_i12_i32:
; SI: v_cvt_f32_i32
; SI: v_rcp_iflag_f32
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 17

; EG: INT_TO_FLT
; EG: RECIP_IEEE
define amdgpu_kernel void @srem25_i17_i12_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1)* %in, align 4
  %den = load i32, i32 addrspace(1)* %den_ptr, align 4
  %num.i17.0 = shl i32 %num, 15
  %den.i12.0 = shl i32 %den, 20
  %num.i17 = ashr i32 %num.i17.0, 15
  %den.i12 = ashr i32 %den.i12.0, 20
  ; srem, as the test name and the neighbouring srem25_* kernels indicate.
  %result = srem i32 %num.i17, %den.i12
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
