1; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3
; Unsigned bitfield extract where the source, offset and width operands are
; three distinct kernel arguments (%src0, %src1, %src2). The only expectation
; is that a v_bfe_u32 is emitted.
4; GCN-LABEL: {{^}}bfe_u32_arg_arg_arg:
5; GCN: v_bfe_u32
6define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
7  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
8  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
9  ret void
10}
11
; Source and offset come from kernel arguments; the width operand is the
; immediate 123. The only expectation is that a v_bfe_u32 is emitted.
12; GCN-LABEL: {{^}}bfe_u32_arg_arg_imm:
13; GCN: v_bfe_u32
14define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
15  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
16  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
17  ret void
18}
19
; Source and width come from kernel arguments; the offset operand is the
; immediate 123. The only expectation is that a v_bfe_u32 is emitted.
20; GCN-LABEL: {{^}}bfe_u32_arg_imm_arg:
21; GCN: v_bfe_u32
22define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
23  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
24  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
25  ret void
26}
27
; The source operand is the immediate 123; offset and width come from kernel
; arguments. The only expectation is that a v_bfe_u32 is emitted.
28; GCN-LABEL: {{^}}bfe_u32_imm_arg_arg:
29; GCN: v_bfe_u32
30define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
31  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
32  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
33  ret void
34}
35
; Width operand is the constant 0 while the offset is a kernel argument.
; The checks require that no "bfe" (other than one preceded by '@') appears
; before the end of the program.
36; GCN-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
37; GCN-NOT: {{[^@]}}bfe
38; GCN: s_endpgm
39define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
40  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
41  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
42  ret void
43}
44
; Width operand is the constant 0 and the offset is the immediate 8.
; The checks require that no bfe instruction is emitted. The %src1 kernel
; argument is declared but not used by the body.
45; GCN-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
46; GCN-NOT: {{[^@]}}bfe
47; GCN: s_endpgm
48define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
49  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
50  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
51  ret void
52}
53
; Extracts 8 bits at offset 0 from a value that was zero-extended from an i8
; load. The checks expect a buffer_load_ubyte and no bfe instruction.
54; GCN-LABEL: {{^}}bfe_u32_zextload_i8:
55; GCN: buffer_load_ubyte
56; GCN-NOT: {{[^@]}}bfe
57; GCN: s_endpgm
58define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
59  %load = load i8, i8 addrspace(1)* %in
60  %ext = zext i8 %load to i32
61  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
62  store i32 %bfe, i32 addrspace(1)* %out, align 4
63  ret void
64}
65
; Extracts 8 bits at offset 0 from (load + 1) & 255. The checks expect the
; add to be immediately followed by a v_and_b32_e32 and no bfe instruction.
66; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
67; GCN: buffer_load_dword
68; GCN: v_add_{{[iu]}}32
69; GCN-NEXT: v_and_b32_e32
70; FIXME: Should be using s_add_i32
71; GCN-NOT: {{[^@]}}bfe
72; GCN: s_endpgm
73define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
74  %load = load i32, i32 addrspace(1)* %in, align 4
75  %add = add i32 %load, 1
76  %ext = and i32 %add, 255
77  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
78  store i32 %bfe, i32 addrspace(1)* %out, align 4
79  ret void
80}
81
; Extracts 16 bits at offset 0 from (load + 1) & 65535. The checks expect the
; add to be immediately followed by a v_and_b32_e32 and no bfe instruction.
82; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
83; GCN: buffer_load_dword
84; GCN: v_add_{{[iu]}}32
85; GCN-NEXT: v_and_b32_e32
86; GCN-NOT: {{[^@]}}bfe
87; GCN: s_endpgm
88define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
89  %load = load i32, i32 addrspace(1)* %in, align 4
90  %add = add i32 %load, 1
91  %ext = and i32 %add, 65535
92  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
93  store i32 %bfe, i32 addrspace(1)* %out, align 4
94  ret void
95}
96
; Extracts 8 bits at offset 1 from (load + 1) & 255. With a non-zero offset
; the checks expect a bfe instruction to remain after the add.
97; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
98; GCN: buffer_load_dword
99; GCN: v_add_{{[iu]}}32
100; GCN: bfe
101; GCN: s_endpgm
102define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
103  %load = load i32, i32 addrspace(1)* %in, align 4
104  %add = add i32 %load, 1
105  %ext = and i32 %add, 255
106  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
107  store i32 %bfe, i32 addrspace(1)* %out, align 4
108  ret void
109}
110
; Extracts 8 bits at offset 3 from (load + 1) & 255. The checks expect the
; add, then an and with mask 0xf8, then a bfe, on consecutive lines.
111; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
112; GCN: buffer_load_dword
113; GCN: v_add_{{[iu]}}32
114; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
115; GCN-NEXT: bfe
116; GCN: s_endpgm
117define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
118  %load = load i32, i32 addrspace(1)* %in, align 4
119  %add = add i32 %load, 1
120  %ext = and i32 %add, 255
121  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
122  store i32 %bfe, i32 addrspace(1)* %out, align 4
123  ret void
124}
125
; Extracts 8 bits at offset 7 from (load + 1) & 255. The checks expect the
; add, then an and with mask 0x80, then a bfe, on consecutive lines.
126; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
127; GCN: buffer_load_dword
128; GCN: v_add_{{[iu]}}32
129; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
130; GCN-NEXT: bfe
131; GCN: s_endpgm
132define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
133  %load = load i32, i32 addrspace(1)* %in, align 4
134  %add = add i32 %load, 1
135  %ext = and i32 %add, 255
136  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
137  store i32 %bfe, i32 addrspace(1)* %out, align 4
138  ret void
139}
140
; Extracts 8 bits at offset 8 from (load + 1) & 65535. The checks expect the
; add to be immediately followed by a bfe, i.e. no separate and in between.
141; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
142; GCN: buffer_load_dword
143; GCN: v_add_{{[iu]}}32
144; GCN-NEXT: bfe
145; GCN: s_endpgm
146define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
147  %load = load i32, i32 addrspace(1)* %in, align 4
148  %add = add i32 %load, 1
149  %ext = and i32 %add, 65535
150  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
151  store i32 %bfe, i32 addrspace(1)* %out, align 4
152  ret void
153}
154
; Extracts 1 bit at offset 0 from a loaded dword. The checks expect this to
; be emitted as a v_and_b32_e32 with the constant 1.
155; GCN-LABEL: {{^}}bfe_u32_test_1:
156; GCN: buffer_load_dword
157; GCN: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
158; GCN: s_endpgm
159define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
160  %x = load i32, i32 addrspace(1)* %in, align 4
161  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
162  store i32 %bfe, i32 addrspace(1)* %out, align 4
163  ret void
164}
165
; Extracts 8 bits at offset 0 from (x << 31). No instruction-level
; expectation is asserted for this kernel; the label and end-of-program
; checks only anchor its output so neighbouring checks cannot match into it.
; TODO: add instruction checks once the expected output is confirmed.
; GCN-LABEL: {{^}}bfe_u32_test_2:
; GCN: s_endpgm
166define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
167  %x = load i32, i32 addrspace(1)* %in, align 4
168  %shl = shl i32 %x, 31
169  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
170  store i32 %bfe, i32 addrspace(1)* %out, align 4
171  ret void
172}
173
; Extracts 1 bit at offset 0 from (x << 31). No instruction-level
; expectation is asserted for this kernel; the label and end-of-program
; checks only anchor its output so neighbouring checks cannot match into it.
; TODO: add instruction checks once the expected output is confirmed.
; GCN-LABEL: {{^}}bfe_u32_test_3:
; GCN: s_endpgm
174define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
175  %x = load i32, i32 addrspace(1)* %in, align 4
176  %shl = shl i32 %x, 31
177  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
178  store i32 %bfe, i32 addrspace(1)* %out, align 4
179  ret void
180}
181
; Extracts bit 31 of ((x << 31) >> 31) using a logical right shift. The checks
; expect no shifts and no bfe, only a constant 0 moved into a register and
; stored.
182; GCN-LABEL: {{^}}bfe_u32_test_4:
183; GCN-NOT: lshl
184; GCN-NOT: shr
185; GCN-NOT: {{[^@]}}bfe
186; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
187; GCN: buffer_store_dword [[VREG]],
188; GCN: s_endpgm
189define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
190  %x = load i32, i32 addrspace(1)* %in, align 4
191  %shl = shl i32 %x, 31
192  %shr = lshr i32 %shl, 31
193  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
194  store i32 %bfe, i32 addrspace(1)* %out, align 4
195  ret void
196}
197
; Extracts 1 bit at offset 0 from ((x << 31) >> 31) using an arithmetic right
; shift. The checks expect no separate shifts and a single v_bfe_i32 with
; offset 0 and width 1.
; NOTE(review): the expected instruction is the signed v_bfe_i32 although the
; intrinsic called is the unsigned extract - confirm this is intended.
198; GCN-LABEL: {{^}}bfe_u32_test_5:
199; GCN: buffer_load_dword
200; GCN-NOT: lshl
201; GCN-NOT: shr
202; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
203; GCN: s_endpgm
204define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
205  %x = load i32, i32 addrspace(1)* %in, align 4
206  %shl = shl i32 %x, 31
207  %shr = ashr i32 %shl, 31
208  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
209  store i32 %bfe, i32 addrspace(1)* %out, align 4
210  ret void
211}
212
; Extracts 31 bits at offset 1 from (x << 31). The checks expect a left shift
; by 31 followed by a logical right shift by 1.
213; GCN-LABEL: {{^}}bfe_u32_test_6:
214; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
215; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
216; GCN: s_endpgm
217define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
218  %x = load i32, i32 addrspace(1)* %in, align 4
219  %shl = shl i32 %x, 31
220  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
221  store i32 %bfe, i32 addrspace(1)* %out, align 4
222  ret void
223}
224
; Extracts 31 bits at offset 0 from (x << 31). The checks expect the left
; shift by 31 to be emitted and no bfe instruction.
; NOTE(review): bits 30..0 of a value shifted left by 31 are arithmetically
; zero, yet the shift is expected to remain - confirm this is intended.
225; GCN-LABEL: {{^}}bfe_u32_test_7:
226; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
227; GCN-NOT: {{[^@]}}bfe
228; GCN: s_endpgm
229define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
230  %x = load i32, i32 addrspace(1)* %in, align 4
231  %shl = shl i32 %x, 31
232  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
233  store i32 %bfe, i32 addrspace(1)* %out, align 4
234  ret void
235}
236
; Extracts bit 31 of (x << 31), which is bit 0 of x. The checks expect a
; v_and_b32_e32 with the constant 1 and no bfe before or after it.
237; GCN-LABEL: {{^}}bfe_u32_test_8:
238; GCN-NOT: {{[^@]}}bfe
239; GCN: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
240; GCN-NOT: {{[^@]}}bfe
241; GCN: s_endpgm
242define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
243  %x = load i32, i32 addrspace(1)* %in, align 4
244  %shl = shl i32 %x, 31
245  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
246  store i32 %bfe, i32 addrspace(1)* %out, align 4
247  ret void
248}
249
; Extracts 1 bit at offset 31 from a loaded dword. The checks expect a logical
; right shift by 31 and no bfe instruction.
250; GCN-LABEL: {{^}}bfe_u32_test_9:
251; GCN-NOT: {{[^@]}}bfe
252; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
253; GCN-NOT: {{[^@]}}bfe
254; GCN: s_endpgm
255define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
256  %x = load i32, i32 addrspace(1)* %in, align 4
257  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
258  store i32 %bfe, i32 addrspace(1)* %out, align 4
259  ret void
260}
261
; Extracts 31 bits at offset 1 from a loaded dword. The checks expect a
; logical right shift by 1 and no bfe instruction.
262; GCN-LABEL: {{^}}bfe_u32_test_10:
263; GCN-NOT: {{[^@]}}bfe
264; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
265; GCN-NOT: {{[^@]}}bfe
266; GCN: s_endpgm
267define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
268  %x = load i32, i32 addrspace(1)* %in, align 4
269  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
270  store i32 %bfe, i32 addrspace(1)* %out, align 4
271  ret void
272}
273
; Extracts 24 bits at offset 8 from a loaded dword. The checks expect a
; logical right shift by 8 and no bfe instruction.
274; GCN-LABEL: {{^}}bfe_u32_test_11:
275; GCN-NOT: {{[^@]}}bfe
276; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
277; GCN-NOT: {{[^@]}}bfe
278; GCN: s_endpgm
279define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
280  %x = load i32, i32 addrspace(1)* %in, align 4
281  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
282  store i32 %bfe, i32 addrspace(1)* %out, align 4
283  ret void
284}
285
; Extracts 8 bits at offset 24 from a loaded dword. The checks expect a
; logical right shift by 24 and no bfe instruction.
286; GCN-LABEL: {{^}}bfe_u32_test_12:
287; GCN-NOT: {{[^@]}}bfe
288; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
289; GCN-NOT: {{[^@]}}bfe
290; GCN: s_endpgm
291define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
292  %x = load i32, i32 addrspace(1)* %in, align 4
293  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
294  store i32 %bfe, i32 addrspace(1)* %out, align 4
295  ret void
296}
297
; Extracts bit 31 (offset 31, width 1) of (x >> 31) using an arithmetic right
; shift. Only the absence of a bfe and the end of the program are verified.
; FIXME: The V_ASHRREV_U32_e32 line below carries no FileCheck prefix, so it
; is never matched against the output; replace it with a prefixed check once
; the expected shift instruction is confirmed.
298; GCN-LABEL: {{^}}bfe_u32_test_13:
299; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
300; GCN-NOT: {{[^@]}}bfe
301; GCN: s_endpgm
302define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
303  %x = load i32, i32 addrspace(1)* %in, align 4
304  %shl = ashr i32 %x, 31
305  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
306  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
307}
308
; Extracts bit 31 (offset 31, width 1) of (x >> 31) using a logical right
; shift. The checks expect neither an lshr nor a bfe in the output.
309; GCN-LABEL: {{^}}bfe_u32_test_14:
310; GCN-NOT: lshr
311; GCN-NOT: {{[^@]}}bfe
312; GCN: s_endpgm
313define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
314  %x = load i32, i32 addrspace(1)* %in, align 4
315  %shl = lshr i32 %x, 31
316  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
317  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
318}
319
; All-constant operands: source 0, offset 0, width 0. The checks expect no
; bfe and a constant 0 to be stored.
; The EG-NOT line is not enabled by either invocation at the top of this
; file, which only pass the GCN, SI and VI prefixes, so it is never evaluated.
320; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_0:
321; GCN-NOT: {{[^@]}}bfe
322; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
323; GCN: buffer_store_dword [[VREG]],
324; GCN: s_endpgm
325; EG-NOT: BFE
326define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
327  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
328  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
329  ret void
330}
331
; All-constant operands: source 12334, offset 0, width 0. The checks expect
; no bfe and a constant 0 to be stored.
332; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_1:
333; GCN-NOT: {{[^@]}}bfe
334; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
335; GCN: buffer_store_dword [[VREG]],
336; GCN: s_endpgm
337; EG-NOT: BFE
338define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
339  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
340  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
341  ret void
342}
343
; All-constant operands: source 0, offset 0, width 1. The checks expect no
; bfe and a constant 0 to be stored.
344; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_2:
345; GCN-NOT: {{[^@]}}bfe
346; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
347; GCN: buffer_store_dword [[VREG]],
348; GCN: s_endpgm
349; EG-NOT: BFE
350define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
351  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
352  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
353  ret void
354}
355
; All-constant operands: source 1, offset 0, width 1. The checks expect no
; bfe and a constant 1 to be stored.
356; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_3:
357; GCN-NOT: {{[^@]}}bfe
358; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
359; GCN: buffer_store_dword [[VREG]],
360; GCN: s_endpgm
361; EG-NOT: BFE
362define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
363  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
364  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
365  ret void
366}
367
; All-constant operands: source 4294967295 (all bits set), offset 0, width 1.
; The checks expect no bfe and the constant -1 to be stored.
; NOTE(review): an unsigned 1-bit extract of an all-ones value would
; arithmetically be 1, not -1 - confirm the expectation is intended rather
; than a record of current compiler behaviour.
368; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_4:
369; GCN-NOT: {{[^@]}}bfe
370; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
371; GCN: buffer_store_dword [[VREG]],
372; GCN: s_endpgm
373; EG-NOT: BFE
374define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
375  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
376  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
377  ret void
378}
379
; All-constant operands: source 128, offset 7, width 1. The checks expect no
; bfe and a constant 1 to be stored.
380; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_5:
381; GCN-NOT: {{[^@]}}bfe
382; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
383; GCN: buffer_store_dword [[VREG]],
384; GCN: s_endpgm
385; EG-NOT: BFE
386define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
387  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
388  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
389  ret void
390}
391
; All-constant operands: source 128, offset 0, width 8. The checks expect no
; bfe and the constant 0x80 to be stored.
392; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_6:
393; GCN-NOT: {{[^@]}}bfe
394; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
395; GCN: buffer_store_dword [[VREG]],
396; GCN: s_endpgm
397; EG-NOT: BFE
398define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
399  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
400  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
401  ret void
402}
403
; All-constant operands: source 127, offset 0, width 8. The checks expect no
; bfe and the constant 0x7f to be stored.
404; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_7:
405; GCN-NOT: {{[^@]}}bfe
406; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
407; GCN: buffer_store_dword [[VREG]],
408; GCN: s_endpgm
409; EG-NOT: BFE
410define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
411  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
412  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
413  ret void
414}
415
; All-constant operands: source 127, offset 6, width 8. The checks expect no
; bfe and a constant 1 to be stored.
416; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_8:
417; GCN-NOT: {{[^@]}}bfe
418; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
419; GCN: buffer_store_dword [[VREG]],
420; GCN: s_endpgm
421; EG-NOT: BFE
422define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
423  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
424  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
425  ret void
426}
427
; All-constant operands: source 65536, offset 16, width 8. The checks expect
; no bfe and a constant 1 to be stored.
428; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_9:
429; GCN-NOT: {{[^@]}}bfe
430; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
431; GCN: buffer_store_dword [[VREG]],
432; GCN: s_endpgm
433; EG-NOT: BFE
434define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
435  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
436  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
437  ret void
438}
439
; All-constant operands: source 65535, offset 16, width 16. The checks expect
; no bfe and a constant 0 to be stored.
440; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_10:
441; GCN-NOT: {{[^@]}}bfe
442; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
443; GCN: buffer_store_dword [[VREG]],
444; GCN: s_endpgm
445; EG-NOT: BFE
446define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
447  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
448  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
449  ret void
450}
451
; All-constant operands: source 160, offset 4, width 4. The checks expect no
; bfe and the constant 10 to be stored.
452; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_11:
453; GCN-NOT: {{[^@]}}bfe
454; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
455; GCN: buffer_store_dword [[VREG]],
456; GCN: s_endpgm
457; EG-NOT: BFE
458define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
459  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
460  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
461  ret void
462}
463
; All-constant operands: source 160, offset 31, width 1. The checks expect no
; bfe and a constant 0 to be stored.
464; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_12:
465; GCN-NOT: {{[^@]}}bfe
466; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
467; GCN: buffer_store_dword [[VREG]],
468; GCN: s_endpgm
469; EG-NOT: BFE
470define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
471  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
472  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
473  ret void
474}
475
; All-constant operands: source 131070, offset 16, width 16. The checks
; expect no bfe and a constant 1 to be stored.
476; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_13:
477; GCN-NOT: {{[^@]}}bfe
478; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
479; GCN: buffer_store_dword [[VREG]],
480; GCN: s_endpgm
481; EG-NOT: BFE
482define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
483  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
484  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
485  ret void
486}
487
; All-constant operands: source 160, offset 2, width 30. The checks expect no
; bfe and the constant 40 to be stored.
488; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_14:
489; GCN-NOT: {{[^@]}}bfe
490; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
491; GCN: buffer_store_dword [[VREG]],
492; GCN: s_endpgm
493; EG-NOT: BFE
494define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
495  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
496  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
497  ret void
498}
499
; All-constant operands: source 160, offset 4, width 28. The checks expect no
; bfe and the constant 10 to be stored.
500; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_15:
501; GCN-NOT: {{[^@]}}bfe
502; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
503; GCN: buffer_store_dword [[VREG]],
504; GCN: s_endpgm
505; EG-NOT: BFE
506define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
507  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
508  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
509  ret void
510}
511
; All-constant operands: source 4294967295, offset 1, width 7. The checks
; expect no bfe and the constant 0x7f to be stored.
512; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_16:
513; GCN-NOT: {{[^@]}}bfe
514; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
515; GCN: buffer_store_dword [[VREG]],
516; GCN: s_endpgm
517; EG-NOT: BFE
518define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
519  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
520  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
521  ret void
522}
523
; All-constant operands: source 255, offset 1, width 31. The checks expect no
; bfe and the constant 0x7f to be stored.
524; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_17:
525; GCN-NOT: {{[^@]}}bfe
526; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
527; GCN: buffer_store_dword [[VREG]],
528; GCN: s_endpgm
529; EG-NOT: BFE
530define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
531  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
532  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
533  ret void
534}
535
; All-constant operands: source 255, offset 31, width 1. The checks expect no
; bfe and a constant 0 to be stored.
536; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_18:
537; GCN-NOT: {{[^@]}}bfe
538; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
539; GCN: buffer_store_dword [[VREG]],
540; GCN: s_endpgm
541; EG-NOT: BFE
542define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
543  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
544  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
545  ret void
546}
547
; The masked value (%src & 63) has two users: the bitfield extract (offset 2,
; width 2) and a direct store to %out1. The checks require the full mask 63
; to survive and both the masked value and the extract result to be stored.
548; Make sure that SimplifyDemandedBits doesn't cause the and to be
549; reduced to the bits demanded by the bfe.
550
551; XXX: The operand to v_bfe_u32 could also just directly be the load register.
552; GCN-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
553; GCN: buffer_load_dword [[ARG:v[0-9]+]]
554; GCN: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
555; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
556; GCN-DAG: buffer_store_dword [[AND]]
557; GCN-DAG: buffer_store_dword [[BFE]]
558; GCN: s_endpgm
559define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
560                                            i32 addrspace(1)* %out1,
561                                            i32 addrspace(1)* %in) #0 {
562  %src = load i32, i32 addrspace(1)* %in, align 4
563  %and = and i32 %src, 63
564  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
565  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
566  store i32 %and, i32 addrspace(1)* %out1, align 4
567  ret void
568}
569
; Plain IR pattern without the intrinsic: (a >> 6) & 7. The checks expect it
; to be emitted as a scalar s_bfe_u32 with the immediate 0x30006
; (presumably width 3 in the upper half and offset 6 in the lower half -
; confirm against the ISA encoding).
570; GCN-LABEL: {{^}}lshr_and:
571; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
572; GCN: buffer_store_dword
573define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
574  %b = lshr i32 %a, 6
575  %c = and i32 %b, 7
576  store i32 %c, i32 addrspace(1)* %out, align 8
577  ret void
578}
579
; Plain IR pattern with a variable shift amount: (a >> b) & 7. The checks
; expect a vector v_bfe_u32 whose last operand is the constant 3.
580; GCN-LABEL: {{^}}v_lshr_and:
581; GCN: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
582; GCN: buffer_store_dword
583define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
584  %c = lshr i32 %a, %b
585  %d = and i32 %c, 7
586  store i32 %d, i32 addrspace(1)* %out, align 8
587  ret void
588}
589
; Plain IR pattern with the mask applied first: (a & 448) >> 6. The checks
; expect the same s_bfe_u32 immediate 0x30006 as the lshr_and kernel.
590; GCN-LABEL: {{^}}and_lshr:
591; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
592; GCN: buffer_store_dword
593define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
594  %b = and i32 %a, 448
595  %c = lshr i32 %b, 6
596  store i32 %c, i32 addrspace(1)* %out, align 8
597  ret void
598}
599
; Like and_lshr but with the wider mask 511, whose low 6 bits are discarded
; by the shift: (a & 511) >> 6. The checks expect the s_bfe_u32 immediate
; 0x30006.
600; GCN-LABEL: {{^}}and_lshr2:
601; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
602; GCN: buffer_store_dword
603define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
604  %b = and i32 %a, 511
605  %c = lshr i32 %b, 6
606  store i32 %c, i32 addrspace(1)* %out, align 8
607  ret void
608}
609
; Plain IR shift pair: (a << 9) >> 11 with a logical right shift. The checks
; expect a scalar s_bfe_u32 with the immediate 0x150002.
610; GCN-LABEL: {{^}}shl_lshr:
611; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
612; GCN: buffer_store_dword
613define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
614  %b = shl i32 %a, 9
615  %c = lshr i32 %b, 11
616  store i32 %c, i32 addrspace(1)* %out, align 8
617  ret void
618}
619
; Declaration of the intrinsic exercised by the kernels above, followed by
; the attribute groups: #0 is attached to every kernel, #1 to the intrinsic.
620declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
621
622attributes #0 = { nounwind }
623attributes #1 = { nounwind readnone }
624