1; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
5declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
6
; All three intrinsic operands come from kernel arguments. Both targets are
; expected to select their signed bitfield-extract instruction; the r600 run
; additionally pins one byte of the printed encoding.
; FUNC-LABEL: {{^}}bfe_i32_arg_arg_arg:
; SI: v_bfe_i32
; EG: BFE_INT
; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
  ; NOTE(review): %src2 is never used; %src1 is passed as both the second and
  ; third operand. This looks like a copy/paste slip, but the byte-exact
  ; encoding check above may depend on the operands as written, so the call is
  ; left unchanged here -- confirm before altering it.
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
  store i32 %field, i32 addrspace(1)* %out, align 4
  ret void
}
16
; Source and second operand are kernel arguments; the third operand is the
; literal 123. The extract instruction must still be selected.
; FUNC-LABEL: {{^}}bfe_i32_arg_arg_imm:
; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
  store i32 %field, i32 addrspace(1)* %out, align 4
  ret void
}
25
; Second operand is the literal 123; the other two are kernel arguments.
; The extract instruction must still be selected.
; FUNC-LABEL: {{^}}bfe_i32_arg_imm_arg:
; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
  store i32 %field, i32 addrspace(1)* %out, align 4
  ret void
}
34
; The value being extracted from is the literal 123; the remaining two
; operands are kernel arguments. The extract instruction must still be selected.
; FUNC-LABEL: {{^}}bfe_i32_imm_arg_arg:
; SI: v_bfe_i32
; EG: BFE_INT
define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
  store i32 %field, i32 addrspace(1)* %out, align 4
  ret void
}
43
; Checks how the instruction is printed: the literal operands 2 and 8 must
; appear inline after the destination and source registers.
; FUNC-LABEL: {{^}}v_bfe_print_arg:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
  %val = load i32, i32 addrspace(1)* %src0, align 4
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %val, i32 2, i32 8) nounwind readnone
  store i32 %field, i32 addrspace(1)* %out, align 4
  ret void
}
52
; The third operand is the literal 0 while the second comes from a kernel
; argument. No bitfield-extract instruction may appear in the output of any
; run. The `[^@]` guard keeps the function's own name from matching the
; negative check.
; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  ; Named bfe_i32 (not bfe_u32): this is the signed intrinsic.
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
62
; Both the second operand (8) and the third operand (0) are literals. No
; bitfield-extract instruction may appear in the output of any run.
; %src1 is declared but not used by the body.
; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  ; Named bfe_i32 (not bfe_u32): this is the signed intrinsic.
  %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
  store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
  ret void
}
72
; A loaded value is shifted left by 31 and then passed to the intrinsic with
; operands (1, 31). The output is expected to contain a left shift by 31
; followed by an arithmetic right shift by 1.
; FUNC-LABEL: {{^}}bfe_i32_test_6:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %shifted = shl i32 %val, 31
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %shifted, i32 1, i32 31)
  store i32 %field, i32 addrspace(1)* %out, align 4
  ret void
}
84
; After a left shift by 31 the low 31 bits are all zero, so extracting 31 bits
; at offset 0 is expected to fold to the constant 0: no shift, no extract,
; just a move of 0 that is then stored.
; FUNC-LABEL: {{^}}bfe_i32_test_7:
; SI-NOT: shl
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %shifted = shl i32 %val, 31
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %shifted, i32 0, i32 31)
  store i32 %field, i32 addrspace(1)* %out, align 4
  ret void
}
98
; Extracting the top bit of (x << 31) is the same as extracting bit 0 of x,
; so the shift is expected to be absorbed into an extract with operands (0, 1)
; applied to the loaded value.
; FUNC-LABEL: {{^}}bfe_i32_test_8:
; SI: buffer_load_dword
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %shifted = shl i32 %val, 31
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %shifted, i32 31, i32 1)
  store i32 %field, i32 addrspace(1)* %out, align 4
  ret void
}
110
; Operands (31, 1) on a loaded value: expected to become a plain arithmetic
; right shift by 31, with no extract instruction before or after it.
; FUNC-LABEL: {{^}}bfe_i32_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %val, i32 31, i32 1)
  store i32 %field, i32 addrspace(1)* %out, align 4
  ret void
}
122
; Operands (1, 31) on a loaded value: expected to become a plain arithmetic
; right shift by 1, with no extract instruction before or after it.
; FUNC-LABEL: {{^}}bfe_i32_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %val, i32 1, i32 31)
  store i32 %field, i32 addrspace(1)* %out, align 4
  ret void
}
134
; Operands (8, 24) on a loaded value: expected to become a plain arithmetic
; right shift by 8, with no extract instruction before or after it.
; FUNC-LABEL: {{^}}bfe_i32_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %val, i32 8, i32 24)
  store i32 %field, i32 addrspace(1)* %out, align 4
  ret void
}
146
; Operands (24, 8) on a loaded value: expected to become a plain arithmetic
; right shift by 24, with no extract instruction before or after it.
; FUNC-LABEL: {{^}}bfe_i32_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %val, i32 24, i32 8)
  store i32 %field, i32 addrspace(1)* %out, align 4
  ret void
}
158
; The input is already an arithmetic right shift by 31, so extracting its top
; bit adds nothing: only the shift is expected in the output, with no extract
; instruction after it.
; FUNC-LABEL: {{^}}bfe_i32_test_13:
; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  ; Arithmetic shift right (previously misnamed %shl).
  %ashr = ashr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %ashr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
169
; After a logical right shift by 31 the top bit is always zero, so extracting
; bit 31 needs neither the shift nor an extract instruction in the output.
; FUNC-LABEL: {{^}}bfe_i32_test_14:
; SI-NOT: lshr
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  ; Logical shift right (previously misnamed %shl).
  %lshr = lshr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %lshr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
180
; All-constant call (0, 0, 0): expected to fold to a stored 0 with no extract
; instruction on either target.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_0:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
192
; All-constant call (12334, 0, 0): the third operand is 0, and the result is
; expected to fold to a stored 0 with no extract instruction.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_1:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
204
; All-constant call (0, 0, 1): bit 0 of zero is clear, so the expected folded
; value is 0.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_2:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
216
; All-constant call (1, 0, 1): the single extracted bit is set, and the
; expected folded value is -1 (a one-bit field sign-extends to all ones).
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_3:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
228
; All-constant call (4294967295, 0, 1): the source has every bit set, and the
; expected folded value is -1.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_4:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
240
; All-constant call (128, 7, 1): 128 has only bit 7 set, so the one-bit field
; at offset 7 is set and the expected folded value is -1.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_5:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
252
; All-constant call (128, 0, 8): the low byte 0x80 has its top bit set, so the
; expected folded value is the sign-extended 0xffffff80.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_6:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
264
; All-constant call (127, 0, 8): the low byte 0x7f has its top bit clear, so
; the expected folded value stays 0x7f.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_7:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
276
; All-constant call (127, 6, 8): 127 shifted right by 6 leaves 1, and the
; expected folded value is 1.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
288
; All-constant call (65536, 16, 8): only bit 16 is set in the source, so the
; byte starting at offset 16 is 1 and the expected folded value is 1.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
300
; All-constant call (65535, 16, 16): the upper half of 0x0000ffff is zero, so
; the expected folded value is 0.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
312
; All-constant call (160, 4, 4): 160 is 0xa0, so the nibble at offset 4 is
; 0b1010, which read as a signed 4-bit value gives the expected -6.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
324
; All-constant call (160, 31, 1): bit 31 of 160 is clear, so the expected
; folded value is 0.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
336
; All-constant call (131070, 16, 16): 131070 is 0x1fffe, whose upper half is
; 1, so the expected folded value is 1.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_13:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
348
; All-constant call (160, 2, 30): 160 shifted right by 2 is 40, which is the
; expected folded value.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_14:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
360
; All-constant call (160, 4, 28): 160 shifted right by 4 is 10, which is the
; expected folded value.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_15:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
372
; All-constant call (4294967295, 1, 7): every bit of the 7-bit field is set,
; so the expected folded value is -1.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_16:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
384
; All-constant call (255, 1, 31): 255 shifted right by 1 is 127 (0x7f), which
; is the expected folded value.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_17:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
396
; All-constant call (255, 31, 1): bit 31 of 255 is clear, so the expected
; folded value is 0.
; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_18:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
  %folded = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
  store i32 %folded, i32 addrspace(1)* %out, align 4
  ret void
}
408
; A 24-bit extract at offset 0 is followed by a shl/ashr pair by 8, which
; sign-extends from 24 bits again. The pair is expected to disappear: the
; output should hold only the load, one extract (0, 24) and the store of the
; extract result, with no vector shift in between.
; FUNC-LABEL: {{^}}bfe_sext_in_reg_i24:
; SI: buffer_load_dword [[LOAD:v[0-9]+]],
; SI-NOT: v_lshl
; SI-NOT: v_ashr
; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 24
; SI: buffer_store_dword [[BFE]],
define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %field = call i32 @llvm.AMDGPU.bfe.i32(i32 %val, i32 0, i32 24)
  %hi = shl i32 %field, 8
  %sext = ashr i32 %hi, 8
  store i32 %sext, i32 addrspace(1)* %out, align 4
  ret void
}
423
; A 16-bit extract at offset 1 feeds a signed divide by 2. The extract is
; expected to survive, followed by a three-instruction sequence for the
; divide: logical shift right by 31, add, arithmetic shift right by 1.
; The label check uses the same line-anchored `name:` form as the other
; tests in this file instead of matching the bare `@name` text.
; FUNC-LABEL: {{^}}simplify_demanded_bfe_sdiv:
; SI: buffer_load_dword [[LOAD:v[0-9]+]]
; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; SI: buffer_store_dword [[TMP2]]
define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone
  %div = sdiv i32 %bfe, 2
  store i32 %div, i32 addrspace(1)* %out, align 4
  ret void
}
438