1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2
3; Make sure 64-bit BFE pattern does a 32-bit BFE on the relevant half.
4
5; Bit 31 is the top bit of the low dword, so only the low half is loaded.
6; GCN-LABEL: {{^}}v_uextract_bit_31_i64:
7; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
8; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
9; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
10; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
11define void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
12  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
13  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
14  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
15  %wide = load i64, i64 addrspace(1)* %src
16  %shifted = lshr i64 %wide, 31  ; move bit 31 down to bit 0
17  %field = and i64 %shifted, 1  ; keep just that one bit
18  store i64 %field, i64 addrspace(1)* %dst
19  ret void
20}
21
22; Bit 63 is the top bit of the high dword, so only the high half is loaded.
23; GCN-LABEL: {{^}}v_uextract_bit_63_i64:
24; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
25; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
26; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
27; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
28define void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
29  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
30  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
31  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
32  %wide = load i64, i64 addrspace(1)* %src
33  %shifted = lshr i64 %wide, 63  ; move bit 63 down to bit 0
34  %field = and i64 %shifted, 1  ; keep just that one bit
35  store i64 %field, i64 addrspace(1)* %dst
36  ret void
37}
38
39; GCN-LABEL: {{^}}v_uextract_bit_1_i64:
40; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
41; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1
42; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
43; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
44define void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
45  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
46  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
47  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
48  %wide = load i64, i64 addrspace(1)* %src
49  %shifted = lshr i64 %wide, 1  ; bit 1 lives in the low dword
50  %field = and i64 %shifted, 1  ; one-bit field: checked as a bfe with offset 1, width 1
51  store i64 %field, i64 addrspace(1)* %dst
52  ret void
53}
54
55; GCN-LABEL: {{^}}v_uextract_bit_20_i64:
56; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
57; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 1
58; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
59; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
60define void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
61  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
62  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
63  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
64  %wide = load i64, i64 addrspace(1)* %src
65  %shifted = lshr i64 %wide, 20  ; bit 20 lives in the low dword
66  %field = and i64 %shifted, 1  ; one-bit field: checked as a bfe with offset 20, width 1
67  store i64 %field, i64 addrspace(1)* %dst
68  ret void
69}
70
71; GCN-LABEL: {{^}}v_uextract_bit_32_i64:
72; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
73; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 1, [[VAL]]
74; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
75; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
76define void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
77  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
78  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
79  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
80  %wide = load i64, i64 addrspace(1)* %src
81  %shifted = lshr i64 %wide, 32  ; bit 32 is bit 0 of the high dword
82  %field = and i64 %shifted, 1  ; checked as a plain 32-bit and of the high dword with 1
83  store i64 %field, i64 addrspace(1)* %dst
84  ret void
85}
86
87; GCN-LABEL: {{^}}v_uextract_bit_33_i64:
88; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
89; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
90; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
91; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
92define void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
93  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
94  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
95  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
96  %ld.64 = load i64, i64 addrspace(1)* %in.gep
97  %srl = lshr i64 %ld.64, 33  ; bit 33 is bit 1 of the high dword
98  %bit = and i64 %srl, 1  ; one-bit field: the bfe result must be the low dword of the store
99  store i64 %bit, i64 addrspace(1)* %out.gep
100  ret void
101}
102
103; GCN-LABEL: {{^}}v_uextract_bit_20_21_i64:
104; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
105; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 2
106; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
107; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
108define void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
109  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
110  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
111  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
112  %wide = load i64, i64 addrspace(1)* %src
113  %shifted = lshr i64 %wide, 20  ; field starts at bit 20 of the low dword
114  %field = and i64 %shifted, 3  ; two-bit field (bits 20 and 21)
115  store i64 %field, i64 addrspace(1)* %dst
116  ret void
117}
118
119; GCN-LABEL: {{^}}v_uextract_bit_1_30_i64:
120; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
121; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
122; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
123; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
124define void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
125  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
126  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
127  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
128  %wide = load i64, i64 addrspace(1)* %src
129  %shifted = lshr i64 %wide, 1  ; field starts at bit 1 of the low dword
130  %field = and i64 %shifted, 1073741823  ; 0x3fffffff: 30-bit field, still inside the low dword
131  store i64 %field, i64 addrspace(1)* %dst
132  ret void
133}
134
135; GCN-LABEL: {{^}}v_uextract_bit_1_31_i64:
136; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
137; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 1, [[VAL]]
138; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
139; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
140define void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
141  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
142  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
143  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
144  %wide = load i64, i64 addrspace(1)* %src
145  %shifted = lshr i64 %wide, 1  ; field starts at bit 1 of the low dword
146  %field = and i64 %shifted, 2147483647  ; 0x7fffffff: 31-bit field, checked as a bare 32-bit shift
147  store i64 %field, i64 addrspace(1)* %dst
148  ret void
149}
150
151; The field straddles the dword boundary, so a full 64-bit shift is required.
152; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64:
153; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
154; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
155; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]{{$}}
156; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
157; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
158define void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
159  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
160  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
161  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
162  %wide = load i64, i64 addrspace(1)* %src
163  %shifted = lshr i64 %wide, 31  ; field starts at the top bit of the low dword
164  %field = and i64 %shifted, 3  ; two-bit field: bit 31 (low half) and bit 32 (high half)
165  store i64 %field, i64 addrspace(1)* %dst
166  ret void
167}
168
169; GCN-LABEL: {{^}}v_uextract_bit_32_33_i64:
170; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
171; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2
172; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
173; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
174define void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
175  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
176  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
177  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
178  %wide = load i64, i64 addrspace(1)* %src
179  %shifted = lshr i64 %wide, 33  ; NOTE(review): name says bits 32-33, but shifting by 33 selects bits 33-34 - confirm intent
180  %field = and i64 %shifted, 3  ; two-bit field: checked as a bfe on the high dword, offset 1, width 2
181  store i64 %field, i64 addrspace(1)* %dst
182  ret void
183}
184
185; GCN-LABEL: {{^}}v_uextract_bit_30_60_i64:
186; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
187; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 30
188; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
189; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
190; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
191define void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
192  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
193  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
194  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
195  %wide = load i64, i64 addrspace(1)* %src
196  %shifted = lshr i64 %wide, 30  ; field starts at bit 30, in the low dword
197  %field = and i64 %shifted, 1073741823  ; 0x3fffffff: 30-bit field that runs into the high dword
198  store i64 %field, i64 addrspace(1)* %dst
199  ret void
200}
201
202; GCN-LABEL: {{^}}v_uextract_bit_33_63_i64:
203; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
204; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
205; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
206; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
207define void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
208  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
209  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
210  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
211  %ld.64 = load i64, i64 addrspace(1)* %in.gep
212  %srl = lshr i64 %ld.64, 33  ; bit 33 is bit 1 of the high dword
213  %bit = and i64 %srl, 1073741823  ; 0x3fffffff: 30-bit field; the bfe result is the low dword of the store
214  store i64 %bit, i64 addrspace(1)* %out.gep
215  ret void
216}
217
218; GCN-LABEL: {{^}}v_uextract_bit_31_63_i64:
219; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
220; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
221; GCN-NEXT: v_mov_b32_e32 v[[SHRHI]], 0{{$}}
222; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
223define void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
224  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
225  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
226  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
227  %wide = load i64, i64 addrspace(1)* %src
228  %shifted = lshr i64 %wide, 31  ; field starts at the top bit of the low dword
229  %masked = and i64 %shifted, 4294967295  ; 0xffffffff: keep the low 32 bits, zero the high dword
230  store i64 %masked, i64 addrspace(1)* %out  ; NOTE(review): writes %out, leaving %dst unused - confirm this is intended
231  ret void
232}
233
234; The shift result is truncated to i32 before the 'and' mask is applied.
235; GCN-LABEL: {{^}}v_uextract_bit_31_i64_trunc_i32:
236; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
237; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
238; GCN: buffer_store_dword v[[SHIFT]]
239define void @v_uextract_bit_31_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
240  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
241  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
242  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
243  %wide = load i64, i64 addrspace(1)* %src
244  %shifted = lshr i64 %wide, 31  ; move bit 31 down to bit 0
245  %narrow = trunc i64 %shifted to i32  ; narrow first ...
246  %field = and i32 %narrow, 1  ; ... then mask in 32 bits
247  store i32 %field, i32 addrspace(1)* %dst
248  ret void
249}
250
251; GCN-LABEL: {{^}}v_uextract_bit_3_i64_trunc_i32:
252; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
253; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 3, 1{{$}}
254; GCN: buffer_store_dword [[BFE]]
255define void @v_uextract_bit_3_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
256  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
257  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
258  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
259  %wide = load i64, i64 addrspace(1)* %src
260  %shifted = lshr i64 %wide, 3  ; bit 3 lives in the low dword
261  %narrow = trunc i64 %shifted to i32  ; narrow before masking
262  %field = and i32 %narrow, 1  ; one-bit field: checked as a bfe with offset 3, width 1
263  store i32 %field, i32 addrspace(1)* %dst
264  ret void
265}
266
267; GCN-LABEL: {{^}}v_uextract_bit_33_i64_trunc_i32:
268; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
269; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 1, 1{{$}}
270; GCN: buffer_store_dword [[BFE]]
271define void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
272  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
273  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
274  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
275  %wide = load i64, i64 addrspace(1)* %src
276  %shifted = lshr i64 %wide, 33  ; bit 33 is bit 1 of the high dword
277  %narrow = trunc i64 %shifted to i32  ; narrow before masking
278  %field = and i32 %narrow, 1  ; one-bit field: checked as a bfe with offset 1, width 1
279  store i32 %field, i32 addrspace(1)* %dst
280  ret void
281}
282
283; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64_trunc_i32:
284; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
285; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
286; GCN-NEXT: v_and_b32_e32 v[[SHRLO]], 3, v[[SHRLO]]
287; GCN-NOT: v[[SHRLO]]
288; GCN: buffer_store_dword v[[SHRLO]]
289define void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
290  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
291  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
292  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
293  %wide = load i64, i64 addrspace(1)* %src
294  %shifted = lshr i64 %wide, 31  ; field starts at the top bit of the low dword
295  %narrow = trunc i64 %shifted to i32  ; narrow before masking
296  %field = and i32 %narrow, 3  ; two-bit field crossing the dword boundary: 64-bit shift expected
297  store i32 %field, i32 addrspace(1)* %dst
298  ret void
299}
300
301; GCN-LABEL: {{^}}and_not_mask_i64:
302; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
303; GCN: v_mov_b32_e32 v[[SHRHI:[0-9]+]], 0{{$}}
304; GCN: v_lshrrev_b32_e32 [[SHR:v[0-9]+]], 20, v[[VALLO]]
305; GCN-DAG: v_and_b32_e32 v[[SHRLO:[0-9]+]], 4, [[SHR]]
306; GCN-NOT: v[[SHRLO]]
307; GCN-NOT: v[[SHRHI]]
308; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
309define void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
310  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
311  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
312  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
313  %ld.64 = load i64, i64 addrspace(1)* %in.gep
314  %srl = lshr i64 %ld.64, 20  ; shift stays within the low dword
315  %bit = and i64 %srl, 4  ; 4 is a lone set bit, not a low-bits mask of the form 2^n - 1
316  store i64 %bit, i64 addrspace(1)* %out.gep
317  ret void
318}
319
320; The instruction count is the same with/without hasOneUse, but
321; keeping the 32-bit 'and' has a smaller encoding size than the bfe.
322
323; GCN-LABEL: {{^}}v_uextract_bit_27_29_multi_use_shift_i64:
324; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
325; GCN-DAG: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 27
326; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]
327; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
328; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
329; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
330define void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
331  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
332  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
333  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
334  %wide = load i64, i64 addrspace(1)* %src
335  %shifted = lshr i64 %wide, 27  ; this shift has two users below
336  %field = and i64 %shifted, 3  ; two-bit field taken from the shifted value
337  store volatile i64 %shifted, i64 addrspace(1)* %out  ; first use: the full 64-bit shift result
338  store volatile i64 %field, i64 addrspace(1)* %out  ; second use: the masked field
339  ret void
340}
341
342; GCN-LABEL: {{^}}v_uextract_bit_34_37_multi_use_shift_i64:
343; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
344; GCN-DAG: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 2, [[VAL]]
345; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3
346; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
347; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHR]]:[[ZERO]]{{\]}}
348; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
349define void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
350  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
351  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
352  %dst = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
353  %wide = load i64, i64 addrspace(1)* %src
354  %shifted = lshr i64 %wide, 34  ; bit 34 is bit 2 of the high dword; this shift has two users below
355  %field = and i64 %shifted, 7  ; three-bit field: checked as a bfe with offset 2, width 3
356  store volatile i64 %shifted, i64 addrspace(1)* %out  ; first use: the shift result
357  store volatile i64 %field, i64 addrspace(1)* %out  ; second use: the masked field
358  ret void
359}
360
361; GCN-LABEL: {{^}}v_uextract_bit_33_36_use_upper_half_shift_i64:
362; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
363; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 3
364; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
365; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
366; GCN: buffer_store_dword v[[ZERO]]
367define void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
368  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
369  %src = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
370  %dst0 = getelementptr i64, i64 addrspace(1)* %out0, i32 %tid
371  %dst1 = getelementptr i32, i32 addrspace(1)* %out1, i32 %tid
372  %wide = load i64, i64 addrspace(1)* %src
373  %shifted = lshr i64 %wide, 33  ; bit 33 is bit 1 of the high dword
374  %field = and i64 %shifted, 7  ; three-bit field: checked as a bfe with offset 1, width 3
375  store volatile i64 %field, i64 addrspace(1)* %dst0
376
377  %shifted.hi = lshr i64 %shifted, 32  ; second user: only the upper half of the shift result
378  %hi.trunc = trunc i64 %shifted.hi to i32  ; the check lines expect the zero register to be stored here
379  store volatile i32 %hi.trunc, i32 addrspace(1)* %dst1
380  ret void
381}
382
383declare i32 @llvm.amdgcn.workitem.id.x() #0
384
385attributes #0 = { nounwind readnone }
386attributes #1 = { nounwind }
387