; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX89,SI
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX89,VI
; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX89,GFX9
; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s -check-prefixes=R600
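; llvm.fshr(a, b, z) concatenates a (high part) and b (low part) and shifts the
; double-width value right by z modulo the bit width, returning the low half.
; For i32 this maps directly onto v_alignbit_b32 d, a, b, z, which computes
; d = ({a,b} >> z[4:0])[31:0]; R600 has the equivalent BIT_ALIGN_INT.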

declare i32 @llvm.fshr.i32(i32, i32, i32)
declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>)
declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>)
declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare i24 @llvm.fshr.i24(i24, i24, i24)
declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>)

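; A variable i32 funnel shift selects to a single v_alignbit_b32 (BIT_ALIGN_INT
; on R600); the intrinsic's modulo-32 semantics match the instruction's, so no
; masking of the shift amount is needed.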
define amdgpu_kernel void @fshr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) {
; SI-LABEL: fshr_i32:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
; SI-NEXT:    s_load_dword s0, s[0:1], 0xd
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s3
; SI-NEXT:    v_mov_b32_e32 v1, s0
; SI-NEXT:    v_alignbit_b32 v0, s2, v0, v1
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fshr_i32:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
; VI-NEXT:    s_load_dword s0, s[0:1], 0x34
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s5
; VI-NEXT:    v_mov_b32_e32 v1, s0
; VI-NEXT:    v_alignbit_b32 v2, s4, v0, v1
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX9-LABEL: fshr_i32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x34
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, s5
; GFX9-NEXT:    v_mov_b32_e32 v2, s0
; GFX9-NEXT:    v_alignbit_b32 v1, s4, v1, v2
; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX9-NEXT:    s_endpgm
;
; R600-LABEL: fshr_i32:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; R600-NEXT:     BIT_ALIGN_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X,
entry:
  %0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  store i32 %0, i32 addrspace(1)* %in
  ret void
}

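; A constant shift amount folds into the alignbit as an inline immediate.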
define amdgpu_kernel void @fshr_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
; SI-LABEL: fshr_i32_imm:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s1
; SI-NEXT:    v_alignbit_b32 v0, s0, v0, 7
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fshr_i32_imm:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s1
; VI-NEXT:    v_alignbit_b32 v2, s0, v0, 7
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX9-LABEL: fshr_i32_imm:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_alignbit_b32 v1, s0, v1, 7
; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX9-NEXT:    s_endpgm
;
; R600-LABEL: fshr_i32_imm:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; R600-NEXT:     BIT_ALIGN_INT * T1.X, KC0[2].Z, KC0[2].W, literal.x,
; R600-NEXT:    7(9.809089e-45), 0(0.000000e+00)
entry:
  %0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 7)
  store i32 %0, i32 addrspace(1)* %in
  ret void
}

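; Vector funnel shifts are scalarized into one alignbit per element.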
define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
; SI-LABEL: fshr_v2i32:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xd
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xf
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s9
; SI-NEXT:    v_mov_b32_e32 v1, s1
; SI-NEXT:    v_alignbit_b32 v1, s3, v0, v1
; SI-NEXT:    v_mov_b32_e32 v0, s8
; SI-NEXT:    v_mov_b32_e32 v2, s0
; SI-NEXT:    v_alignbit_b32 v0, s2, v0, v2
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fshr_v2i32:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
; VI-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x34
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x3c
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s7
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_alignbit_b32 v1, s5, v0, v1
; VI-NEXT:    v_mov_b32_e32 v0, s6
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    v_alignbit_b32 v0, s4, v0, v2
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    v_mov_b32_e32 v3, s3
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
;
; GFX9-LABEL: fshr_v2i32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x34
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x3c
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s7
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_alignbit_b32 v1, s5, v0, v1
; GFX9-NEXT:    v_mov_b32_e32 v0, s6
; GFX9-NEXT:    v_mov_b32_e32 v3, s0
; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, v3
; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
; GFX9-NEXT:    s_endpgm
;
; R600-LABEL: fshr_v2i32:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     MOV * T0.W, KC0[4].X,
; R600-NEXT:     BIT_ALIGN_INT T0.Y, KC0[3].X, KC0[3].Z, PV.W,
; R600-NEXT:     MOV * T0.W, KC0[3].W,
; R600-NEXT:     BIT_ALIGN_INT * T0.X, KC0[2].W, KC0[3].Y, PV.W,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
  store <2 x i32> %0, <2 x i32> addrspace(1)* %in
  ret void
}

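; Per-element constant amounts fold into each scalarized alignbit.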
define amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
; SI-LABEL: fshr_v2i32_imm:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s1
; SI-NEXT:    v_alignbit_b32 v1, s3, v0, 9
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    v_alignbit_b32 v0, s2, v0, 7
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fshr_v2i32_imm:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    v_alignbit_b32 v1, s5, v0, 9
; VI-NEXT:    v_alignbit_b32 v0, s4, v2, 7
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    v_mov_b32_e32 v3, s3
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
;
; GFX9-LABEL: fshr_v2i32_imm:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s1
; GFX9-NEXT:    v_mov_b32_e32 v3, s0
; GFX9-NEXT:    v_alignbit_b32 v1, s5, v0, 9
; GFX9-NEXT:    v_alignbit_b32 v0, s4, v3, 7
; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
; GFX9-NEXT:    s_endpgm
;
; R600-LABEL: fshr_v2i32_imm:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     BIT_ALIGN_INT * T0.Y, KC0[3].X, KC0[3].Z, literal.x,
; R600-NEXT:    9(1.261169e-44), 0(0.000000e+00)
; R600-NEXT:     BIT_ALIGN_INT * T0.X, KC0[2].W, KC0[3].Y, literal.x,
; R600-NEXT:    7(9.809089e-45), 0(0.000000e+00)
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 7, i32 9>)
  store <2 x i32> %0, <2 x i32> addrspace(1)* %in
  ret void
}

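; Four elements, four alignbits.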
define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; SI-LABEL: fshr_v4i32:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0xd
; SI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x11
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x15
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s15
; SI-NEXT:    v_mov_b32_e32 v1, s3
; SI-NEXT:    v_alignbit_b32 v3, s11, v0, v1
; SI-NEXT:    v_mov_b32_e32 v0, s14
; SI-NEXT:    v_mov_b32_e32 v1, s2
; SI-NEXT:    v_alignbit_b32 v2, s10, v0, v1
; SI-NEXT:    v_mov_b32_e32 v0, s13
; SI-NEXT:    v_mov_b32_e32 v1, s1
; SI-NEXT:    v_alignbit_b32 v1, s9, v0, v1
; SI-NEXT:    v_mov_b32_e32 v0, s12
; SI-NEXT:    v_mov_b32_e32 v4, s0
; SI-NEXT:    v_alignbit_b32 v0, s8, v0, v4
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fshr_v4i32:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[12:13], s[0:1], 0x24
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x44
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x54
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s11
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_alignbit_b32 v3, s7, v0, v1
; VI-NEXT:    v_mov_b32_e32 v0, s10
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_alignbit_b32 v2, s6, v0, v1
; VI-NEXT:    v_mov_b32_e32 v0, s9
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_alignbit_b32 v1, s5, v0, v1
; VI-NEXT:    v_mov_b32_e32 v0, s8
; VI-NEXT:    v_mov_b32_e32 v4, s0
; VI-NEXT:    v_alignbit_b32 v0, s4, v0, v4
; VI-NEXT:    v_mov_b32_e32 v4, s12
; VI-NEXT:    v_mov_b32_e32 v5, s13
; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT:    s_endpgm
;
; GFX9-LABEL: fshr_v4i32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dwordx2 s[12:13], s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x44
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x54
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s11
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    v_alignbit_b32 v3, s7, v0, v1
; GFX9-NEXT:    v_mov_b32_e32 v0, s10
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    v_alignbit_b32 v2, s6, v0, v1
; GFX9-NEXT:    v_mov_b32_e32 v0, s9
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_alignbit_b32 v1, s5, v0, v1
; GFX9-NEXT:    v_mov_b32_e32 v0, s8
; GFX9-NEXT:    v_mov_b32_e32 v5, s0
; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, v5
; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[12:13]
; GFX9-NEXT:    s_endpgm
;
; R600-LABEL: fshr_v4i32:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     MOV * T0.W, KC0[6].X,
; R600-NEXT:     BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, PV.W,
; R600-NEXT:     MOV * T1.W, KC0[5].W,
; R600-NEXT:     BIT_ALIGN_INT * T0.Z, KC0[3].W, KC0[4].W, PV.W,
; R600-NEXT:     MOV * T1.W, KC0[5].Z,
; R600-NEXT:     BIT_ALIGN_INT * T0.Y, KC0[3].Z, KC0[4].Z, PV.W,
; R600-NEXT:     MOV * T1.W, KC0[5].Y,
; R600-NEXT:     BIT_ALIGN_INT * T0.X, KC0[3].Y, KC0[4].Y, PV.W,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z)
  store <4 x i32> %0, <4 x i32> addrspace(1)* %in
  ret void
}

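; Shift amounts apply modulo 32, so the last element's amount of 33 is folded
; to an inline immediate of 1.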
define amdgpu_kernel void @fshr_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
; SI-LABEL: fshr_v4i32_imm:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x11
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s3
; SI-NEXT:    v_alignbit_b32 v3, s11, v0, 1
; SI-NEXT:    v_mov_b32_e32 v0, s2
; SI-NEXT:    v_alignbit_b32 v2, s10, v0, 9
; SI-NEXT:    v_mov_b32_e32 v0, s1
; SI-NEXT:    v_alignbit_b32 v1, s9, v0, 7
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    v_alignbit_b32 v0, s8, v0, 1
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fshr_v4i32_imm:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x24
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x44
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v4, s8
; VI-NEXT:    v_mov_b32_e32 v5, s9
; VI-NEXT:    v_mov_b32_e32 v0, s3
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_alignbit_b32 v3, s7, v0, 1
; VI-NEXT:    v_mov_b32_e32 v0, s1
; VI-NEXT:    v_alignbit_b32 v2, s6, v1, 9
; VI-NEXT:    v_alignbit_b32 v1, s5, v0, 7
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_alignbit_b32 v0, s4, v0, 1
; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT:    s_endpgm
;
; GFX9-LABEL: fshr_v4i32_imm:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x44
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    v_alignbit_b32 v3, s7, v0, 1
; GFX9-NEXT:    v_mov_b32_e32 v0, s1
; GFX9-NEXT:    v_alignbit_b32 v2, s6, v1, 9
; GFX9-NEXT:    v_alignbit_b32 v1, s5, v0, 7
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, 1
; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[8:9]
; GFX9-NEXT:    s_endpgm
;
; R600-LABEL: fshr_v4i32_imm:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 7, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, 1,
; R600-NEXT:     BIT_ALIGN_INT * T0.Z, KC0[3].W, KC0[4].W, literal.x,
; R600-NEXT:    9(1.261169e-44), 0(0.000000e+00)
; R600-NEXT:     BIT_ALIGN_INT * T0.Y, KC0[3].Z, KC0[4].Z, literal.x,
; R600-NEXT:    7(9.809089e-45), 0(0.000000e+00)
; R600-NEXT:     BIT_ALIGN_INT * T0.X, KC0[3].Y, KC0[4].Y, 1,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 7, i32 9, i32 33>)
  store <4 x i32> %0, <4 x i32> addrspace(1)* %in
  ret void
}

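; The v_* variants are callable (non-kernel) functions: operands arrive in
; VGPRs, so alignbit consumes them directly with no s-to-v copies.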
define i32 @v_fshr_i32(i32 %src0, i32 %src1, i32 %src2) {
; GFX89-LABEL: v_fshr_i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_alignbit_b32 v0, v0, v1, v2
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fshr_i32:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %ret = call i32 @llvm.fshr.i32(i32 %src0, i32 %src1, i32 %src2)
  ret i32 %ret
}

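; Per-element alignbit, all operands already in VGPRs.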
define <2 x i32> @v_fshr_v2i32(<2 x i32> %src0, <2 x i32> %src1, <2 x i32> %src2) {
; GFX89-LABEL: v_fshr_v2i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_alignbit_b32 v0, v0, v2, v4
; GFX89-NEXT:    v_alignbit_b32 v1, v1, v3, v5
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fshr_v2i32:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %ret = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %src0, <2 x i32> %src1, <2 x i32> %src2)
  ret <2 x i32> %ret
}

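; Same per-element selection for three elements.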
define <3 x i32> @v_fshr_v3i32(<3 x i32> %src0, <3 x i32> %src1, <3 x i32> %src2) {
; GFX89-LABEL: v_fshr_v3i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_alignbit_b32 v0, v0, v3, v6
; GFX89-NEXT:    v_alignbit_b32 v1, v1, v4, v7
; GFX89-NEXT:    v_alignbit_b32 v2, v2, v5, v8
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fshr_v3i32:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %ret = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %src0, <3 x i32> %src1, <3 x i32> %src2)
  ret <3 x i32> %ret
}

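; And for four.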
define <4 x i32> @v_fshr_v4i32(<4 x i32> %src0, <4 x i32> %src1, <4 x i32> %src2) {
; GFX89-LABEL: v_fshr_v4i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_alignbit_b32 v0, v0, v4, v8
; GFX89-NEXT:    v_alignbit_b32 v1, v1, v5, v9
; GFX89-NEXT:    v_alignbit_b32 v2, v2, v6, v10
; GFX89-NEXT:    v_alignbit_b32 v3, v3, v7, v11
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fshr_v4i32:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %ret = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %src0, <4 x i32> %src1, <4 x i32> %src2)
  ret <4 x i32> %ret
}

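; There is no 16-bit alignbit. SI reuses v_alignbit_b32 by pre-shifting the low
; operand into the high half (b << 16) and ORing 16 into the amount. VI/GFX9
; expand to 16-bit shifts instead: (a << 1) << (~z & 15) merged with
; b >> (z & 15); the extra "<< 1" keeps the left-shift amount in range when z
; is a multiple of 16.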
define i16 @v_fshr_i16(i16 %src0, i16 %src1, i16 %src2) {
; SI-LABEL: v_fshr_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_or_b32_e32 v2, 16, v2
; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT:    v_alignbit_b32 v0, v0, v1, v2
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fshr_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_xor_b32_e32 v3, -1, v2
; VI-NEXT:    v_and_b32_e32 v2, 15, v2
; VI-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
; VI-NEXT:    v_and_b32_e32 v3, 15, v3
; VI-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
; VI-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_xor_b32_e32 v3, -1, v2
; GFX9-NEXT:    v_and_b32_e32 v2, 15, v2
; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
; GFX9-NEXT:    v_and_b32_e32 v3, 15, v3
; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fshr_i16:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %ret = call i16 @llvm.fshr.i16(i16 %src0, i16 %src1, i16 %src2)
  ret i16 %ret
}

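; GFX9 uses packed 16-bit shifts (v_pk_lshlrev_b16/v_pk_lshrrev_b16), masking
; both lane amounts at once with 0xf000f. VI handles the high lane with SDWA
; forms and the low lane with plain 16-bit shifts; SI splits into two 32-bit
; alignbits and repacks.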
define <2 x i16> @v_fshr_v2i16(<2 x i16> %src0, <2 x i16> %src1, <2 x i16> %src2) {
; SI-LABEL: v_fshr_v2i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_or_b32_e32 v5, 16, v5
; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; SI-NEXT:    v_alignbit_b32 v1, v1, v3, v5
; SI-NEXT:    v_or_b32_e32 v3, 16, v4
; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; SI-NEXT:    v_alignbit_b32 v0, v0, v2, v3
; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    v_or_b32_e32 v0, v0, v1
; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fshr_v2i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; VI-NEXT:    v_and_b32_e32 v4, 15, v3
; VI-NEXT:    v_mov_b32_e32 v5, 1
; VI-NEXT:    v_xor_b32_e32 v3, -1, v3
; VI-NEXT:    v_lshlrev_b16_sdwa v5, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT:    v_and_b32_e32 v3, 15, v3
; VI-NEXT:    v_lshrrev_b16_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT:    v_lshlrev_b16_e32 v3, v3, v5
; VI-NEXT:    v_or_b32_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT:    v_xor_b32_e32 v4, -1, v2
; VI-NEXT:    v_and_b32_e32 v2, 15, v2
; VI-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
; VI-NEXT:    v_and_b32_e32 v4, 15, v4
; VI-NEXT:    v_lshlrev_b16_e32 v0, v4, v0
; VI-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_xor_b32_e32 v3, -1, v2
; GFX9-NEXT:    s_mov_b32 s4, 0xf000f
; GFX9-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
; GFX9-NEXT:    v_and_b32_e32 v3, s4, v3
; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v3, v0
; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fshr_v2i16:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %ret = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %src0, <2 x i16> %src1, <2 x i16> %src2)
  ret <2 x i16> %ret
}

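; Three lanes: the same per-lane expansion plus repacking of the two result
; dwords.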
define <3 x i16> @v_fshr_v3i16(<3 x i16> %src0, <3 x i16> %src1, <3 x i16> %src2) {
; SI-LABEL: v_fshr_v3i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_or_b32_e32 v7, 16, v7
; SI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; SI-NEXT:    v_alignbit_b32 v1, v1, v4, v7
; SI-NEXT:    v_or_b32_e32 v4, 16, v6
; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; SI-NEXT:    v_alignbit_b32 v0, v0, v3, v4
; SI-NEXT:    s_mov_b32 s4, 0xffff
; SI-NEXT:    v_or_b32_e32 v3, 16, v8
; SI-NEXT:    v_lshlrev_b32_e32 v4, 16, v5
; SI-NEXT:    v_alignbit_b32 v3, v2, v4, v3
; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT:    v_and_b32_e32 v0, s4, v0
; SI-NEXT:    v_or_b32_e32 v0, v0, v1
; SI-NEXT:    v_and_b32_e32 v2, s4, v3
; SI-NEXT:    v_alignbit_b32 v1, v3, v1, 16
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fshr_v3i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v6, 16, v4
; VI-NEXT:    v_and_b32_e32 v7, 15, v6
; VI-NEXT:    v_mov_b32_e32 v8, 1
; VI-NEXT:    v_xor_b32_e32 v6, -1, v6
; VI-NEXT:    v_lshlrev_b16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT:    v_and_b32_e32 v6, 15, v6
; VI-NEXT:    v_lshrrev_b16_sdwa v7, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT:    v_lshlrev_b16_e32 v6, v6, v8
; VI-NEXT:    v_or_b32_sdwa v6, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT:    v_xor_b32_e32 v7, -1, v5
; VI-NEXT:    v_and_b32_e32 v5, 15, v5
; VI-NEXT:    v_lshlrev_b16_e32 v1, 1, v1
; VI-NEXT:    v_and_b32_e32 v7, 15, v7
; VI-NEXT:    v_lshlrev_b16_e32 v1, v7, v1
; VI-NEXT:    v_lshrrev_b16_e32 v3, v5, v3
; VI-NEXT:    v_or_b32_e32 v1, v1, v3
; VI-NEXT:    v_xor_b32_e32 v3, -1, v4
; VI-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
; VI-NEXT:    v_and_b32_e32 v3, 15, v3
; VI-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
; VI-NEXT:    v_and_b32_e32 v3, 15, v4
; VI-NEXT:    v_lshrrev_b16_e32 v2, v3, v2
; VI-NEXT:    v_or_b32_e32 v0, v0, v2
; VI-NEXT:    v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_v3i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v4
; GFX9-NEXT:    v_and_b32_e32 v7, 15, v6
; GFX9-NEXT:    v_mov_b32_e32 v8, 1
; GFX9-NEXT:    v_xor_b32_e32 v6, -1, v6
; GFX9-NEXT:    v_lshlrev_b16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT:    v_and_b32_e32 v6, 15, v6
; GFX9-NEXT:    v_lshrrev_b16_sdwa v7, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT:    v_lshlrev_b16_e32 v6, v6, v8
; GFX9-NEXT:    v_or_b32_e32 v6, v6, v7
; GFX9-NEXT:    v_xor_b32_e32 v7, -1, v5
; GFX9-NEXT:    v_and_b32_e32 v5, 15, v5
; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 1, v1
; GFX9-NEXT:    v_and_b32_e32 v7, 15, v7
; GFX9-NEXT:    v_lshlrev_b16_e32 v1, v7, v1
; GFX9-NEXT:    v_lshrrev_b16_e32 v3, v5, v3
; GFX9-NEXT:    v_or_b32_e32 v1, v1, v3
; GFX9-NEXT:    v_xor_b32_e32 v3, -1, v4
; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
; GFX9-NEXT:    v_and_b32_e32 v3, 15, v3
; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
; GFX9-NEXT:    v_and_b32_e32 v3, 15, v4
; GFX9-NEXT:    v_lshrrev_b16_e32 v2, v3, v2
; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v6, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fshr_v3i16:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %ret = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %src0, <3 x i16> %src1, <3 x i16> %src2)
  ret <3 x i16> %ret
}

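; Four lanes across two dwords, expanded and repacked per lane.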
define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2) {
; SI-LABEL: v_fshr_v4i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_or_b32_e32 v9, 16, v9
; SI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
; SI-NEXT:    v_alignbit_b32 v1, v1, v5, v9
; SI-NEXT:    v_or_b32_e32 v5, 16, v8
; SI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; SI-NEXT:    v_alignbit_b32 v0, v0, v4, v5
; SI-NEXT:    v_or_b32_e32 v4, 16, v11
; SI-NEXT:    v_lshlrev_b32_e32 v5, 16, v7
; SI-NEXT:    v_alignbit_b32 v3, v3, v5, v4
; SI-NEXT:    v_or_b32_e32 v4, 16, v10
; SI-NEXT:    v_lshlrev_b32_e32 v5, 16, v6
; SI-NEXT:    s_mov_b32 s4, 0xffff
; SI-NEXT:    v_alignbit_b32 v2, v2, v5, v4
; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; SI-NEXT:    v_and_b32_e32 v2, s4, v2
; SI-NEXT:    v_or_b32_e32 v2, v2, v3
; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT:    v_and_b32_e32 v0, s4, v0
; SI-NEXT:    v_or_b32_e32 v0, v0, v1
; SI-NEXT:    v_alignbit_b32 v1, v2, v1, 16
; SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fshr_v4i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
; VI-NEXT:    v_and_b32_e32 v7, 15, v6
; VI-NEXT:    v_xor_b32_e32 v6, -1, v6
; VI-NEXT:    v_mov_b32_e32 v8, 1
; VI-NEXT:    v_lshlrev_b16_sdwa v9, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT:    v_and_b32_e32 v6, 15, v6
; VI-NEXT:    v_lshrrev_b16_sdwa v7, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT:    v_lshlrev_b16_e32 v6, v6, v9
; VI-NEXT:    v_or_b32_sdwa v6, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
; VI-NEXT:    v_and_b32_e32 v9, 15, v7
; VI-NEXT:    v_xor_b32_e32 v7, -1, v7
; VI-NEXT:    v_lshlrev_b16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT:    v_and_b32_e32 v7, 15, v7
; VI-NEXT:    v_lshlrev_b16_e32 v7, v7, v8
; VI-NEXT:    v_xor_b32_e32 v8, -1, v5
; VI-NEXT:    v_and_b32_e32 v5, 15, v5
; VI-NEXT:    v_lshlrev_b16_e32 v1, 1, v1
; VI-NEXT:    v_and_b32_e32 v8, 15, v8
; VI-NEXT:    v_lshlrev_b16_e32 v1, v8, v1
; VI-NEXT:    v_lshrrev_b16_e32 v3, v5, v3
; VI-NEXT:    v_or_b32_e32 v1, v1, v3
; VI-NEXT:    v_xor_b32_e32 v3, -1, v4
; VI-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
; VI-NEXT:    v_and_b32_e32 v3, 15, v3
; VI-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
; VI-NEXT:    v_and_b32_e32 v3, 15, v4
; VI-NEXT:    v_lshrrev_b16_sdwa v9, v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT:    v_lshrrev_b16_e32 v2, v3, v2
; VI-NEXT:    v_or_b32_sdwa v7, v7, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT:    v_or_b32_e32 v0, v0, v2
; VI-NEXT:    v_or_b32_sdwa v0, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT:    v_or_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
; GFX9-NEXT:    v_and_b32_e32 v7, 15, v6
; GFX9-NEXT:    v_xor_b32_e32 v6, -1, v6
; GFX9-NEXT:    v_mov_b32_e32 v8, 1
; GFX9-NEXT:    v_lshlrev_b16_sdwa v9, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT:    v_and_b32_e32 v6, 15, v6
; GFX9-NEXT:    v_lshrrev_b16_sdwa v7, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT:    v_lshlrev_b16_e32 v6, v6, v9
; GFX9-NEXT:    v_or_b32_e32 v6, v6, v7
; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
; GFX9-NEXT:    v_and_b32_e32 v9, 15, v7
; GFX9-NEXT:    v_xor_b32_e32 v7, -1, v7
; GFX9-NEXT:    v_lshlrev_b16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT:    v_and_b32_e32 v7, 15, v7
; GFX9-NEXT:    v_lshlrev_b16_e32 v7, v7, v8
; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v5
; GFX9-NEXT:    v_and_b32_e32 v5, 15, v5
; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 1, v1
; GFX9-NEXT:    v_and_b32_e32 v8, 15, v8
; GFX9-NEXT:    v_lshlrev_b16_e32 v1, v8, v1
; GFX9-NEXT:    v_lshrrev_b16_e32 v3, v5, v3
; GFX9-NEXT:    v_or_b32_e32 v1, v1, v3
; GFX9-NEXT:    v_xor_b32_e32 v3, -1, v4
; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
; GFX9-NEXT:    v_and_b32_e32 v3, 15, v3
; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
; GFX9-NEXT:    v_and_b32_e32 v3, 15, v4
; GFX9-NEXT:    v_lshrrev_b16_sdwa v9, v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT:    v_lshrrev_b16_e32 v2, v3, v2
; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_and_b32_e32 v1, v2, v1
; GFX9-NEXT:    v_or_b32_e32 v7, v7, v9
; GFX9-NEXT:    v_and_b32_e32 v0, v2, v0
; GFX9-NEXT:    v_lshl_or_b32 v0, v7, 16, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v6, 16, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fshr_v4i16:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %ret = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2)
  ret <4 x i16> %ret
}

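; i64 expands to 64-bit shifts: b >> (z & 63) ORed with (a << 1) << (~z & 63),
; the same pre-shift-by-one trick as the 16-bit expansion, widened to 64 bits.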
define i64 @v_fshr_i64(i64 %src0, i64 %src1, i64 %src2) {
; SI-LABEL: v_fshr_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v5, 63, v4
; SI-NEXT:    v_not_b32_e32 v4, v4
; SI-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
; SI-NEXT:    v_and_b32_e32 v4, 63, v4
; SI-NEXT:    v_lshr_b64 v[2:3], v[2:3], v5
; SI-NEXT:    v_lshl_b64 v[0:1], v[0:1], v4
; SI-NEXT:    v_or_b32_e32 v1, v1, v3
; SI-NEXT:    v_or_b32_e32 v0, v0, v2
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fshr_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_and_b32_e32 v5, 63, v4
; VI-NEXT:    v_not_b32_e32 v4, v4
; VI-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
; VI-NEXT:    v_and_b32_e32 v4, 63, v4
; VI-NEXT:    v_lshrrev_b64 v[2:3], v5, v[2:3]
; VI-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
; VI-NEXT:    v_or_b32_e32 v1, v1, v3
; VI-NEXT:    v_or_b32_e32 v0, v0, v2
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v5, 63, v4
; GFX9-NEXT:    v_not_b32_e32 v4, v4
; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX9-NEXT:    v_and_b32_e32 v4, 63, v4
; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v5, v[2:3]
; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
; GFX9-NEXT:    v_or_b32_e32 v1, v1, v3
; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fshr_i64:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %ret = call i64 @llvm.fshr.i64(i64 %src0, i64 %src1, i64 %src2)
  ret i64 %ret
}

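; Two i64 elements, each expanded as above.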
define <2 x i64> @v_fshr_v2i64(<2 x i64> %src0, <2 x i64> %src1, <2 x i64> %src2) {
; SI-LABEL: v_fshr_v2i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v9, 63, v8
; SI-NEXT:    v_not_b32_e32 v8, v8
; SI-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
; SI-NEXT:    v_and_b32_e32 v8, 63, v8
; SI-NEXT:    v_lshr_b64 v[4:5], v[4:5], v9
; SI-NEXT:    v_lshl_b64 v[0:1], v[0:1], v8
; SI-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
; SI-NEXT:    v_or_b32_e32 v1, v1, v5
; SI-NEXT:    v_and_b32_e32 v5, 63, v10
; SI-NEXT:    v_lshr_b64 v[5:6], v[6:7], v5
; SI-NEXT:    v_not_b32_e32 v7, v10
; SI-NEXT:    v_and_b32_e32 v7, 63, v7
; SI-NEXT:    v_lshl_b64 v[2:3], v[2:3], v7
; SI-NEXT:    v_or_b32_e32 v0, v0, v4
; SI-NEXT:    v_or_b32_e32 v3, v3, v6
; SI-NEXT:    v_or_b32_e32 v2, v2, v5
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fshr_v2i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_and_b32_e32 v9, 63, v8
; VI-NEXT:    v_not_b32_e32 v8, v8
; VI-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
; VI-NEXT:    v_and_b32_e32 v8, 63, v8
; VI-NEXT:    v_lshrrev_b64 v[4:5], v9, v[4:5]
; VI-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
; VI-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
; VI-NEXT:    v_or_b32_e32 v1, v1, v5
; VI-NEXT:    v_and_b32_e32 v5, 63, v10
; VI-NEXT:    v_lshrrev_b64 v[5:6], v5, v[6:7]
; VI-NEXT:    v_not_b32_e32 v7, v10
; VI-NEXT:    v_and_b32_e32 v7, 63, v7
; VI-NEXT:    v_lshlrev_b64 v[2:3], v7, v[2:3]
; VI-NEXT:    v_or_b32_e32 v0, v0, v4
; VI-NEXT:    v_or_b32_e32 v3, v3, v6
; VI-NEXT:    v_or_b32_e32 v2, v2, v5
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_v2i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v9, 63, v8
; GFX9-NEXT:    v_not_b32_e32 v8, v8
; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX9-NEXT:    v_and_b32_e32 v8, 63, v8
; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v9, v[4:5]
; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX9-NEXT:    v_or_b32_e32 v1, v1, v5
; GFX9-NEXT:    v_and_b32_e32 v5, 63, v10
; GFX9-NEXT:    v_lshrrev_b64 v[5:6], v5, v[6:7]
; GFX9-NEXT:    v_not_b32_e32 v7, v10
; GFX9-NEXT:    v_and_b32_e32 v7, 63, v7
; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v7, v[2:3]
; GFX9-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX9-NEXT:    v_or_b32_e32 v3, v3, v6
; GFX9-NEXT:    v_or_b32_e32 v2, v2, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fshr_v2i64:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %ret = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %src0, <2 x i64> %src1, <2 x i64> %src2)
  ret <2 x i64> %ret
}

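; i24 is not a power of two, so the amount needs a real urem: z % 24 is
; computed as z - (z / 24) * 24, with z / 24 formed by a multiply-high with
; the reciprocal constant 0xaaaaaaab and a shift right by 4. The low operand
; is placed in bits [31:8] (b << 8) and 8 is added to the amount, after which
; one v_alignbit_b32 performs the 24-bit funnel shift.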
define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
; SI-LABEL: v_fshr_i24:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, 0xaaaaaaab
; SI-NEXT:    v_mul_hi_u32 v3, v2, s4
; SI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; SI-NEXT:    v_lshrrev_b32_e32 v3, 4, v3
; SI-NEXT:    v_mul_lo_u32 v3, v3, 24
; SI-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
; SI-NEXT:    v_add_i32_e32 v2, vcc, 8, v2
; SI-NEXT:    v_alignbit_b32 v0, v0, v1, v2
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fshr_i24:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s4, 0xaaaaaaab
; VI-NEXT:    v_mul_hi_u32 v3, v2, s4
; VI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; VI-NEXT:    v_lshrrev_b32_e32 v3, 4, v3
; VI-NEXT:    v_mul_lo_u32 v3, v3, 24
; VI-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
; VI-NEXT:    v_add_u32_e32 v2, vcc, 8, v2
; VI-NEXT:    v_alignbit_b32 v0, v0, v1, v2
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_i24:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s4, 0xaaaaaaab
; GFX9-NEXT:    v_mul_hi_u32 v3, v2, s4
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 4, v3
; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 24
; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
; GFX9-NEXT:    v_add_u32_e32 v2, 8, v2
; GFX9-NEXT:    v_alignbit_b32 v0, v0, v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fshr_i24:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %ret = call i24 @llvm.fshr.i24(i24 %src0, i24 %src1, i24 %src2)
  ret i24 %ret
}

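; Two i24 elements, each reduced modulo 24 and funnel-shifted as above.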
define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2) {
; SI-LABEL: v_fshr_v2i24:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, 0xaaaaaaab
; SI-NEXT:    v_mul_hi_u32 v6, v4, s4
; SI-NEXT:    v_mul_hi_u32 v7, v5, s4
; SI-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
; SI-NEXT:    v_lshrrev_b32_e32 v6, 4, v6
; SI-NEXT:    v_mul_lo_u32 v6, v6, 24
; SI-NEXT:    v_sub_i32_e32 v4, vcc, v4, v6
; SI-NEXT:    v_lshrrev_b32_e32 v6, 4, v7
; SI-NEXT:    v_mul_lo_u32 v6, v6, 24
; SI-NEXT:    v_add_i32_e32 v4, vcc, 8, v4
; SI-NEXT:    v_alignbit_b32 v0, v0, v2, v4
; SI-NEXT:    v_lshlrev_b32_e32 v2, 8, v3
; SI-NEXT:    v_sub_i32_e32 v3, vcc, v5, v6
; SI-NEXT:    v_add_i32_e32 v3, vcc, 8, v3
; SI-NEXT:    v_alignbit_b32 v1, v1, v2, v3
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fshr_v2i24:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s4, 0xaaaaaaab
; VI-NEXT:    v_mul_hi_u32 v6, v4, s4
; VI-NEXT:    v_mul_hi_u32 v7, v5, s4
; VI-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
; VI-NEXT:    v_lshrrev_b32_e32 v6, 4, v6
; VI-NEXT:    v_mul_lo_u32 v6, v6, 24
; VI-NEXT:    v_sub_u32_e32 v4, vcc, v4, v6
; VI-NEXT:    v_lshrrev_b32_e32 v6, 4, v7
; VI-NEXT:    v_mul_lo_u32 v6, v6, 24
; VI-NEXT:    v_add_u32_e32 v4, vcc, 8, v4
; VI-NEXT:    v_alignbit_b32 v0, v0, v2, v4
; VI-NEXT:    v_lshlrev_b32_e32 v2, 8, v3
; VI-NEXT:    v_sub_u32_e32 v3, vcc, v5, v6
; VI-NEXT:    v_add_u32_e32 v3, vcc, 8, v3
; VI-NEXT:    v_alignbit_b32 v1, v1, v2, v3
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_v2i24:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s4, 0xaaaaaaab
; GFX9-NEXT:    v_mul_hi_u32 v6, v4, s4
; GFX9-NEXT:    v_mul_hi_u32 v7, v5, s4
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 4, v6
; GFX9-NEXT:    v_mul_lo_u32 v6, v6, 24
; GFX9-NEXT:    v_sub_u32_e32 v4, v4, v6
; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 4, v7
; GFX9-NEXT:    v_mul_lo_u32 v6, v6, 24
; GFX9-NEXT:    v_add_u32_e32 v4, 8, v4
; GFX9-NEXT:    v_alignbit_b32 v0, v0, v2, v4
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 8, v3
; GFX9-NEXT:    v_sub_u32_e32 v3, v5, v6
; GFX9-NEXT:    v_add_u32_e32 v3, 8, v3
; GFX9-NEXT:    v_alignbit_b32 v1, v1, v2, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; R600-LABEL: v_fshr_v2i24:
; R600:       ; %bb.0:
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
  %ret = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2)
  ret <2 x i24> %ret
}