1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s
4; RUN: not --crash llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
5
6; FIXME: Need constant bus fixup pre-gfx10 for movrel
7; ERR: Bad machine code: VOP* instruction violates constant bus restriction
8
9define amdgpu_ps <8 x i32> @dyn_insertelement_v8i32_s_s_s(<8 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
10; GPRIDX-LABEL: dyn_insertelement_v8i32_s_s_s:
11; GPRIDX:       ; %bb.0: ; %entry
12; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 0
13; GPRIDX-NEXT:    s_cselect_b32 s0, s10, s2
14; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 1
15; GPRIDX-NEXT:    s_cselect_b32 s1, s10, s3
16; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 2
17; GPRIDX-NEXT:    s_cselect_b32 s2, s10, s4
18; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 3
19; GPRIDX-NEXT:    s_cselect_b32 s3, s10, s5
20; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 4
21; GPRIDX-NEXT:    s_cselect_b32 s4, s10, s6
22; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 5
23; GPRIDX-NEXT:    s_cselect_b32 s5, s10, s7
24; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 6
25; GPRIDX-NEXT:    s_cselect_b32 s6, s10, s8
26; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 7
27; GPRIDX-NEXT:    s_cselect_b32 s7, s10, s9
28; GPRIDX-NEXT:    ; return to shader part epilog
29;
30; MOVREL-LABEL: dyn_insertelement_v8i32_s_s_s:
31; MOVREL:       ; %bb.0: ; %entry
32; MOVREL-NEXT:    s_cmp_eq_u32 s11, 0
33; MOVREL-NEXT:    s_cselect_b32 s0, s10, s2
34; MOVREL-NEXT:    s_cmp_eq_u32 s11, 1
35; MOVREL-NEXT:    s_cselect_b32 s1, s10, s3
36; MOVREL-NEXT:    s_cmp_eq_u32 s11, 2
37; MOVREL-NEXT:    s_cselect_b32 s2, s10, s4
38; MOVREL-NEXT:    s_cmp_eq_u32 s11, 3
39; MOVREL-NEXT:    s_cselect_b32 s3, s10, s5
40; MOVREL-NEXT:    s_cmp_eq_u32 s11, 4
41; MOVREL-NEXT:    s_cselect_b32 s4, s10, s6
42; MOVREL-NEXT:    s_cmp_eq_u32 s11, 5
43; MOVREL-NEXT:    s_cselect_b32 s5, s10, s7
44; MOVREL-NEXT:    s_cmp_eq_u32 s11, 6
45; MOVREL-NEXT:    s_cselect_b32 s6, s10, s8
46; MOVREL-NEXT:    s_cmp_eq_u32 s11, 7
47; MOVREL-NEXT:    s_cselect_b32 s7, s10, s9
48; MOVREL-NEXT:    ; return to shader part epilog
49entry:
50  %insert = insertelement <8 x i32> %vec, i32 %val, i32 %idx
51  ret <8 x i32> %insert
52}
53
54define amdgpu_ps <8 x i8 addrspace(3)*> @dyn_insertelement_v8p3i8_s_s_s(<8 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) {
55; GPRIDX-LABEL: dyn_insertelement_v8p3i8_s_s_s:
56; GPRIDX:       ; %bb.0: ; %entry
57; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 0
58; GPRIDX-NEXT:    s_cselect_b32 s0, s10, s2
59; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 1
60; GPRIDX-NEXT:    s_cselect_b32 s1, s10, s3
61; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 2
62; GPRIDX-NEXT:    s_cselect_b32 s2, s10, s4
63; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 3
64; GPRIDX-NEXT:    s_cselect_b32 s3, s10, s5
65; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 4
66; GPRIDX-NEXT:    s_cselect_b32 s4, s10, s6
67; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 5
68; GPRIDX-NEXT:    s_cselect_b32 s5, s10, s7
69; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 6
70; GPRIDX-NEXT:    s_cselect_b32 s6, s10, s8
71; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 7
72; GPRIDX-NEXT:    s_cselect_b32 s7, s10, s9
73; GPRIDX-NEXT:    ; return to shader part epilog
74;
75; MOVREL-LABEL: dyn_insertelement_v8p3i8_s_s_s:
76; MOVREL:       ; %bb.0: ; %entry
77; MOVREL-NEXT:    s_cmp_eq_u32 s11, 0
78; MOVREL-NEXT:    s_cselect_b32 s0, s10, s2
79; MOVREL-NEXT:    s_cmp_eq_u32 s11, 1
80; MOVREL-NEXT:    s_cselect_b32 s1, s10, s3
81; MOVREL-NEXT:    s_cmp_eq_u32 s11, 2
82; MOVREL-NEXT:    s_cselect_b32 s2, s10, s4
83; MOVREL-NEXT:    s_cmp_eq_u32 s11, 3
84; MOVREL-NEXT:    s_cselect_b32 s3, s10, s5
85; MOVREL-NEXT:    s_cmp_eq_u32 s11, 4
86; MOVREL-NEXT:    s_cselect_b32 s4, s10, s6
87; MOVREL-NEXT:    s_cmp_eq_u32 s11, 5
88; MOVREL-NEXT:    s_cselect_b32 s5, s10, s7
89; MOVREL-NEXT:    s_cmp_eq_u32 s11, 6
90; MOVREL-NEXT:    s_cselect_b32 s6, s10, s8
91; MOVREL-NEXT:    s_cmp_eq_u32 s11, 7
92; MOVREL-NEXT:    s_cselect_b32 s7, s10, s9
93; MOVREL-NEXT:    ; return to shader part epilog
94entry:
95  %insert = insertelement <8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx
96  ret <8 x i8 addrspace(3)*> %insert
97}
98
99define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) {
100; GPRIDX-LABEL: dyn_insertelement_v8f32_const_s_v_v:
101; GPRIDX:       ; %bb.0: ; %entry
102; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GPRIDX-NEXT:    s_mov_b32 s11, 0x41000000
104; GPRIDX-NEXT:    s_mov_b32 s10, 0x40e00000
105; GPRIDX-NEXT:    s_mov_b32 s9, 0x40c00000
106; GPRIDX-NEXT:    s_mov_b32 s8, 0x40a00000
107; GPRIDX-NEXT:    s_mov_b32 s7, 4.0
108; GPRIDX-NEXT:    s_mov_b32 s6, 0x40400000
109; GPRIDX-NEXT:    s_mov_b32 s5, 2.0
110; GPRIDX-NEXT:    s_mov_b32 s4, 1.0
111; GPRIDX-NEXT:    v_mov_b32_e32 v15, s11
112; GPRIDX-NEXT:    v_mov_b32_e32 v8, s4
113; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
114; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
115; GPRIDX-NEXT:    v_mov_b32_e32 v9, s5
116; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
117; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v0, vcc
118; GPRIDX-NEXT:    v_mov_b32_e32 v10, s6
119; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
120; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
121; GPRIDX-NEXT:    v_mov_b32_e32 v11, s7
122; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
123; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc
124; GPRIDX-NEXT:    v_mov_b32_e32 v12, s8
125; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v1
126; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc
127; GPRIDX-NEXT:    v_mov_b32_e32 v13, s9
128; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v1
129; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc
130; GPRIDX-NEXT:    v_mov_b32_e32 v14, s10
131; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v1
132; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc
133; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v1
134; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v15, v0, vcc
135; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
136; GPRIDX-NEXT:    v_mov_b32_e32 v1, v9
137; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
138;
139; MOVREL-LABEL: dyn_insertelement_v8f32_const_s_v_v:
140; MOVREL:       ; %bb.0: ; %entry
141; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142; MOVREL-NEXT:    s_waitcnt_vscnt null, 0x0
143; MOVREL-NEXT:    s_mov_b32 s11, 0x41000000
144; MOVREL-NEXT:    s_mov_b32 s4, 1.0
145; MOVREL-NEXT:    s_mov_b32 s10, 0x40e00000
146; MOVREL-NEXT:    s_mov_b32 s9, 0x40c00000
147; MOVREL-NEXT:    s_mov_b32 s8, 0x40a00000
148; MOVREL-NEXT:    s_mov_b32 s7, 4.0
149; MOVREL-NEXT:    s_mov_b32 s6, 0x40400000
150; MOVREL-NEXT:    s_mov_b32 s5, 2.0
151; MOVREL-NEXT:    v_mov_b32_e32 v15, s11
152; MOVREL-NEXT:    v_mov_b32_e32 v8, s4
153; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
154; MOVREL-NEXT:    v_mov_b32_e32 v9, s5
155; MOVREL-NEXT:    v_mov_b32_e32 v10, s6
156; MOVREL-NEXT:    v_mov_b32_e32 v11, s7
157; MOVREL-NEXT:    v_mov_b32_e32 v12, s8
158; MOVREL-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
159; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
160; MOVREL-NEXT:    v_mov_b32_e32 v13, s9
161; MOVREL-NEXT:    v_mov_b32_e32 v14, s10
162; MOVREL-NEXT:    v_cndmask_b32_e32 v9, v9, v0, vcc_lo
163; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
164; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc_lo
165; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
166; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc_lo
167; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
168; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc_lo
169; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
170; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc_lo
171; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
172; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc_lo
173; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
174; MOVREL-NEXT:    v_mov_b32_e32 v1, v9
175; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v15, v0, vcc_lo
176; MOVREL-NEXT:    v_mov_b32_e32 v0, v8
177; MOVREL-NEXT:    s_setpc_b64 s[30:31]
178entry:
179  %insert = insertelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, float %val, i32 %idx
180  ret <8 x float> %insert
181}
182
183define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_v(<8 x float> inreg %vec, float inreg %val, i32 %idx) {
184; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_v:
185; GPRIDX:       ; %bb.0: ; %entry
186; GPRIDX-NEXT:    s_mov_b32 s1, s3
187; GPRIDX-NEXT:    s_mov_b32 s3, s5
188; GPRIDX-NEXT:    s_mov_b32 s5, s7
189; GPRIDX-NEXT:    s_mov_b32 s7, s9
190; GPRIDX-NEXT:    s_mov_b32 s0, s2
191; GPRIDX-NEXT:    s_mov_b32 s2, s4
192; GPRIDX-NEXT:    s_mov_b32 s4, s6
193; GPRIDX-NEXT:    s_mov_b32 s6, s8
194; GPRIDX-NEXT:    v_mov_b32_e32 v15, s7
195; GPRIDX-NEXT:    v_mov_b32_e32 v7, s10
196; GPRIDX-NEXT:    v_mov_b32_e32 v8, s0
197; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
198; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v7, vcc
199; GPRIDX-NEXT:    v_mov_b32_e32 v9, s1
200; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
201; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v9, v7, vcc
202; GPRIDX-NEXT:    v_mov_b32_e32 v10, s2
203; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
204; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v10, v7, vcc
205; GPRIDX-NEXT:    v_mov_b32_e32 v11, s3
206; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
207; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v11, v7, vcc
208; GPRIDX-NEXT:    v_mov_b32_e32 v12, s4
209; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
210; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v12, v7, vcc
211; GPRIDX-NEXT:    v_mov_b32_e32 v13, s5
212; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
213; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v13, v7, vcc
214; GPRIDX-NEXT:    v_mov_b32_e32 v14, s6
215; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
216; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v14, v7, vcc
217; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
218; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v15, v7, vcc
219; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
220; GPRIDX-NEXT:    ; return to shader part epilog
221;
222; MOVREL-LABEL: dyn_insertelement_v8f32_s_s_v:
223; MOVREL:       ; %bb.0: ; %entry
224; MOVREL-NEXT:    s_mov_b32 s1, s3
225; MOVREL-NEXT:    s_mov_b32 s3, s5
226; MOVREL-NEXT:    s_mov_b32 s5, s7
227; MOVREL-NEXT:    s_mov_b32 s7, s9
228; MOVREL-NEXT:    s_mov_b32 s0, s2
229; MOVREL-NEXT:    s_mov_b32 s2, s4
230; MOVREL-NEXT:    s_mov_b32 s4, s6
231; MOVREL-NEXT:    s_mov_b32 s6, s8
232; MOVREL-NEXT:    v_mov_b32_e32 v15, s7
233; MOVREL-NEXT:    v_mov_b32_e32 v7, s10
234; MOVREL-NEXT:    v_mov_b32_e32 v8, s0
235; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
236; MOVREL-NEXT:    v_mov_b32_e32 v9, s1
237; MOVREL-NEXT:    v_mov_b32_e32 v10, s2
238; MOVREL-NEXT:    v_mov_b32_e32 v11, s3
239; MOVREL-NEXT:    v_mov_b32_e32 v12, s4
240; MOVREL-NEXT:    v_cndmask_b32_e32 v8, v8, v7, vcc_lo
241; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
242; MOVREL-NEXT:    v_mov_b32_e32 v13, s5
243; MOVREL-NEXT:    v_mov_b32_e32 v14, s6
244; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v9, v7, vcc_lo
245; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
246; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v10, v7, vcc_lo
247; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
248; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v11, v7, vcc_lo
249; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
250; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v12, v7, vcc_lo
251; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
252; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v13, v7, vcc_lo
253; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
254; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v14, v7, vcc_lo
255; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
256; MOVREL-NEXT:    v_mov_b32_e32 v0, v8
257; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v15, v7, vcc_lo
258; MOVREL-NEXT:    ; return to shader part epilog
259entry:
260  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
261  ret <8 x float> %insert
262}
263
264define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_s(<8 x float> inreg %vec, float %val, i32 inreg %idx) {
265; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_s:
266; GPRIDX:       ; %bb.0: ; %entry
267; GPRIDX-NEXT:    s_mov_b32 s1, s3
268; GPRIDX-NEXT:    s_mov_b32 s3, s5
269; GPRIDX-NEXT:    s_mov_b32 s5, s7
270; GPRIDX-NEXT:    s_mov_b32 s7, s9
271; GPRIDX-NEXT:    s_mov_b32 s0, s2
272; GPRIDX-NEXT:    s_mov_b32 s2, s4
273; GPRIDX-NEXT:    s_mov_b32 s4, s6
274; GPRIDX-NEXT:    s_mov_b32 s6, s8
275; GPRIDX-NEXT:    v_mov_b32_e32 v15, s7
276; GPRIDX-NEXT:    v_mov_b32_e32 v8, s0
277; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 0
278; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
279; GPRIDX-NEXT:    v_mov_b32_e32 v9, s1
280; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 1
281; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc
282; GPRIDX-NEXT:    v_mov_b32_e32 v10, s2
283; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 2
284; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
285; GPRIDX-NEXT:    v_mov_b32_e32 v11, s3
286; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 3
287; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc
288; GPRIDX-NEXT:    v_mov_b32_e32 v12, s4
289; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 4
290; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc
291; GPRIDX-NEXT:    v_mov_b32_e32 v13, s5
292; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 5
293; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc
294; GPRIDX-NEXT:    v_mov_b32_e32 v14, s6
295; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 6
296; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc
297; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 7
298; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v15, v0, vcc
299; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
300; GPRIDX-NEXT:    ; return to shader part epilog
301;
302; MOVREL-LABEL: dyn_insertelement_v8f32_s_v_s:
303; MOVREL:       ; %bb.0: ; %entry
304; MOVREL-NEXT:    s_mov_b32 s1, s3
305; MOVREL-NEXT:    s_mov_b32 s3, s5
306; MOVREL-NEXT:    s_mov_b32 s5, s7
307; MOVREL-NEXT:    s_mov_b32 s7, s9
308; MOVREL-NEXT:    s_mov_b32 s0, s2
309; MOVREL-NEXT:    s_mov_b32 s2, s4
310; MOVREL-NEXT:    s_mov_b32 s4, s6
311; MOVREL-NEXT:    s_mov_b32 s6, s8
312; MOVREL-NEXT:    v_mov_b32_e32 v15, s7
313; MOVREL-NEXT:    v_mov_b32_e32 v8, s0
314; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 0
315; MOVREL-NEXT:    v_mov_b32_e32 v9, s1
316; MOVREL-NEXT:    v_mov_b32_e32 v10, s2
317; MOVREL-NEXT:    v_mov_b32_e32 v11, s3
318; MOVREL-NEXT:    v_mov_b32_e32 v12, s4
319; MOVREL-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
320; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 1
321; MOVREL-NEXT:    v_mov_b32_e32 v13, s5
322; MOVREL-NEXT:    v_mov_b32_e32 v14, s6
323; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc_lo
324; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 2
325; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc_lo
326; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 3
327; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc_lo
328; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 4
329; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc_lo
330; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 5
331; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc_lo
332; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 6
333; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc_lo
334; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 7
335; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v15, v0, vcc_lo
336; MOVREL-NEXT:    v_mov_b32_e32 v0, v8
337; MOVREL-NEXT:    ; return to shader part epilog
338entry:
339  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
340  ret <8 x float> %insert
341}
342
343define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_s(<8 x float> %vec, float inreg %val, i32 inreg %idx) {
344; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_s:
345; GPRIDX:       ; %bb.0: ; %entry
346; GPRIDX-NEXT:    v_mov_b32_e32 v8, s2
347; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 0
348; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
349; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 1
350; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
351; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 2
352; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
353; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 3
354; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
355; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 4
356; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
357; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 5
358; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
359; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 6
360; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
361; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 7
362; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
363; GPRIDX-NEXT:    ; return to shader part epilog
364;
365; MOVREL-LABEL: dyn_insertelement_v8f32_v_s_s:
366; MOVREL:       ; %bb.0: ; %entry
367; MOVREL-NEXT:    v_mov_b32_e32 v8, s2
368; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 0
369; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
370; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 1
371; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
372; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 2
373; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
374; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 3
375; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
376; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 4
377; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
378; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 5
379; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
380; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 6
381; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
382; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 7
383; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
384; MOVREL-NEXT:    ; return to shader part epilog
385entry:
386  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
387  ret <8 x float> %insert
388}
389
390define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_v(<8 x float> inreg %vec, float %val, i32 %idx) {
391; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_v:
392; GPRIDX:       ; %bb.0: ; %entry
393; GPRIDX-NEXT:    s_mov_b32 s1, s3
394; GPRIDX-NEXT:    s_mov_b32 s3, s5
395; GPRIDX-NEXT:    s_mov_b32 s5, s7
396; GPRIDX-NEXT:    s_mov_b32 s7, s9
397; GPRIDX-NEXT:    s_mov_b32 s0, s2
398; GPRIDX-NEXT:    s_mov_b32 s2, s4
399; GPRIDX-NEXT:    s_mov_b32 s4, s6
400; GPRIDX-NEXT:    s_mov_b32 s6, s8
401; GPRIDX-NEXT:    v_mov_b32_e32 v15, s7
402; GPRIDX-NEXT:    v_mov_b32_e32 v8, s0
403; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
404; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
405; GPRIDX-NEXT:    v_mov_b32_e32 v9, s1
406; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
407; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v0, vcc
408; GPRIDX-NEXT:    v_mov_b32_e32 v10, s2
409; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
410; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
411; GPRIDX-NEXT:    v_mov_b32_e32 v11, s3
412; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
413; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc
414; GPRIDX-NEXT:    v_mov_b32_e32 v12, s4
415; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v1
416; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc
417; GPRIDX-NEXT:    v_mov_b32_e32 v13, s5
418; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v1
419; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc
420; GPRIDX-NEXT:    v_mov_b32_e32 v14, s6
421; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v1
422; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc
423; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v1
424; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v15, v0, vcc
425; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
426; GPRIDX-NEXT:    v_mov_b32_e32 v1, v9
427; GPRIDX-NEXT:    ; return to shader part epilog
428;
429; MOVREL-LABEL: dyn_insertelement_v8f32_s_v_v:
430; MOVREL:       ; %bb.0: ; %entry
431; MOVREL-NEXT:    s_mov_b32 s1, s3
432; MOVREL-NEXT:    s_mov_b32 s3, s5
433; MOVREL-NEXT:    s_mov_b32 s5, s7
434; MOVREL-NEXT:    s_mov_b32 s7, s9
435; MOVREL-NEXT:    s_mov_b32 s0, s2
436; MOVREL-NEXT:    s_mov_b32 s2, s4
437; MOVREL-NEXT:    s_mov_b32 s4, s6
438; MOVREL-NEXT:    s_mov_b32 s6, s8
439; MOVREL-NEXT:    v_mov_b32_e32 v15, s7
440; MOVREL-NEXT:    v_mov_b32_e32 v8, s0
441; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
442; MOVREL-NEXT:    v_mov_b32_e32 v9, s1
443; MOVREL-NEXT:    v_mov_b32_e32 v10, s2
444; MOVREL-NEXT:    v_mov_b32_e32 v11, s3
445; MOVREL-NEXT:    v_mov_b32_e32 v12, s4
446; MOVREL-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
447; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
448; MOVREL-NEXT:    v_mov_b32_e32 v13, s5
449; MOVREL-NEXT:    v_mov_b32_e32 v14, s6
450; MOVREL-NEXT:    v_cndmask_b32_e32 v9, v9, v0, vcc_lo
451; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
452; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc_lo
453; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
454; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc_lo
455; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
456; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc_lo
457; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
458; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc_lo
459; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
460; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc_lo
461; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
462; MOVREL-NEXT:    v_mov_b32_e32 v1, v9
463; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v15, v0, vcc_lo
464; MOVREL-NEXT:    v_mov_b32_e32 v0, v8
465; MOVREL-NEXT:    ; return to shader part epilog
466entry:
467  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
468  ret <8 x float> %insert
469}
470
471define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_v(<8 x float> %vec, float inreg %val, i32 %idx) {
472; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_v:
473; GPRIDX:       ; %bb.0: ; %entry
474; GPRIDX-NEXT:    v_mov_b32_e32 v9, s2
475; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
476; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
477; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
478; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
479; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
480; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v9, vcc
481; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
482; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
483; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
484; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
485; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
486; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
487; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
488; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc
489; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
490; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc
491; GPRIDX-NEXT:    ; return to shader part epilog
492;
493; MOVREL-LABEL: dyn_insertelement_v8f32_v_s_v:
494; MOVREL:       ; %bb.0: ; %entry
495; MOVREL-NEXT:    v_mov_b32_e32 v9, s2
496; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v8
497; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
498; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
499; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
500; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
501; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v9, vcc_lo
502; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
503; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc_lo
504; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
505; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc_lo
506; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
507; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc_lo
508; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
509; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc_lo
510; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
511; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc_lo
512; MOVREL-NEXT:    ; return to shader part epilog
513entry:
514  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
515  ret <8 x float> %insert
516}
517
518define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_s(<8 x float> %vec, float %val, i32 inreg %idx) {
519; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_s:
520; GPRIDX:       ; %bb.0: ; %entry
521; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
522; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
523; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
524; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
525; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
526; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
527; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
528; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
529; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
530; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
531; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
532; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
533; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
534; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
535; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 7
536; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
537; GPRIDX-NEXT:    ; return to shader part epilog
538;
539; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_s:
540; MOVREL:       ; %bb.0: ; %entry
541; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
542; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
543; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
544; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
545; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
546; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
547; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
548; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
549; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
550; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
551; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
552; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
553; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 6
554; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
555; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 7
556; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
557; MOVREL-NEXT:    ; return to shader part epilog
558entry:
559  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
560  ret <8 x float> %insert
561}
562
563define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 inreg %idx) {
564; GPRIDX-LABEL: dyn_insertelement_v8p3i8_v_v_s:
565; GPRIDX:       ; %bb.0: ; %entry
566; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
567; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
568; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
569; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
570; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
571; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
572; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
573; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
574; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
575; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
576; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
577; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
578; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
579; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
580; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 7
581; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
582; GPRIDX-NEXT:    ; return to shader part epilog
583;
584; MOVREL-LABEL: dyn_insertelement_v8p3i8_v_v_s:
585; MOVREL:       ; %bb.0: ; %entry
586; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
587; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
588; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
589; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
590; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
591; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
592; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
593; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
594; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
595; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
596; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
597; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
598; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 6
599; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
600; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 7
601; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
602; MOVREL-NEXT:    ; return to shader part epilog
603entry:
604  %insert = insertelement <8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx
605  %cast.0 = ptrtoint <8 x i8 addrspace(3)*> %insert to <8 x i32>
606  %cast.1 = bitcast <8 x i32> %cast.0 to <8 x float>
607  ret <8 x float> %cast.1
608}
609
610define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v(<8 x float> %vec, float %val, i32 %idx) {
611; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v:
612; GPRIDX:       ; %bb.0: ; %entry
613; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
614; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
615; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v9
616; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
617; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v9
618; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
619; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
620; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
621; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v9
622; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
623; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v9
624; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
625; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v9
626; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
627; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v9
628; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
629; GPRIDX-NEXT:    ; return to shader part epilog
630;
631; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_v:
632; MOVREL:       ; %bb.0: ; %entry
633; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v9
634; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
635; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
636; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
637; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v9
638; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
639; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v9
640; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
641; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v9
642; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
643; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v9
644; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
645; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v9
646; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
647; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v9
648; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
649; MOVREL-NEXT:    ; return to shader part epilog
650entry:
651  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
652  ret <8 x float> %insert
653}
654
655define amdgpu_ps <8 x i64> @dyn_insertelement_v8i64_s_s_s(<8 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) {
656; GPRIDX-LABEL: dyn_insertelement_v8i64_s_s_s:
657; GPRIDX:       ; %bb.0: ; %entry
658; GPRIDX-NEXT:    s_mov_b32 s0, s2
659; GPRIDX-NEXT:    s_mov_b32 s1, s3
660; GPRIDX-NEXT:    s_mov_b32 s2, s4
661; GPRIDX-NEXT:    s_mov_b32 s3, s5
662; GPRIDX-NEXT:    s_mov_b32 s4, s6
663; GPRIDX-NEXT:    s_mov_b32 s5, s7
664; GPRIDX-NEXT:    s_mov_b32 s6, s8
665; GPRIDX-NEXT:    s_mov_b32 s7, s9
666; GPRIDX-NEXT:    s_mov_b32 s8, s10
667; GPRIDX-NEXT:    s_mov_b32 s9, s11
668; GPRIDX-NEXT:    s_mov_b32 s10, s12
669; GPRIDX-NEXT:    s_mov_b32 s11, s13
670; GPRIDX-NEXT:    s_mov_b32 s12, s14
671; GPRIDX-NEXT:    s_mov_b32 s13, s15
672; GPRIDX-NEXT:    s_mov_b32 s14, s16
673; GPRIDX-NEXT:    s_mov_b32 s15, s17
674; GPRIDX-NEXT:    s_mov_b32 m0, s20
675; GPRIDX-NEXT:    s_nop 0
676; GPRIDX-NEXT:    s_movreld_b64 s[0:1], s[18:19]
677; GPRIDX-NEXT:    ; return to shader part epilog
678;
679; MOVREL-LABEL: dyn_insertelement_v8i64_s_s_s:
680; MOVREL:       ; %bb.0: ; %entry
681; MOVREL-NEXT:    s_mov_b32 s0, s2
682; MOVREL-NEXT:    s_mov_b32 s1, s3
683; MOVREL-NEXT:    s_mov_b32 m0, s20
684; MOVREL-NEXT:    s_mov_b32 s2, s4
685; MOVREL-NEXT:    s_mov_b32 s3, s5
686; MOVREL-NEXT:    s_mov_b32 s4, s6
687; MOVREL-NEXT:    s_mov_b32 s5, s7
688; MOVREL-NEXT:    s_mov_b32 s6, s8
689; MOVREL-NEXT:    s_mov_b32 s7, s9
690; MOVREL-NEXT:    s_mov_b32 s8, s10
691; MOVREL-NEXT:    s_mov_b32 s9, s11
692; MOVREL-NEXT:    s_mov_b32 s10, s12
693; MOVREL-NEXT:    s_mov_b32 s11, s13
694; MOVREL-NEXT:    s_mov_b32 s12, s14
695; MOVREL-NEXT:    s_mov_b32 s13, s15
696; MOVREL-NEXT:    s_mov_b32 s14, s16
697; MOVREL-NEXT:    s_mov_b32 s15, s17
698; MOVREL-NEXT:    s_movreld_b64 s[0:1], s[18:19]
699; MOVREL-NEXT:    ; return to shader part epilog
700entry:
701  %insert = insertelement <8 x i64> %vec, i64 %val, i32 %idx
702  ret <8 x i64> %insert
703}
704
705define amdgpu_ps <8 x i8 addrspace(1)*> @dyn_insertelement_v8p1i8_s_s_s(<8 x i8 addrspace(1)*> inreg %vec, i8 addrspace(1)* inreg %val, i32 inreg %idx) {
706; GPRIDX-LABEL: dyn_insertelement_v8p1i8_s_s_s:
707; GPRIDX:       ; %bb.0: ; %entry
708; GPRIDX-NEXT:    s_mov_b32 s0, s2
709; GPRIDX-NEXT:    s_mov_b32 s1, s3
710; GPRIDX-NEXT:    s_mov_b32 s2, s4
711; GPRIDX-NEXT:    s_mov_b32 s3, s5
712; GPRIDX-NEXT:    s_mov_b32 s4, s6
713; GPRIDX-NEXT:    s_mov_b32 s5, s7
714; GPRIDX-NEXT:    s_mov_b32 s6, s8
715; GPRIDX-NEXT:    s_mov_b32 s7, s9
716; GPRIDX-NEXT:    s_mov_b32 s8, s10
717; GPRIDX-NEXT:    s_mov_b32 s9, s11
718; GPRIDX-NEXT:    s_mov_b32 s10, s12
719; GPRIDX-NEXT:    s_mov_b32 s11, s13
720; GPRIDX-NEXT:    s_mov_b32 s12, s14
721; GPRIDX-NEXT:    s_mov_b32 s13, s15
722; GPRIDX-NEXT:    s_mov_b32 s14, s16
723; GPRIDX-NEXT:    s_mov_b32 s15, s17
724; GPRIDX-NEXT:    s_mov_b32 m0, s20
725; GPRIDX-NEXT:    s_nop 0
726; GPRIDX-NEXT:    s_movreld_b64 s[0:1], s[18:19]
727; GPRIDX-NEXT:    ; return to shader part epilog
728;
729; MOVREL-LABEL: dyn_insertelement_v8p1i8_s_s_s:
730; MOVREL:       ; %bb.0: ; %entry
731; MOVREL-NEXT:    s_mov_b32 s0, s2
732; MOVREL-NEXT:    s_mov_b32 s1, s3
733; MOVREL-NEXT:    s_mov_b32 m0, s20
734; MOVREL-NEXT:    s_mov_b32 s2, s4
735; MOVREL-NEXT:    s_mov_b32 s3, s5
736; MOVREL-NEXT:    s_mov_b32 s4, s6
737; MOVREL-NEXT:    s_mov_b32 s5, s7
738; MOVREL-NEXT:    s_mov_b32 s6, s8
739; MOVREL-NEXT:    s_mov_b32 s7, s9
740; MOVREL-NEXT:    s_mov_b32 s8, s10
741; MOVREL-NEXT:    s_mov_b32 s9, s11
742; MOVREL-NEXT:    s_mov_b32 s10, s12
743; MOVREL-NEXT:    s_mov_b32 s11, s13
744; MOVREL-NEXT:    s_mov_b32 s12, s14
745; MOVREL-NEXT:    s_mov_b32 s13, s15
746; MOVREL-NEXT:    s_mov_b32 s14, s16
747; MOVREL-NEXT:    s_mov_b32 s15, s17
748; MOVREL-NEXT:    s_movreld_b64 s[0:1], s[18:19]
749; MOVREL-NEXT:    ; return to shader part epilog
750entry:
751  %insert = insertelement <8 x i8 addrspace(1)*> %vec, i8 addrspace(1)* %val, i32 %idx
752  ret <8 x i8 addrspace(1)*> %insert
753}
754
755define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
756; GPRIDX-LABEL: dyn_insertelement_v8f64_const_s_v_v:
757; GPRIDX:       ; %bb.0: ; %entry
758; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759; GPRIDX-NEXT:    s_mov_b32 s18, 0
760; GPRIDX-NEXT:    s_mov_b64 s[4:5], 1.0
761; GPRIDX-NEXT:    s_mov_b32 s19, 0x40200000
762; GPRIDX-NEXT:    s_mov_b32 s17, 0x401c0000
763; GPRIDX-NEXT:    s_mov_b32 s16, s18
764; GPRIDX-NEXT:    s_mov_b32 s15, 0x40180000
765; GPRIDX-NEXT:    s_mov_b32 s14, s18
766; GPRIDX-NEXT:    s_mov_b32 s13, 0x40140000
767; GPRIDX-NEXT:    s_mov_b32 s12, s18
768; GPRIDX-NEXT:    s_mov_b64 s[10:11], 4.0
769; GPRIDX-NEXT:    s_mov_b32 s9, 0x40080000
770; GPRIDX-NEXT:    s_mov_b32 s8, s18
771; GPRIDX-NEXT:    s_mov_b64 s[6:7], 2.0
772; GPRIDX-NEXT:    v_mov_b32_e32 v3, s4
773; GPRIDX-NEXT:    v_mov_b32_e32 v4, s5
774; GPRIDX-NEXT:    v_mov_b32_e32 v5, s6
775; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
776; GPRIDX-NEXT:    v_mov_b32_e32 v6, s7
777; GPRIDX-NEXT:    v_mov_b32_e32 v7, s8
778; GPRIDX-NEXT:    v_mov_b32_e32 v8, s9
779; GPRIDX-NEXT:    v_mov_b32_e32 v9, s10
780; GPRIDX-NEXT:    v_mov_b32_e32 v10, s11
781; GPRIDX-NEXT:    v_mov_b32_e32 v11, s12
782; GPRIDX-NEXT:    v_mov_b32_e32 v12, s13
783; GPRIDX-NEXT:    v_mov_b32_e32 v13, s14
784; GPRIDX-NEXT:    v_mov_b32_e32 v14, s15
785; GPRIDX-NEXT:    v_mov_b32_e32 v15, s16
786; GPRIDX-NEXT:    v_mov_b32_e32 v16, s17
787; GPRIDX-NEXT:    v_mov_b32_e32 v17, s18
788; GPRIDX-NEXT:    v_mov_b32_e32 v18, s19
789; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[16:17], 0, v2
790; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 2, v2
791; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 3, v2
792; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 4, v2
793; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 5, v2
794; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[12:13], 6, v2
795; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[14:15], 7, v2
796; GPRIDX-NEXT:    v_cndmask_b32_e64 v3, v3, v0, s[16:17]
797; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
798; GPRIDX-NEXT:    v_cndmask_b32_e64 v4, v4, v1, s[16:17]
799; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v1, vcc
800; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v0, s[4:5]
801; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s[6:7]
802; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v11, v0, s[8:9]
803; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v13, v0, s[10:11]
804; GPRIDX-NEXT:    v_cndmask_b32_e64 v15, v15, v0, s[12:13]
805; GPRIDX-NEXT:    v_cndmask_b32_e64 v17, v17, v0, s[14:15]
806; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[4:5]
807; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s[6:7]
808; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v1, s[8:9]
809; GPRIDX-NEXT:    v_cndmask_b32_e64 v14, v14, v1, s[10:11]
810; GPRIDX-NEXT:    v_cndmask_b32_e64 v16, v16, v1, s[12:13]
811; GPRIDX-NEXT:    v_cndmask_b32_e64 v18, v18, v1, s[14:15]
812; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
813; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
814; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
815; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
816; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
817; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
818;
819; MOVREL-LABEL: dyn_insertelement_v8f64_const_s_v_v:
820; MOVREL:       ; %bb.0: ; %entry
821; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
822; MOVREL-NEXT:    s_waitcnt_vscnt null, 0x0
823; MOVREL-NEXT:    s_mov_b32 s18, 0
824; MOVREL-NEXT:    s_mov_b64 s[4:5], 1.0
825; MOVREL-NEXT:    s_mov_b32 s19, 0x40200000
826; MOVREL-NEXT:    s_mov_b32 s17, 0x401c0000
827; MOVREL-NEXT:    s_mov_b32 s16, s18
828; MOVREL-NEXT:    s_mov_b32 s15, 0x40180000
829; MOVREL-NEXT:    s_mov_b32 s14, s18
830; MOVREL-NEXT:    s_mov_b32 s13, 0x40140000
831; MOVREL-NEXT:    s_mov_b32 s12, s18
832; MOVREL-NEXT:    s_mov_b64 s[10:11], 4.0
833; MOVREL-NEXT:    s_mov_b32 s9, 0x40080000
834; MOVREL-NEXT:    s_mov_b32 s8, s18
835; MOVREL-NEXT:    s_mov_b64 s[6:7], 2.0
836; MOVREL-NEXT:    v_mov_b32_e32 v3, s4
837; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
838; MOVREL-NEXT:    v_mov_b32_e32 v4, s5
839; MOVREL-NEXT:    v_mov_b32_e32 v5, s6
840; MOVREL-NEXT:    v_mov_b32_e32 v6, s7
841; MOVREL-NEXT:    v_mov_b32_e32 v7, s8
842; MOVREL-NEXT:    v_mov_b32_e32 v8, s9
843; MOVREL-NEXT:    v_mov_b32_e32 v9, s10
844; MOVREL-NEXT:    v_mov_b32_e32 v10, s11
845; MOVREL-NEXT:    v_mov_b32_e32 v11, s12
846; MOVREL-NEXT:    v_mov_b32_e32 v12, s13
847; MOVREL-NEXT:    v_mov_b32_e32 v13, s14
848; MOVREL-NEXT:    v_mov_b32_e32 v14, s15
849; MOVREL-NEXT:    v_mov_b32_e32 v15, s16
850; MOVREL-NEXT:    v_mov_b32_e32 v16, s17
851; MOVREL-NEXT:    v_mov_b32_e32 v17, s18
852; MOVREL-NEXT:    v_mov_b32_e32 v18, s19
853; MOVREL-NEXT:    v_cmp_eq_u32_e64 s4, 1, v2
854; MOVREL-NEXT:    v_cmp_eq_u32_e64 s5, 3, v2
855; MOVREL-NEXT:    v_cmp_eq_u32_e64 s10, 2, v2
856; MOVREL-NEXT:    v_cmp_eq_u32_e64 s6, 4, v2
857; MOVREL-NEXT:    v_cmp_eq_u32_e64 s7, 5, v2
858; MOVREL-NEXT:    v_cmp_eq_u32_e64 s8, 6, v2
859; MOVREL-NEXT:    v_cmp_eq_u32_e64 s9, 7, v2
860; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
861; MOVREL-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s4
862; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
863; MOVREL-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s4
864; MOVREL-NEXT:    v_cndmask_b32_e64 v7, v7, v0, s10
865; MOVREL-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s5
866; MOVREL-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s10
867; MOVREL-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s5
868; MOVREL-NEXT:    v_cndmask_b32_e64 v11, v11, v0, s6
869; MOVREL-NEXT:    v_cndmask_b32_e64 v13, v13, v0, s7
870; MOVREL-NEXT:    v_cndmask_b32_e64 v12, v12, v1, s6
871; MOVREL-NEXT:    v_cndmask_b32_e64 v14, v14, v1, s7
872; MOVREL-NEXT:    v_cndmask_b32_e64 v15, v15, v0, s8
873; MOVREL-NEXT:    v_cndmask_b32_e64 v17, v17, v0, s9
874; MOVREL-NEXT:    v_cndmask_b32_e64 v16, v16, v1, s8
875; MOVREL-NEXT:    v_cndmask_b32_e64 v18, v18, v1, s9
876; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
877; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
878; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
879; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
880; MOVREL-NEXT:    s_waitcnt_vscnt null, 0x0
881; MOVREL-NEXT:    s_setpc_b64 s[30:31]
882entry:
883  %insert = insertelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, double %val, i32 %idx
884  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
885  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
886  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
887  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
888  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
889  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
890  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
891  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
892  ret void
893}
894
895define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, double inreg %val, i32 %idx) {
896; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_v:
897; GPRIDX:       ; %bb.0: ; %entry
898; GPRIDX-NEXT:    s_mov_b32 s1, s3
899; GPRIDX-NEXT:    s_mov_b32 s3, s5
900; GPRIDX-NEXT:    s_mov_b32 s5, s7
901; GPRIDX-NEXT:    s_mov_b32 s7, s9
902; GPRIDX-NEXT:    s_mov_b32 s9, s11
903; GPRIDX-NEXT:    s_mov_b32 s11, s13
904; GPRIDX-NEXT:    s_mov_b32 s13, s15
905; GPRIDX-NEXT:    s_mov_b32 s15, s17
906; GPRIDX-NEXT:    s_mov_b32 s0, s2
907; GPRIDX-NEXT:    s_mov_b32 s2, s4
908; GPRIDX-NEXT:    s_mov_b32 s4, s6
909; GPRIDX-NEXT:    s_mov_b32 s6, s8
910; GPRIDX-NEXT:    s_mov_b32 s8, s10
911; GPRIDX-NEXT:    s_mov_b32 s10, s12
912; GPRIDX-NEXT:    s_mov_b32 s12, s14
913; GPRIDX-NEXT:    s_mov_b32 s14, s16
914; GPRIDX-NEXT:    v_mov_b32_e32 v16, s15
915; GPRIDX-NEXT:    v_mov_b32_e32 v15, s14
916; GPRIDX-NEXT:    v_mov_b32_e32 v14, s13
917; GPRIDX-NEXT:    v_mov_b32_e32 v13, s12
918; GPRIDX-NEXT:    v_mov_b32_e32 v12, s11
919; GPRIDX-NEXT:    v_mov_b32_e32 v11, s10
920; GPRIDX-NEXT:    v_mov_b32_e32 v10, s9
921; GPRIDX-NEXT:    v_mov_b32_e32 v9, s8
922; GPRIDX-NEXT:    v_mov_b32_e32 v8, s7
923; GPRIDX-NEXT:    v_mov_b32_e32 v7, s6
924; GPRIDX-NEXT:    v_mov_b32_e32 v6, s5
925; GPRIDX-NEXT:    v_mov_b32_e32 v5, s4
926; GPRIDX-NEXT:    v_mov_b32_e32 v4, s3
927; GPRIDX-NEXT:    v_mov_b32_e32 v3, s2
928; GPRIDX-NEXT:    v_mov_b32_e32 v2, s1
929; GPRIDX-NEXT:    v_mov_b32_e32 v1, s0
930; GPRIDX-NEXT:    v_mov_b32_e32 v17, s18
931; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
932; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 2, v0
933; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[2:3], 3, v0
934; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 4, v0
935; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 5, v0
936; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 6, v0
937; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 7, v0
938; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[12:13], 0, v0
939; GPRIDX-NEXT:    v_mov_b32_e32 v0, s19
940; GPRIDX-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s[12:13]
941; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v17, vcc
942; GPRIDX-NEXT:    v_cndmask_b32_e64 v2, v2, v0, s[12:13]
943; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
944; GPRIDX-NEXT:    v_cndmask_b32_e64 v5, v5, v17, s[0:1]
945; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s[2:3]
946; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v17, s[4:5]
947; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v11, v17, s[6:7]
948; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v13, v17, s[8:9]
949; GPRIDX-NEXT:    v_cndmask_b32_e64 v15, v15, v17, s[10:11]
950; GPRIDX-NEXT:    v_cndmask_b32_e64 v6, v6, v0, s[0:1]
951; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v0, s[2:3]
952; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v0, s[4:5]
953; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v0, s[6:7]
954; GPRIDX-NEXT:    v_cndmask_b32_e64 v14, v14, v0, s[8:9]
955; GPRIDX-NEXT:    v_cndmask_b32_e64 v16, v16, v0, s[10:11]
956; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[1:4], off
957; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[5:8], off
958; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[9:12], off
959; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[13:16], off
960; GPRIDX-NEXT:    s_endpgm
961;
962; MOVREL-LABEL: dyn_insertelement_v8f64_s_s_v:
963; MOVREL:       ; %bb.0: ; %entry
964; MOVREL-NEXT:    s_mov_b32 s1, s3
965; MOVREL-NEXT:    s_mov_b32 s3, s5
966; MOVREL-NEXT:    s_mov_b32 s5, s7
967; MOVREL-NEXT:    s_mov_b32 s7, s9
968; MOVREL-NEXT:    s_mov_b32 s9, s11
969; MOVREL-NEXT:    s_mov_b32 s11, s13
970; MOVREL-NEXT:    s_mov_b32 s13, s15
971; MOVREL-NEXT:    s_mov_b32 s15, s17
972; MOVREL-NEXT:    s_mov_b32 s0, s2
973; MOVREL-NEXT:    s_mov_b32 s2, s4
974; MOVREL-NEXT:    s_mov_b32 s4, s6
975; MOVREL-NEXT:    s_mov_b32 s6, s8
976; MOVREL-NEXT:    s_mov_b32 s8, s10
977; MOVREL-NEXT:    s_mov_b32 s10, s12
978; MOVREL-NEXT:    s_mov_b32 s12, s14
979; MOVREL-NEXT:    s_mov_b32 s14, s16
980; MOVREL-NEXT:    v_mov_b32_e32 v16, s15
981; MOVREL-NEXT:    v_mov_b32_e32 v2, s1
982; MOVREL-NEXT:    v_mov_b32_e32 v1, s0
983; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
984; MOVREL-NEXT:    v_mov_b32_e32 v15, s14
985; MOVREL-NEXT:    v_mov_b32_e32 v14, s13
986; MOVREL-NEXT:    v_mov_b32_e32 v13, s12
987; MOVREL-NEXT:    v_mov_b32_e32 v12, s11
988; MOVREL-NEXT:    v_mov_b32_e32 v11, s10
989; MOVREL-NEXT:    v_mov_b32_e32 v10, s9
990; MOVREL-NEXT:    v_mov_b32_e32 v9, s8
991; MOVREL-NEXT:    v_mov_b32_e32 v8, s7
992; MOVREL-NEXT:    v_mov_b32_e32 v7, s6
993; MOVREL-NEXT:    v_mov_b32_e32 v6, s5
994; MOVREL-NEXT:    v_mov_b32_e32 v5, s4
995; MOVREL-NEXT:    v_mov_b32_e32 v4, s3
996; MOVREL-NEXT:    v_mov_b32_e32 v3, s2
997; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, 1, v0
998; MOVREL-NEXT:    s_mov_b32 s30, s18
999; MOVREL-NEXT:    s_mov_b32 s31, s19
1000; MOVREL-NEXT:    v_cmp_eq_u32_e64 s1, 2, v0
1001; MOVREL-NEXT:    v_cndmask_b32_e64 v1, v1, s30, vcc_lo
1002; MOVREL-NEXT:    v_cndmask_b32_e64 v2, v2, s31, vcc_lo
1003; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
1004; MOVREL-NEXT:    v_cmp_eq_u32_e64 s2, 5, v0
1005; MOVREL-NEXT:    v_cndmask_b32_e64 v3, v3, s30, s0
1006; MOVREL-NEXT:    v_cndmask_b32_e64 v4, v4, s31, s0
1007; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, 4, v0
1008; MOVREL-NEXT:    v_cmp_eq_u32_e64 s3, 6, v0
1009; MOVREL-NEXT:    v_cmp_eq_u32_e64 s4, 7, v0
1010; MOVREL-NEXT:    v_cndmask_b32_e64 v5, v5, s30, s1
1011; MOVREL-NEXT:    v_cndmask_b32_e64 v6, v6, s31, s1
1012; MOVREL-NEXT:    v_cndmask_b32_e64 v7, v7, s30, vcc_lo
1013; MOVREL-NEXT:    v_cndmask_b32_e64 v8, v8, s31, vcc_lo
1014; MOVREL-NEXT:    v_cndmask_b32_e64 v9, v9, s30, s0
1015; MOVREL-NEXT:    v_cndmask_b32_e64 v10, v10, s31, s0
1016; MOVREL-NEXT:    v_cndmask_b32_e64 v11, v11, s30, s2
1017; MOVREL-NEXT:    v_cndmask_b32_e64 v12, v12, s31, s2
1018; MOVREL-NEXT:    v_cndmask_b32_e64 v13, v13, s30, s3
1019; MOVREL-NEXT:    v_cndmask_b32_e64 v14, v14, s31, s3
1020; MOVREL-NEXT:    v_cndmask_b32_e64 v15, v15, s30, s4
1021; MOVREL-NEXT:    v_cndmask_b32_e64 v16, v16, s31, s4
1022; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[1:4], off
1023; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[5:8], off
1024; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[9:12], off
1025; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[13:16], off
1026; MOVREL-NEXT:    s_endpgm
1027entry:
1028  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1029  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1030  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1031  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1032  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1033  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
1034  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
1035  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
1036  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
1037  ret void
1038}
1039
1040define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, double %val, i32 inreg %idx) {
1041; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_s:
1042; GPRIDX:       ; %bb.0: ; %entry
1043; GPRIDX-NEXT:    s_mov_b32 s1, s3
1044; GPRIDX-NEXT:    s_mov_b32 s3, s5
1045; GPRIDX-NEXT:    s_mov_b32 s5, s7
1046; GPRIDX-NEXT:    s_mov_b32 s7, s9
1047; GPRIDX-NEXT:    s_mov_b32 s9, s11
1048; GPRIDX-NEXT:    s_mov_b32 s11, s13
1049; GPRIDX-NEXT:    s_mov_b32 s13, s15
1050; GPRIDX-NEXT:    s_mov_b32 s15, s17
1051; GPRIDX-NEXT:    s_mov_b32 s0, s2
1052; GPRIDX-NEXT:    s_mov_b32 s2, s4
1053; GPRIDX-NEXT:    s_mov_b32 s4, s6
1054; GPRIDX-NEXT:    s_mov_b32 s6, s8
1055; GPRIDX-NEXT:    s_mov_b32 s8, s10
1056; GPRIDX-NEXT:    s_mov_b32 s10, s12
1057; GPRIDX-NEXT:    s_mov_b32 s12, s14
1058; GPRIDX-NEXT:    s_mov_b32 s14, s16
1059; GPRIDX-NEXT:    v_mov_b32_e32 v17, s15
1060; GPRIDX-NEXT:    v_mov_b32_e32 v16, s14
1061; GPRIDX-NEXT:    v_mov_b32_e32 v15, s13
1062; GPRIDX-NEXT:    v_mov_b32_e32 v14, s12
1063; GPRIDX-NEXT:    v_mov_b32_e32 v13, s11
1064; GPRIDX-NEXT:    v_mov_b32_e32 v12, s10
1065; GPRIDX-NEXT:    v_mov_b32_e32 v11, s9
1066; GPRIDX-NEXT:    v_mov_b32_e32 v10, s8
1067; GPRIDX-NEXT:    v_mov_b32_e32 v9, s7
1068; GPRIDX-NEXT:    v_mov_b32_e32 v8, s6
1069; GPRIDX-NEXT:    v_mov_b32_e32 v7, s5
1070; GPRIDX-NEXT:    v_mov_b32_e32 v6, s4
1071; GPRIDX-NEXT:    v_mov_b32_e32 v5, s3
1072; GPRIDX-NEXT:    v_mov_b32_e32 v4, s2
1073; GPRIDX-NEXT:    v_mov_b32_e32 v3, s1
1074; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
1075; GPRIDX-NEXT:    s_lshl_b32 s0, s18, 1
1076; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
1077; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
1078; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
1079; GPRIDX-NEXT:    s_set_gpr_idx_off
1080; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1081; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[6:9], off
1082; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
1083; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[14:17], off
1084; GPRIDX-NEXT:    s_endpgm
1085;
1086; MOVREL-LABEL: dyn_insertelement_v8f64_s_v_s:
1087; MOVREL:       ; %bb.0: ; %entry
1088; MOVREL-NEXT:    s_mov_b32 s1, s3
1089; MOVREL-NEXT:    s_mov_b32 s3, s5
1090; MOVREL-NEXT:    s_mov_b32 s5, s7
1091; MOVREL-NEXT:    s_mov_b32 s7, s9
1092; MOVREL-NEXT:    s_mov_b32 s9, s11
1093; MOVREL-NEXT:    s_mov_b32 s11, s13
1094; MOVREL-NEXT:    s_mov_b32 s13, s15
1095; MOVREL-NEXT:    s_mov_b32 s15, s17
1096; MOVREL-NEXT:    s_mov_b32 s0, s2
1097; MOVREL-NEXT:    s_mov_b32 s2, s4
1098; MOVREL-NEXT:    s_mov_b32 s4, s6
1099; MOVREL-NEXT:    s_mov_b32 s6, s8
1100; MOVREL-NEXT:    s_mov_b32 s8, s10
1101; MOVREL-NEXT:    s_mov_b32 s10, s12
1102; MOVREL-NEXT:    s_mov_b32 s12, s14
1103; MOVREL-NEXT:    s_mov_b32 s14, s16
1104; MOVREL-NEXT:    v_mov_b32_e32 v17, s15
1105; MOVREL-NEXT:    v_mov_b32_e32 v2, s0
1106; MOVREL-NEXT:    s_lshl_b32 m0, s18, 1
1107; MOVREL-NEXT:    v_mov_b32_e32 v16, s14
1108; MOVREL-NEXT:    v_mov_b32_e32 v15, s13
1109; MOVREL-NEXT:    v_mov_b32_e32 v14, s12
1110; MOVREL-NEXT:    v_mov_b32_e32 v13, s11
1111; MOVREL-NEXT:    v_mov_b32_e32 v12, s10
1112; MOVREL-NEXT:    v_mov_b32_e32 v11, s9
1113; MOVREL-NEXT:    v_mov_b32_e32 v10, s8
1114; MOVREL-NEXT:    v_mov_b32_e32 v9, s7
1115; MOVREL-NEXT:    v_mov_b32_e32 v8, s6
1116; MOVREL-NEXT:    v_mov_b32_e32 v7, s5
1117; MOVREL-NEXT:    v_mov_b32_e32 v6, s4
1118; MOVREL-NEXT:    v_mov_b32_e32 v5, s3
1119; MOVREL-NEXT:    v_mov_b32_e32 v4, s2
1120; MOVREL-NEXT:    v_mov_b32_e32 v3, s1
1121; MOVREL-NEXT:    v_movreld_b32_e32 v2, v0
1122; MOVREL-NEXT:    v_movreld_b32_e32 v3, v1
1123; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1124; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[6:9], off
1125; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
1126; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[14:17], off
1127; MOVREL-NEXT:    s_endpgm
1128entry:
1129  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1130  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1131  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1132  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1133  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1134  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
1135  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
1136  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
1137  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
1138  ret void
1139}
1140
1141define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double inreg %val, i32 inreg %idx) {
1142; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_s:
1143; GPRIDX:       ; %bb.0: ; %entry
1144; GPRIDX-NEXT:    s_lshl_b32 s0, s4, 1
1145; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
1146; GPRIDX-NEXT:    v_mov_b32_e32 v0, s2
1147; GPRIDX-NEXT:    v_mov_b32_e32 v1, s3
1148; GPRIDX-NEXT:    s_set_gpr_idx_off
1149; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1150; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1151; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1152; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1153; GPRIDX-NEXT:    s_endpgm
1154;
1155; MOVREL-LABEL: dyn_insertelement_v8f64_v_s_s:
1156; MOVREL:       ; %bb.0: ; %entry
1157; MOVREL-NEXT:    s_lshl_b32 m0, s4, 1
1158; MOVREL-NEXT:    v_movreld_b32_e32 v0, s2
1159; MOVREL-NEXT:    v_movreld_b32_e32 v1, s3
1160; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1161; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1162; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1163; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1164; MOVREL-NEXT:    s_endpgm
1165entry:
1166  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1167  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1168  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1169  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1170  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1171  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
1172  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
1173  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
1174  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
1175  ret void
1176}
1177
1178define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, double %val, i32 %idx) {
1179; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_v:
1180; GPRIDX:       ; %bb.0: ; %entry
1181; GPRIDX-NEXT:    s_mov_b32 s1, s3
1182; GPRIDX-NEXT:    s_mov_b32 s3, s5
1183; GPRIDX-NEXT:    s_mov_b32 s5, s7
1184; GPRIDX-NEXT:    s_mov_b32 s7, s9
1185; GPRIDX-NEXT:    s_mov_b32 s9, s11
1186; GPRIDX-NEXT:    s_mov_b32 s11, s13
1187; GPRIDX-NEXT:    s_mov_b32 s13, s15
1188; GPRIDX-NEXT:    s_mov_b32 s15, s17
1189; GPRIDX-NEXT:    s_mov_b32 s0, s2
1190; GPRIDX-NEXT:    s_mov_b32 s2, s4
1191; GPRIDX-NEXT:    s_mov_b32 s4, s6
1192; GPRIDX-NEXT:    s_mov_b32 s6, s8
1193; GPRIDX-NEXT:    s_mov_b32 s8, s10
1194; GPRIDX-NEXT:    s_mov_b32 s10, s12
1195; GPRIDX-NEXT:    s_mov_b32 s12, s14
1196; GPRIDX-NEXT:    s_mov_b32 s14, s16
1197; GPRIDX-NEXT:    v_mov_b32_e32 v18, s15
1198; GPRIDX-NEXT:    v_mov_b32_e32 v17, s14
1199; GPRIDX-NEXT:    v_mov_b32_e32 v16, s13
1200; GPRIDX-NEXT:    v_mov_b32_e32 v15, s12
1201; GPRIDX-NEXT:    v_mov_b32_e32 v14, s11
1202; GPRIDX-NEXT:    v_mov_b32_e32 v13, s10
1203; GPRIDX-NEXT:    v_mov_b32_e32 v12, s9
1204; GPRIDX-NEXT:    v_mov_b32_e32 v11, s8
1205; GPRIDX-NEXT:    v_mov_b32_e32 v10, s7
1206; GPRIDX-NEXT:    v_mov_b32_e32 v9, s6
1207; GPRIDX-NEXT:    v_mov_b32_e32 v8, s5
1208; GPRIDX-NEXT:    v_mov_b32_e32 v7, s4
1209; GPRIDX-NEXT:    v_mov_b32_e32 v6, s3
1210; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
1211; GPRIDX-NEXT:    v_mov_b32_e32 v5, s2
1212; GPRIDX-NEXT:    v_mov_b32_e32 v4, s1
1213; GPRIDX-NEXT:    v_mov_b32_e32 v3, s0
1214; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[12:13], 0, v2
1215; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 2, v2
1216; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[2:3], 3, v2
1217; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 4, v2
1218; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 5, v2
1219; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 6, v2
1220; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 7, v2
1221; GPRIDX-NEXT:    v_cndmask_b32_e64 v3, v3, v0, s[12:13]
1222; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
1223; GPRIDX-NEXT:    v_cndmask_b32_e64 v4, v4, v1, s[12:13]
1224; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v1, vcc
1225; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v0, s[0:1]
1226; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s[2:3]
1227; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v11, v0, s[4:5]
1228; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v13, v0, s[6:7]
1229; GPRIDX-NEXT:    v_cndmask_b32_e64 v15, v15, v0, s[8:9]
1230; GPRIDX-NEXT:    v_cndmask_b32_e64 v17, v17, v0, s[10:11]
1231; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[0:1]
1232; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s[2:3]
1233; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v1, s[4:5]
1234; GPRIDX-NEXT:    v_cndmask_b32_e64 v14, v14, v1, s[6:7]
1235; GPRIDX-NEXT:    v_cndmask_b32_e64 v16, v16, v1, s[8:9]
1236; GPRIDX-NEXT:    v_cndmask_b32_e64 v18, v18, v1, s[10:11]
1237; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1238; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
1239; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
1240; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
1241; GPRIDX-NEXT:    s_endpgm
1242;
1243; MOVREL-LABEL: dyn_insertelement_v8f64_s_v_v:
1244; MOVREL:       ; %bb.0: ; %entry
1245; MOVREL-NEXT:    s_mov_b32 s1, s3
1246; MOVREL-NEXT:    s_mov_b32 s3, s5
1247; MOVREL-NEXT:    s_mov_b32 s5, s7
1248; MOVREL-NEXT:    s_mov_b32 s7, s9
1249; MOVREL-NEXT:    s_mov_b32 s9, s11
1250; MOVREL-NEXT:    s_mov_b32 s11, s13
1251; MOVREL-NEXT:    s_mov_b32 s13, s15
1252; MOVREL-NEXT:    s_mov_b32 s15, s17
1253; MOVREL-NEXT:    s_mov_b32 s0, s2
1254; MOVREL-NEXT:    s_mov_b32 s2, s4
1255; MOVREL-NEXT:    s_mov_b32 s4, s6
1256; MOVREL-NEXT:    s_mov_b32 s6, s8
1257; MOVREL-NEXT:    s_mov_b32 s8, s10
1258; MOVREL-NEXT:    s_mov_b32 s10, s12
1259; MOVREL-NEXT:    s_mov_b32 s12, s14
1260; MOVREL-NEXT:    s_mov_b32 s14, s16
1261; MOVREL-NEXT:    v_mov_b32_e32 v18, s15
1262; MOVREL-NEXT:    v_mov_b32_e32 v17, s14
1263; MOVREL-NEXT:    v_mov_b32_e32 v16, s13
1264; MOVREL-NEXT:    v_mov_b32_e32 v15, s12
1265; MOVREL-NEXT:    v_mov_b32_e32 v14, s11
1266; MOVREL-NEXT:    v_mov_b32_e32 v13, s10
1267; MOVREL-NEXT:    v_mov_b32_e32 v12, s9
1268; MOVREL-NEXT:    v_mov_b32_e32 v11, s8
1269; MOVREL-NEXT:    v_mov_b32_e32 v10, s7
1270; MOVREL-NEXT:    v_mov_b32_e32 v9, s6
1271; MOVREL-NEXT:    v_mov_b32_e32 v8, s5
1272; MOVREL-NEXT:    v_mov_b32_e32 v7, s4
1273; MOVREL-NEXT:    v_mov_b32_e32 v6, s3
1274; MOVREL-NEXT:    v_mov_b32_e32 v5, s2
1275; MOVREL-NEXT:    v_mov_b32_e32 v4, s1
1276; MOVREL-NEXT:    v_mov_b32_e32 v3, s0
1277; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
1278; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, 1, v2
1279; MOVREL-NEXT:    v_cmp_eq_u32_e64 s1, 3, v2
1280; MOVREL-NEXT:    v_cmp_eq_u32_e64 s6, 2, v2
1281; MOVREL-NEXT:    v_cmp_eq_u32_e64 s2, 4, v2
1282; MOVREL-NEXT:    v_cmp_eq_u32_e64 s3, 5, v2
1283; MOVREL-NEXT:    v_cmp_eq_u32_e64 s4, 6, v2
1284; MOVREL-NEXT:    v_cmp_eq_u32_e64 s5, 7, v2
1285; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
1286; MOVREL-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s0
1287; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
1288; MOVREL-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s0
1289; MOVREL-NEXT:    v_cndmask_b32_e64 v7, v7, v0, s6
1290; MOVREL-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s1
1291; MOVREL-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s6
1292; MOVREL-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s1
1293; MOVREL-NEXT:    v_cndmask_b32_e64 v11, v11, v0, s2
1294; MOVREL-NEXT:    v_cndmask_b32_e64 v13, v13, v0, s3
1295; MOVREL-NEXT:    v_cndmask_b32_e64 v12, v12, v1, s2
1296; MOVREL-NEXT:    v_cndmask_b32_e64 v14, v14, v1, s3
1297; MOVREL-NEXT:    v_cndmask_b32_e64 v15, v15, v0, s4
1298; MOVREL-NEXT:    v_cndmask_b32_e64 v17, v17, v0, s5
1299; MOVREL-NEXT:    v_cndmask_b32_e64 v16, v16, v1, s4
1300; MOVREL-NEXT:    v_cndmask_b32_e64 v18, v18, v1, s5
1301; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
1302; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
1303; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
1304; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
1305; MOVREL-NEXT:    s_endpgm
1306entry:
1307  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1308  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1309  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1310  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1311  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1312  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
1313  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
1314  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
1315  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
1316  ret void
1317}
1318
1319define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double inreg %val, i32 %idx) {
1320; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_v:
1321; GPRIDX:       ; %bb.0: ; %entry
1322; GPRIDX-NEXT:    v_mov_b32_e32 v17, s2
1323; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v16
1324; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v16
1325; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 2, v16
1326; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 3, v16
1327; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 4, v16
1328; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 5, v16
1329; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[12:13], 7, v16
1330; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[14:15], 6, v16
1331; GPRIDX-NEXT:    v_mov_b32_e32 v16, s3
1332; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v17, vcc
1333; GPRIDX-NEXT:    v_cndmask_b32_e64 v2, v2, v17, s[0:1]
1334; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v16, vcc
1335; GPRIDX-NEXT:    v_cndmask_b32_e64 v3, v3, v16, s[0:1]
1336; GPRIDX-NEXT:    v_cndmask_b32_e64 v4, v4, v17, s[4:5]
1337; GPRIDX-NEXT:    v_cndmask_b32_e64 v6, v6, v17, s[6:7]
1338; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v17, s[8:9]
1339; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v17, s[10:11]
1340; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v17, s[14:15]
1341; GPRIDX-NEXT:    v_cndmask_b32_e64 v14, v14, v17, s[12:13]
1342; GPRIDX-NEXT:    v_cndmask_b32_e64 v5, v5, v16, s[4:5]
1343; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v16, s[6:7]
1344; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[8:9]
1345; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v11, v16, s[10:11]
1346; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v13, v16, s[14:15]
1347; GPRIDX-NEXT:    v_cndmask_b32_e64 v15, v15, v16, s[12:13]
1348; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1349; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1350; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1351; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1352; GPRIDX-NEXT:    s_endpgm
1353;
1354; MOVREL-LABEL: dyn_insertelement_v8f64_v_s_v:
1355; MOVREL:       ; %bb.0: ; %entry
1356; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v16
1357; MOVREL-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
1358; MOVREL-NEXT:    v_cndmask_b32_e64 v1, v1, s3, vcc_lo
1359; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
1360; MOVREL-NEXT:    v_cndmask_b32_e64 v2, v2, s2, vcc_lo
1361; MOVREL-NEXT:    v_cndmask_b32_e64 v3, v3, s3, vcc_lo
1362; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v16
1363; MOVREL-NEXT:    v_cndmask_b32_e64 v4, v4, s2, vcc_lo
1364; MOVREL-NEXT:    v_cndmask_b32_e64 v5, v5, s3, vcc_lo
1365; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v16
1366; MOVREL-NEXT:    v_cndmask_b32_e64 v6, v6, s2, vcc_lo
1367; MOVREL-NEXT:    v_cndmask_b32_e64 v7, v7, s3, vcc_lo
1368; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v16
1369; MOVREL-NEXT:    v_cndmask_b32_e64 v8, v8, s2, vcc_lo
1370; MOVREL-NEXT:    v_cndmask_b32_e64 v9, v9, s3, vcc_lo
1371; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v16
1372; MOVREL-NEXT:    v_cndmask_b32_e64 v10, v10, s2, vcc_lo
1373; MOVREL-NEXT:    v_cndmask_b32_e64 v11, v11, s3, vcc_lo
1374; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v16
1375; MOVREL-NEXT:    v_cndmask_b32_e64 v12, v12, s2, vcc_lo
1376; MOVREL-NEXT:    v_cndmask_b32_e64 v13, v13, s3, vcc_lo
1377; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v16
1378; MOVREL-NEXT:    v_cndmask_b32_e64 v14, v14, s2, vcc_lo
1379; MOVREL-NEXT:    v_cndmask_b32_e64 v15, v15, s3, vcc_lo
1380; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1381; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1382; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1383; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1384; MOVREL-NEXT:    s_endpgm
1385entry:
1386  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1387  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1388  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1389  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1390  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1391  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
1392  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
1393  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
1394  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
1395  ret void
1396}
1397
1398define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %val, i32 inreg %idx) {
1399; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_s:
1400; GPRIDX:       ; %bb.0: ; %entry
1401; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
1402; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
1403; GPRIDX-NEXT:    v_mov_b32_e32 v0, v16
1404; GPRIDX-NEXT:    v_mov_b32_e32 v1, v17
1405; GPRIDX-NEXT:    s_set_gpr_idx_off
1406; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1407; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1408; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1409; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1410; GPRIDX-NEXT:    s_endpgm
1411;
1412; MOVREL-LABEL: dyn_insertelement_v8f64_v_v_s:
1413; MOVREL:       ; %bb.0: ; %entry
1414; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
1415; MOVREL-NEXT:    v_movreld_b32_e32 v0, v16
1416; MOVREL-NEXT:    v_movreld_b32_e32 v1, v17
1417; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1418; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1419; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1420; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1421; MOVREL-NEXT:    s_endpgm
1422entry:
1423  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1424  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1425  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1426  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1427  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1428  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
1429  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
1430  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
1431  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
1432  ret void
1433}
1434
1435define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %val, i32 %idx) {
1436; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v:
1437; GPRIDX:       ; %bb.0: ; %entry
1438; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v18
1439; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v18
1440; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[2:3], 2, v18
1441; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v18
1442; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 4, v18
1443; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 5, v18
1444; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 7, v18
1445; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[12:13], 6, v18
1446; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
1447; GPRIDX-NEXT:    v_cndmask_b32_e64 v2, v2, v16, s[0:1]
1448; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
1449; GPRIDX-NEXT:    v_cndmask_b32_e64 v3, v3, v17, s[0:1]
1450; GPRIDX-NEXT:    v_cndmask_b32_e64 v4, v4, v16, s[2:3]
1451; GPRIDX-NEXT:    v_cndmask_b32_e64 v6, v6, v16, s[4:5]
1452; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v16, s[6:7]
1453; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v16, s[8:9]
1454; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v16, s[12:13]
1455; GPRIDX-NEXT:    v_cndmask_b32_e64 v14, v14, v16, s[10:11]
1456; GPRIDX-NEXT:    v_cndmask_b32_e64 v5, v5, v17, s[2:3]
1457; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s[4:5]
1458; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v17, s[6:7]
1459; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v11, v17, s[8:9]
1460; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v13, v17, s[12:13]
1461; GPRIDX-NEXT:    v_cndmask_b32_e64 v15, v15, v17, s[10:11]
1462; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1463; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1464; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1465; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1466; GPRIDX-NEXT:    s_endpgm
1467;
1468; MOVREL-LABEL: dyn_insertelement_v8f64_v_v_v:
1469; MOVREL:       ; %bb.0: ; %entry
1470; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, 1, v18
1471; MOVREL-NEXT:    v_mov_b32_e32 v19, v0
1472; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v18
1473; MOVREL-NEXT:    v_mov_b32_e32 v23, v1
1474; MOVREL-NEXT:    v_cmp_eq_u32_e64 s1, 2, v18
1475; MOVREL-NEXT:    v_cmp_eq_u32_e64 s2, 3, v18
1476; MOVREL-NEXT:    v_cmp_eq_u32_e64 s3, 4, v18
1477; MOVREL-NEXT:    v_cmp_eq_u32_e64 s4, 5, v18
1478; MOVREL-NEXT:    v_cmp_eq_u32_e64 s5, 7, v18
1479; MOVREL-NEXT:    v_cmp_eq_u32_e64 s6, 6, v18
1480; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v19, v16, vcc_lo
1481; MOVREL-NEXT:    v_cndmask_b32_e64 v2, v2, v16, s0
1482; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v23, v17, vcc_lo
1483; MOVREL-NEXT:    v_cndmask_b32_e64 v3, v3, v17, s0
1484; MOVREL-NEXT:    v_cndmask_b32_e64 v4, v4, v16, s1
1485; MOVREL-NEXT:    v_cndmask_b32_e64 v6, v6, v16, s2
1486; MOVREL-NEXT:    v_cndmask_b32_e64 v5, v5, v17, s1
1487; MOVREL-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s2
1488; MOVREL-NEXT:    v_cndmask_b32_e64 v8, v8, v16, s3
1489; MOVREL-NEXT:    v_cndmask_b32_e64 v10, v10, v16, s4
1490; MOVREL-NEXT:    v_cndmask_b32_e64 v9, v9, v17, s3
1491; MOVREL-NEXT:    v_cndmask_b32_e64 v11, v11, v17, s4
1492; MOVREL-NEXT:    v_cndmask_b32_e64 v12, v12, v16, s6
1493; MOVREL-NEXT:    v_cndmask_b32_e64 v14, v14, v16, s5
1494; MOVREL-NEXT:    v_cndmask_b32_e64 v13, v13, v17, s6
1495; MOVREL-NEXT:    v_cndmask_b32_e64 v15, v15, v17, s5
1496; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1497; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
1498; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
1499; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
1500; MOVREL-NEXT:    s_endpgm
1501entry:
1502  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
1503  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
1504  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
1505  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
1506  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
1507  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
1508  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
1509  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
1510  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
1511  ret void
1512}
1513
1514define amdgpu_ps <3 x i32> @dyn_insertelement_v3i32_s_s_s(<3 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
1515; GPRIDX-LABEL: dyn_insertelement_v3i32_s_s_s:
1516; GPRIDX:       ; %bb.0: ; %entry
1517; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 0
1518; GPRIDX-NEXT:    s_cselect_b32 s0, s5, s2
1519; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 1
1520; GPRIDX-NEXT:    s_cselect_b32 s1, s5, s3
1521; GPRIDX-NEXT:    s_cmp_eq_u32 s6, 2
1522; GPRIDX-NEXT:    s_cselect_b32 s2, s5, s4
1523; GPRIDX-NEXT:    ; return to shader part epilog
1524;
1525; MOVREL-LABEL: dyn_insertelement_v3i32_s_s_s:
1526; MOVREL:       ; %bb.0: ; %entry
1527; MOVREL-NEXT:    s_cmp_eq_u32 s6, 0
1528; MOVREL-NEXT:    s_cselect_b32 s0, s5, s2
1529; MOVREL-NEXT:    s_cmp_eq_u32 s6, 1
1530; MOVREL-NEXT:    s_cselect_b32 s1, s5, s3
1531; MOVREL-NEXT:    s_cmp_eq_u32 s6, 2
1532; MOVREL-NEXT:    s_cselect_b32 s2, s5, s4
1533; MOVREL-NEXT:    ; return to shader part epilog
1534entry:
1535  %insert = insertelement <3 x i32> %vec, i32 %val, i32 %idx
1536  ret <3 x i32> %insert
1537}
1538
1539define amdgpu_ps <3 x float> @dyn_insertelement_v3i32_v_v_s(<3 x float> %vec, float %val, i32 inreg %idx) {
1540; GPRIDX-LABEL: dyn_insertelement_v3i32_v_v_s:
1541; GPRIDX:       ; %bb.0: ; %entry
1542; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
1543; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1544; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
1545; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1546; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
1547; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1548; GPRIDX-NEXT:    ; return to shader part epilog
1549;
1550; MOVREL-LABEL: dyn_insertelement_v3i32_v_v_s:
1551; MOVREL:       ; %bb.0: ; %entry
1552; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
1553; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1554; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
1555; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1556; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
1557; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1558; MOVREL-NEXT:    ; return to shader part epilog
1559entry:
1560  %insert = insertelement <3 x float> %vec, float %val, i32 %idx
1561  ret <3 x float> %insert
1562}
1563
1564define amdgpu_ps <5 x i32> @dyn_insertelement_v5i32_s_s_s(<5 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
1565; GPRIDX-LABEL: dyn_insertelement_v5i32_s_s_s:
1566; GPRIDX:       ; %bb.0: ; %entry
1567; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 0
1568; GPRIDX-NEXT:    s_cselect_b32 s0, s7, s2
1569; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 1
1570; GPRIDX-NEXT:    s_cselect_b32 s1, s7, s3
1571; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 2
1572; GPRIDX-NEXT:    s_cselect_b32 s2, s7, s4
1573; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 3
1574; GPRIDX-NEXT:    s_cselect_b32 s3, s7, s5
1575; GPRIDX-NEXT:    s_cmp_eq_u32 s8, 4
1576; GPRIDX-NEXT:    s_cselect_b32 s4, s7, s6
1577; GPRIDX-NEXT:    ; return to shader part epilog
1578;
1579; MOVREL-LABEL: dyn_insertelement_v5i32_s_s_s:
1580; MOVREL:       ; %bb.0: ; %entry
1581; MOVREL-NEXT:    s_cmp_eq_u32 s8, 0
1582; MOVREL-NEXT:    s_cselect_b32 s0, s7, s2
1583; MOVREL-NEXT:    s_cmp_eq_u32 s8, 1
1584; MOVREL-NEXT:    s_cselect_b32 s1, s7, s3
1585; MOVREL-NEXT:    s_cmp_eq_u32 s8, 2
1586; MOVREL-NEXT:    s_cselect_b32 s2, s7, s4
1587; MOVREL-NEXT:    s_cmp_eq_u32 s8, 3
1588; MOVREL-NEXT:    s_cselect_b32 s3, s7, s5
1589; MOVREL-NEXT:    s_cmp_eq_u32 s8, 4
1590; MOVREL-NEXT:    s_cselect_b32 s4, s7, s6
1591; MOVREL-NEXT:    ; return to shader part epilog
1592entry:
1593  %insert = insertelement <5 x i32> %vec, i32 %val, i32 %idx
1594  ret <5 x i32> %insert
1595}
1596
1597define amdgpu_ps <5 x float> @dyn_insertelement_v5i32_v_v_s(<5 x float> %vec, float %val, i32 inreg %idx) {
1598; GPRIDX-LABEL: dyn_insertelement_v5i32_v_v_s:
1599; GPRIDX:       ; %bb.0: ; %entry
1600; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
1601; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
1602; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
1603; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1604; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
1605; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
1606; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
1607; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
1608; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
1609; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1610; GPRIDX-NEXT:    ; return to shader part epilog
1611;
1612; MOVREL-LABEL: dyn_insertelement_v5i32_v_v_s:
1613; MOVREL:       ; %bb.0: ; %entry
1614; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
1615; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc_lo
1616; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
1617; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
1618; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
1619; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
1620; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
1621; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc_lo
1622; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
1623; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
1624; MOVREL-NEXT:    ; return to shader part epilog
1625entry:
1626  %insert = insertelement <5 x float> %vec, float %val, i32 %idx
1627  ret <5 x float> %insert
1628}
1629
1630define amdgpu_ps <32 x i32> @dyn_insertelement_v32i32_s_s_s(<32 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
1631; GPRIDX-LABEL: dyn_insertelement_v32i32_s_s_s:
1632; GPRIDX:       ; %bb.0: ; %entry
1633; GPRIDX-NEXT:    s_mov_b32 s0, s2
1634; GPRIDX-NEXT:    s_mov_b32 s1, s3
1635; GPRIDX-NEXT:    s_mov_b32 s2, s4
1636; GPRIDX-NEXT:    s_mov_b32 s3, s5
1637; GPRIDX-NEXT:    s_mov_b32 s4, s6
1638; GPRIDX-NEXT:    s_mov_b32 s5, s7
1639; GPRIDX-NEXT:    s_mov_b32 s6, s8
1640; GPRIDX-NEXT:    s_mov_b32 s7, s9
1641; GPRIDX-NEXT:    s_mov_b32 s8, s10
1642; GPRIDX-NEXT:    s_mov_b32 s9, s11
1643; GPRIDX-NEXT:    s_mov_b32 s10, s12
1644; GPRIDX-NEXT:    s_mov_b32 s11, s13
1645; GPRIDX-NEXT:    s_mov_b32 s12, s14
1646; GPRIDX-NEXT:    s_mov_b32 s13, s15
1647; GPRIDX-NEXT:    s_mov_b32 s14, s16
1648; GPRIDX-NEXT:    s_mov_b32 s15, s17
1649; GPRIDX-NEXT:    s_mov_b32 s16, s18
1650; GPRIDX-NEXT:    s_mov_b32 s17, s19
1651; GPRIDX-NEXT:    s_mov_b32 s18, s20
1652; GPRIDX-NEXT:    s_mov_b32 s19, s21
1653; GPRIDX-NEXT:    s_mov_b32 s20, s22
1654; GPRIDX-NEXT:    s_mov_b32 s21, s23
1655; GPRIDX-NEXT:    s_mov_b32 s22, s24
1656; GPRIDX-NEXT:    s_mov_b32 s23, s25
1657; GPRIDX-NEXT:    s_mov_b32 s24, s26
1658; GPRIDX-NEXT:    s_mov_b32 s25, s27
1659; GPRIDX-NEXT:    s_mov_b32 s26, s28
1660; GPRIDX-NEXT:    s_mov_b32 s27, s29
1661; GPRIDX-NEXT:    s_mov_b32 s28, s30
1662; GPRIDX-NEXT:    s_mov_b32 s29, s31
1663; GPRIDX-NEXT:    s_mov_b32 s31, s33
1664; GPRIDX-NEXT:    s_mov_b32 s30, s32
1665; GPRIDX-NEXT:    s_mov_b32 m0, s35
1666; GPRIDX-NEXT:    s_nop 0
1667; GPRIDX-NEXT:    s_movreld_b32 s0, s34
1668; GPRIDX-NEXT:    ; return to shader part epilog
1669;
1670; MOVREL-LABEL: dyn_insertelement_v32i32_s_s_s:
1671; MOVREL:       ; %bb.0: ; %entry
1672; MOVREL-NEXT:    s_mov_b32 s0, s2
1673; MOVREL-NEXT:    s_mov_b32 m0, s35
1674; MOVREL-NEXT:    s_mov_b32 s1, s3
1675; MOVREL-NEXT:    s_mov_b32 s2, s4
1676; MOVREL-NEXT:    s_mov_b32 s3, s5
1677; MOVREL-NEXT:    s_mov_b32 s4, s6
1678; MOVREL-NEXT:    s_mov_b32 s5, s7
1679; MOVREL-NEXT:    s_mov_b32 s6, s8
1680; MOVREL-NEXT:    s_mov_b32 s7, s9
1681; MOVREL-NEXT:    s_mov_b32 s8, s10
1682; MOVREL-NEXT:    s_mov_b32 s9, s11
1683; MOVREL-NEXT:    s_mov_b32 s10, s12
1684; MOVREL-NEXT:    s_mov_b32 s11, s13
1685; MOVREL-NEXT:    s_mov_b32 s12, s14
1686; MOVREL-NEXT:    s_mov_b32 s13, s15
1687; MOVREL-NEXT:    s_mov_b32 s14, s16
1688; MOVREL-NEXT:    s_mov_b32 s15, s17
1689; MOVREL-NEXT:    s_mov_b32 s16, s18
1690; MOVREL-NEXT:    s_mov_b32 s17, s19
1691; MOVREL-NEXT:    s_mov_b32 s18, s20
1692; MOVREL-NEXT:    s_mov_b32 s19, s21
1693; MOVREL-NEXT:    s_mov_b32 s20, s22
1694; MOVREL-NEXT:    s_mov_b32 s21, s23
1695; MOVREL-NEXT:    s_mov_b32 s22, s24
1696; MOVREL-NEXT:    s_mov_b32 s23, s25
1697; MOVREL-NEXT:    s_mov_b32 s24, s26
1698; MOVREL-NEXT:    s_mov_b32 s25, s27
1699; MOVREL-NEXT:    s_mov_b32 s26, s28
1700; MOVREL-NEXT:    s_mov_b32 s27, s29
1701; MOVREL-NEXT:    s_mov_b32 s28, s30
1702; MOVREL-NEXT:    s_mov_b32 s29, s31
1703; MOVREL-NEXT:    s_mov_b32 s31, s33
1704; MOVREL-NEXT:    s_mov_b32 s30, s32
1705; MOVREL-NEXT:    s_movreld_b32 s0, s34
1706; MOVREL-NEXT:    ; return to shader part epilog
1707entry:
1708  %insert = insertelement <32 x i32> %vec, i32 %val, i32 %idx
1709  ret <32 x i32> %insert
1710}
1711
1712define amdgpu_ps <32 x float> @dyn_insertelement_v32i32_v_v_s(<32 x float> %vec, float %val, i32 inreg %idx) {
1713; GPRIDX-LABEL: dyn_insertelement_v32i32_v_v_s:
1714; GPRIDX:       ; %bb.0: ; %entry
1715; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(DST)
1716; GPRIDX-NEXT:    v_mov_b32_e32 v0, v32
1717; GPRIDX-NEXT:    s_set_gpr_idx_off
1718; GPRIDX-NEXT:    ; return to shader part epilog
1719;
1720; MOVREL-LABEL: dyn_insertelement_v32i32_v_v_s:
1721; MOVREL:       ; %bb.0: ; %entry
1722; MOVREL-NEXT:    s_mov_b32 m0, s2
1723; MOVREL-NEXT:    v_movreld_b32_e32 v0, v32
1724; MOVREL-NEXT:    ; return to shader part epilog
1725entry:
1726  %insert = insertelement <32 x float> %vec, float %val, i32 %idx
1727  ret <32 x float> %insert
1728}
1729
1730define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_1(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
1731; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
1732; GPRIDX:       ; %bb.0: ; %entry
1733; GPRIDX-NEXT:    s_add_i32 s11, s11, 1
1734; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 0
1735; GPRIDX-NEXT:    s_cselect_b32 s0, s10, s2
1736; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 1
1737; GPRIDX-NEXT:    s_cselect_b32 s1, s10, s3
1738; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 2
1739; GPRIDX-NEXT:    s_cselect_b32 s2, s10, s4
1740; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 3
1741; GPRIDX-NEXT:    s_cselect_b32 s3, s10, s5
1742; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 4
1743; GPRIDX-NEXT:    s_cselect_b32 s4, s10, s6
1744; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 5
1745; GPRIDX-NEXT:    s_cselect_b32 s5, s10, s7
1746; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 6
1747; GPRIDX-NEXT:    s_cselect_b32 s6, s10, s8
1748; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 7
1749; GPRIDX-NEXT:    s_cselect_b32 s7, s10, s9
1750; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
1751; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
1752; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
1753; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
1754; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
1755; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
1756; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
1757; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
1758; GPRIDX-NEXT:    ; return to shader part epilog
1759;
1760; MOVREL-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
1761; MOVREL:       ; %bb.0: ; %entry
1762; MOVREL-NEXT:    s_add_i32 s11, s11, 1
1763; MOVREL-NEXT:    s_cmp_eq_u32 s11, 0
1764; MOVREL-NEXT:    s_cselect_b32 s0, s10, s2
1765; MOVREL-NEXT:    s_cmp_eq_u32 s11, 1
1766; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
1767; MOVREL-NEXT:    s_cselect_b32 s1, s10, s3
1768; MOVREL-NEXT:    s_cmp_eq_u32 s11, 2
1769; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
1770; MOVREL-NEXT:    s_cselect_b32 s2, s10, s4
1771; MOVREL-NEXT:    s_cmp_eq_u32 s11, 3
1772; MOVREL-NEXT:    v_mov_b32_e32 v2, s2
1773; MOVREL-NEXT:    s_cselect_b32 s3, s10, s5
1774; MOVREL-NEXT:    s_cmp_eq_u32 s11, 4
1775; MOVREL-NEXT:    v_mov_b32_e32 v3, s3
1776; MOVREL-NEXT:    s_cselect_b32 s4, s10, s6
1777; MOVREL-NEXT:    s_cmp_eq_u32 s11, 5
1778; MOVREL-NEXT:    v_mov_b32_e32 v4, s4
1779; MOVREL-NEXT:    s_cselect_b32 s5, s10, s7
1780; MOVREL-NEXT:    s_cmp_eq_u32 s11, 6
1781; MOVREL-NEXT:    v_mov_b32_e32 v5, s5
1782; MOVREL-NEXT:    s_cselect_b32 s6, s10, s8
1783; MOVREL-NEXT:    s_cmp_eq_u32 s11, 7
1784; MOVREL-NEXT:    v_mov_b32_e32 v6, s6
1785; MOVREL-NEXT:    s_cselect_b32 s7, s10, s9
1786; MOVREL-NEXT:    v_mov_b32_e32 v7, s7
1787; MOVREL-NEXT:    ; return to shader part epilog
1788entry:
1789  %idx.add = add i32 %idx, 1
1790  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
1791  ret <8 x float> %insert
1792}
1793
1794define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_7(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
1795; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
1796; GPRIDX:       ; %bb.0: ; %entry
1797; GPRIDX-NEXT:    s_add_i32 s11, s11, 7
1798; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 0
1799; GPRIDX-NEXT:    s_cselect_b32 s0, s10, s2
1800; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 1
1801; GPRIDX-NEXT:    s_cselect_b32 s1, s10, s3
1802; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 2
1803; GPRIDX-NEXT:    s_cselect_b32 s2, s10, s4
1804; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 3
1805; GPRIDX-NEXT:    s_cselect_b32 s3, s10, s5
1806; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 4
1807; GPRIDX-NEXT:    s_cselect_b32 s4, s10, s6
1808; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 5
1809; GPRIDX-NEXT:    s_cselect_b32 s5, s10, s7
1810; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 6
1811; GPRIDX-NEXT:    s_cselect_b32 s6, s10, s8
1812; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 7
1813; GPRIDX-NEXT:    s_cselect_b32 s7, s10, s9
1814; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
1815; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
1816; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
1817; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
1818; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
1819; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
1820; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
1821; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
1822; GPRIDX-NEXT:    ; return to shader part epilog
1823;
1824; MOVREL-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
1825; MOVREL:       ; %bb.0: ; %entry
1826; MOVREL-NEXT:    s_add_i32 s11, s11, 7
1827; MOVREL-NEXT:    s_cmp_eq_u32 s11, 0
1828; MOVREL-NEXT:    s_cselect_b32 s0, s10, s2
1829; MOVREL-NEXT:    s_cmp_eq_u32 s11, 1
1830; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
1831; MOVREL-NEXT:    s_cselect_b32 s1, s10, s3
1832; MOVREL-NEXT:    s_cmp_eq_u32 s11, 2
1833; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
1834; MOVREL-NEXT:    s_cselect_b32 s2, s10, s4
1835; MOVREL-NEXT:    s_cmp_eq_u32 s11, 3
1836; MOVREL-NEXT:    v_mov_b32_e32 v2, s2
1837; MOVREL-NEXT:    s_cselect_b32 s3, s10, s5
1838; MOVREL-NEXT:    s_cmp_eq_u32 s11, 4
1839; MOVREL-NEXT:    v_mov_b32_e32 v3, s3
1840; MOVREL-NEXT:    s_cselect_b32 s4, s10, s6
1841; MOVREL-NEXT:    s_cmp_eq_u32 s11, 5
1842; MOVREL-NEXT:    v_mov_b32_e32 v4, s4
1843; MOVREL-NEXT:    s_cselect_b32 s5, s10, s7
1844; MOVREL-NEXT:    s_cmp_eq_u32 s11, 6
1845; MOVREL-NEXT:    v_mov_b32_e32 v5, s5
1846; MOVREL-NEXT:    s_cselect_b32 s6, s10, s8
1847; MOVREL-NEXT:    s_cmp_eq_u32 s11, 7
1848; MOVREL-NEXT:    v_mov_b32_e32 v6, s6
1849; MOVREL-NEXT:    s_cselect_b32 s7, s10, s9
1850; MOVREL-NEXT:    v_mov_b32_e32 v7, s7
1851; MOVREL-NEXT:    ; return to shader part epilog
1852entry:
1853  %idx.add = add i32 %idx, 7
1854  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
1855  ret <8 x float> %insert
1856}
1857
1858define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_1(<8 x float> %vec, float %val, i32 %idx) {
1859; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_1:
1860; GPRIDX:       ; %bb.0: ; %entry
1861; GPRIDX-NEXT:    v_add_u32_e32 v9, 1, v9
1862; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
1863; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1864; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v9
1865; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
1866; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v9
1867; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
1868; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
1869; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
1870; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v9
1871; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
1872; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v9
1873; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
1874; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v9
1875; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
1876; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v9
1877; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
1878; GPRIDX-NEXT:    ; return to shader part epilog
1879;
1880; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_v_add_1:
1881; MOVREL:       ; %bb.0: ; %entry
1882; MOVREL-NEXT:    v_add_nc_u32_e32 v9, 1, v9
1883; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v9
1884; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1885; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
1886; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
1887; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v9
1888; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
1889; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v9
1890; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
1891; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v9
1892; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
1893; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v9
1894; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
1895; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v9
1896; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
1897; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v9
1898; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
1899; MOVREL-NEXT:    ; return to shader part epilog
1900entry:
1901  %idx.add = add i32 %idx, 1
1902  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
1903  ret <8 x float> %insert
1904}
1905
1906define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_7(<8 x float> %vec, float %val, i32 %idx) {
1907; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_7:
1908; GPRIDX:       ; %bb.0: ; %entry
1909; GPRIDX-NEXT:    v_add_u32_e32 v9, 7, v9
1910; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
1911; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
1912; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v9
1913; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
1914; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v9
1915; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
1916; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
1917; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
1918; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v9
1919; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
1920; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v9
1921; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
1922; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v9
1923; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
1924; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v9
1925; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
1926; GPRIDX-NEXT:    ; return to shader part epilog
1927;
1928; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_v_add_7:
1929; MOVREL:       ; %bb.0: ; %entry
1930; MOVREL-NEXT:    v_add_nc_u32_e32 v9, 7, v9
1931; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v9
1932; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
1933; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
1934; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
1935; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v9
1936; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
1937; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v9
1938; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
1939; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v9
1940; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
1941; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v9
1942; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
1943; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v9
1944; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
1945; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v9
1946; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
1947; MOVREL-NEXT:    ; return to shader part epilog
1948entry:
1949  %idx.add = add i32 %idx, 7
1950  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
1951  ret <8 x float> %insert
1952}
1953
1954define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
1955; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
1956; GPRIDX:       ; %bb.0: ; %entry
1957; GPRIDX-NEXT:    s_mov_b32 s0, s2
1958; GPRIDX-NEXT:    s_mov_b32 s1, s3
1959; GPRIDX-NEXT:    s_mov_b32 s2, s4
1960; GPRIDX-NEXT:    s_mov_b32 s3, s5
1961; GPRIDX-NEXT:    s_mov_b32 s4, s6
1962; GPRIDX-NEXT:    s_mov_b32 s5, s7
1963; GPRIDX-NEXT:    s_mov_b32 s6, s8
1964; GPRIDX-NEXT:    s_mov_b32 s7, s9
1965; GPRIDX-NEXT:    s_mov_b32 s8, s10
1966; GPRIDX-NEXT:    s_mov_b32 s9, s11
1967; GPRIDX-NEXT:    s_mov_b32 s10, s12
1968; GPRIDX-NEXT:    s_mov_b32 s11, s13
1969; GPRIDX-NEXT:    s_mov_b32 s12, s14
1970; GPRIDX-NEXT:    s_mov_b32 s13, s15
1971; GPRIDX-NEXT:    s_mov_b32 s14, s16
1972; GPRIDX-NEXT:    s_mov_b32 s15, s17
1973; GPRIDX-NEXT:    s_mov_b32 m0, s20
1974; GPRIDX-NEXT:    s_nop 0
1975; GPRIDX-NEXT:    s_movreld_b64 s[2:3], s[18:19]
1976; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
1977; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
1978; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
1979; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
1980; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1981; GPRIDX-NEXT:    s_nop 0
1982; GPRIDX-NEXT:    v_mov_b32_e32 v0, s4
1983; GPRIDX-NEXT:    v_mov_b32_e32 v1, s5
1984; GPRIDX-NEXT:    v_mov_b32_e32 v2, s6
1985; GPRIDX-NEXT:    v_mov_b32_e32 v3, s7
1986; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1987; GPRIDX-NEXT:    s_nop 0
1988; GPRIDX-NEXT:    v_mov_b32_e32 v0, s8
1989; GPRIDX-NEXT:    v_mov_b32_e32 v1, s9
1990; GPRIDX-NEXT:    v_mov_b32_e32 v2, s10
1991; GPRIDX-NEXT:    v_mov_b32_e32 v3, s11
1992; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1993; GPRIDX-NEXT:    s_nop 0
1994; GPRIDX-NEXT:    v_mov_b32_e32 v0, s12
1995; GPRIDX-NEXT:    v_mov_b32_e32 v1, s13
1996; GPRIDX-NEXT:    v_mov_b32_e32 v2, s14
1997; GPRIDX-NEXT:    v_mov_b32_e32 v3, s15
1998; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1999; GPRIDX-NEXT:    s_endpgm
2000;
2001; MOVREL-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
2002; MOVREL:       ; %bb.0: ; %entry
2003; MOVREL-NEXT:    s_mov_b32 s0, s2
2004; MOVREL-NEXT:    s_mov_b32 s1, s3
2005; MOVREL-NEXT:    s_mov_b32 s2, s4
2006; MOVREL-NEXT:    s_mov_b32 s3, s5
2007; MOVREL-NEXT:    s_mov_b32 m0, s20
2008; MOVREL-NEXT:    s_mov_b32 s4, s6
2009; MOVREL-NEXT:    s_mov_b32 s5, s7
2010; MOVREL-NEXT:    s_mov_b32 s6, s8
2011; MOVREL-NEXT:    s_mov_b32 s7, s9
2012; MOVREL-NEXT:    s_mov_b32 s8, s10
2013; MOVREL-NEXT:    s_mov_b32 s9, s11
2014; MOVREL-NEXT:    s_mov_b32 s10, s12
2015; MOVREL-NEXT:    s_mov_b32 s11, s13
2016; MOVREL-NEXT:    s_mov_b32 s12, s14
2017; MOVREL-NEXT:    s_mov_b32 s13, s15
2018; MOVREL-NEXT:    s_mov_b32 s14, s16
2019; MOVREL-NEXT:    s_mov_b32 s15, s17
2020; MOVREL-NEXT:    s_movreld_b64 s[2:3], s[18:19]
2021; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
2022; MOVREL-NEXT:    v_mov_b32_e32 v4, s4
2023; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
2024; MOVREL-NEXT:    v_mov_b32_e32 v2, s2
2025; MOVREL-NEXT:    v_mov_b32_e32 v3, s3
2026; MOVREL-NEXT:    v_mov_b32_e32 v8, s8
2027; MOVREL-NEXT:    v_mov_b32_e32 v5, s5
2028; MOVREL-NEXT:    v_mov_b32_e32 v6, s6
2029; MOVREL-NEXT:    v_mov_b32_e32 v7, s7
2030; MOVREL-NEXT:    v_mov_b32_e32 v12, s12
2031; MOVREL-NEXT:    v_mov_b32_e32 v9, s9
2032; MOVREL-NEXT:    v_mov_b32_e32 v10, s10
2033; MOVREL-NEXT:    v_mov_b32_e32 v11, s11
2034; MOVREL-NEXT:    v_mov_b32_e32 v13, s13
2035; MOVREL-NEXT:    v_mov_b32_e32 v14, s14
2036; MOVREL-NEXT:    v_mov_b32_e32 v15, s15
2037; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
2038; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
2039; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
2040; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
2041; MOVREL-NEXT:    s_endpgm
2042entry:
2043  %idx.add = add i32 %idx, 1
2044  %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
2045  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
2046  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
2047  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
2048  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
2049  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
2050  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
2051  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
2052  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
2053  ret void
2054}
2055
2056define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, double %val, i32 %idx) {
2057; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
2058; GPRIDX:       ; %bb.0: ; %entry
2059; GPRIDX-NEXT:    v_add_u32_e32 v18, 1, v18
2060; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v18
2061; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v18
2062; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[2:3], 2, v18
2063; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v18
2064; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 4, v18
2065; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 5, v18
2066; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 7, v18
2067; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[12:13], 6, v18
2068; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v16, vcc
2069; GPRIDX-NEXT:    v_cndmask_b32_e64 v2, v2, v16, s[0:1]
2070; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v17, vcc
2071; GPRIDX-NEXT:    v_cndmask_b32_e64 v3, v3, v17, s[0:1]
2072; GPRIDX-NEXT:    v_cndmask_b32_e64 v4, v4, v16, s[2:3]
2073; GPRIDX-NEXT:    v_cndmask_b32_e64 v6, v6, v16, s[4:5]
2074; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v16, s[6:7]
2075; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v16, s[8:9]
2076; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v16, s[12:13]
2077; GPRIDX-NEXT:    v_cndmask_b32_e64 v14, v14, v16, s[10:11]
2078; GPRIDX-NEXT:    v_cndmask_b32_e64 v5, v5, v17, s[2:3]
2079; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s[4:5]
2080; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v17, s[6:7]
2081; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v11, v17, s[8:9]
2082; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v13, v17, s[12:13]
2083; GPRIDX-NEXT:    v_cndmask_b32_e64 v15, v15, v17, s[10:11]
2084; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
2085; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
2086; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
2087; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
2088; GPRIDX-NEXT:    s_endpgm
2089;
2090; MOVREL-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
2091; MOVREL:       ; %bb.0: ; %entry
2092; MOVREL-NEXT:    v_add_nc_u32_e32 v18, 1, v18
2093; MOVREL-NEXT:    v_mov_b32_e32 v19, v0
2094; MOVREL-NEXT:    v_mov_b32_e32 v23, v1
2095; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v18
2096; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, 1, v18
2097; MOVREL-NEXT:    v_cmp_eq_u32_e64 s1, 2, v18
2098; MOVREL-NEXT:    v_cmp_eq_u32_e64 s2, 3, v18
2099; MOVREL-NEXT:    v_cmp_eq_u32_e64 s3, 4, v18
2100; MOVREL-NEXT:    v_cmp_eq_u32_e64 s4, 5, v18
2101; MOVREL-NEXT:    v_cmp_eq_u32_e64 s5, 7, v18
2102; MOVREL-NEXT:    v_cmp_eq_u32_e64 s6, 6, v18
2103; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v19, v16, vcc_lo
2104; MOVREL-NEXT:    v_cndmask_b32_e64 v2, v2, v16, s0
2105; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v23, v17, vcc_lo
2106; MOVREL-NEXT:    v_cndmask_b32_e64 v3, v3, v17, s0
2107; MOVREL-NEXT:    v_cndmask_b32_e64 v4, v4, v16, s1
2108; MOVREL-NEXT:    v_cndmask_b32_e64 v6, v6, v16, s2
2109; MOVREL-NEXT:    v_cndmask_b32_e64 v5, v5, v17, s1
2110; MOVREL-NEXT:    v_cndmask_b32_e64 v7, v7, v17, s2
2111; MOVREL-NEXT:    v_cndmask_b32_e64 v8, v8, v16, s3
2112; MOVREL-NEXT:    v_cndmask_b32_e64 v10, v10, v16, s4
2113; MOVREL-NEXT:    v_cndmask_b32_e64 v9, v9, v17, s3
2114; MOVREL-NEXT:    v_cndmask_b32_e64 v11, v11, v17, s4
2115; MOVREL-NEXT:    v_cndmask_b32_e64 v12, v12, v16, s6
2116; MOVREL-NEXT:    v_cndmask_b32_e64 v14, v14, v16, s5
2117; MOVREL-NEXT:    v_cndmask_b32_e64 v13, v13, v17, s6
2118; MOVREL-NEXT:    v_cndmask_b32_e64 v15, v15, v17, s5
2119; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
2120; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off
2121; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off
2122; MOVREL-NEXT:    global_store_dwordx4 v[0:1], v[12:15], off
2123; MOVREL-NEXT:    s_endpgm
2124entry:
2125  %idx.add = add i32 %idx, 1
2126  %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
2127  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
2128  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
2129  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
2130  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
2131  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
2132  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
2133  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
2134  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
2135  ret void
2136}
2137
2138define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_s_s(<16 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
2139; GPRIDX-LABEL: dyn_insertelement_v16i32_s_s_s:
2140; GPRIDX:       ; %bb.0: ; %entry
2141; GPRIDX-NEXT:    s_mov_b32 s0, s2
2142; GPRIDX-NEXT:    s_mov_b32 s1, s3
2143; GPRIDX-NEXT:    s_mov_b32 s2, s4
2144; GPRIDX-NEXT:    s_mov_b32 s3, s5
2145; GPRIDX-NEXT:    s_mov_b32 s4, s6
2146; GPRIDX-NEXT:    s_mov_b32 s5, s7
2147; GPRIDX-NEXT:    s_mov_b32 s6, s8
2148; GPRIDX-NEXT:    s_mov_b32 s7, s9
2149; GPRIDX-NEXT:    s_mov_b32 s8, s10
2150; GPRIDX-NEXT:    s_mov_b32 s9, s11
2151; GPRIDX-NEXT:    s_mov_b32 s10, s12
2152; GPRIDX-NEXT:    s_mov_b32 s11, s13
2153; GPRIDX-NEXT:    s_mov_b32 s12, s14
2154; GPRIDX-NEXT:    s_mov_b32 s13, s15
2155; GPRIDX-NEXT:    s_mov_b32 s14, s16
2156; GPRIDX-NEXT:    s_mov_b32 s15, s17
2157; GPRIDX-NEXT:    s_mov_b32 m0, s19
2158; GPRIDX-NEXT:    s_nop 0
2159; GPRIDX-NEXT:    s_movreld_b32 s0, s18
2160; GPRIDX-NEXT:    ; return to shader part epilog
2161;
2162; MOVREL-LABEL: dyn_insertelement_v16i32_s_s_s:
2163; MOVREL:       ; %bb.0: ; %entry
2164; MOVREL-NEXT:    s_mov_b32 s0, s2
2165; MOVREL-NEXT:    s_mov_b32 m0, s19
2166; MOVREL-NEXT:    s_mov_b32 s1, s3
2167; MOVREL-NEXT:    s_mov_b32 s2, s4
2168; MOVREL-NEXT:    s_mov_b32 s3, s5
2169; MOVREL-NEXT:    s_mov_b32 s4, s6
2170; MOVREL-NEXT:    s_mov_b32 s5, s7
2171; MOVREL-NEXT:    s_mov_b32 s6, s8
2172; MOVREL-NEXT:    s_mov_b32 s7, s9
2173; MOVREL-NEXT:    s_mov_b32 s8, s10
2174; MOVREL-NEXT:    s_mov_b32 s9, s11
2175; MOVREL-NEXT:    s_mov_b32 s10, s12
2176; MOVREL-NEXT:    s_mov_b32 s11, s13
2177; MOVREL-NEXT:    s_mov_b32 s12, s14
2178; MOVREL-NEXT:    s_mov_b32 s13, s15
2179; MOVREL-NEXT:    s_mov_b32 s14, s16
2180; MOVREL-NEXT:    s_mov_b32 s15, s17
2181; MOVREL-NEXT:    s_movreld_b32 s0, s18
2182; MOVREL-NEXT:    ; return to shader part epilog
2183entry:
2184  %insert = insertelement <16 x i32> %vec, i32 %val, i32 %idx
2185  ret <16 x i32> %insert
2186}
2187
2188define amdgpu_ps <16 x float> @dyn_insertelement_v16f32_s_s_s(<16 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
2189; GPRIDX-LABEL: dyn_insertelement_v16f32_s_s_s:
2190; GPRIDX:       ; %bb.0: ; %entry
2191; GPRIDX-NEXT:    s_mov_b32 s0, s2
2192; GPRIDX-NEXT:    s_mov_b32 s1, s3
2193; GPRIDX-NEXT:    s_mov_b32 s2, s4
2194; GPRIDX-NEXT:    s_mov_b32 s3, s5
2195; GPRIDX-NEXT:    s_mov_b32 s4, s6
2196; GPRIDX-NEXT:    s_mov_b32 s5, s7
2197; GPRIDX-NEXT:    s_mov_b32 s6, s8
2198; GPRIDX-NEXT:    s_mov_b32 s7, s9
2199; GPRIDX-NEXT:    s_mov_b32 s8, s10
2200; GPRIDX-NEXT:    s_mov_b32 s9, s11
2201; GPRIDX-NEXT:    s_mov_b32 s10, s12
2202; GPRIDX-NEXT:    s_mov_b32 s11, s13
2203; GPRIDX-NEXT:    s_mov_b32 s12, s14
2204; GPRIDX-NEXT:    s_mov_b32 s13, s15
2205; GPRIDX-NEXT:    s_mov_b32 s14, s16
2206; GPRIDX-NEXT:    s_mov_b32 s15, s17
2207; GPRIDX-NEXT:    s_mov_b32 m0, s19
2208; GPRIDX-NEXT:    s_nop 0
2209; GPRIDX-NEXT:    s_movreld_b32 s0, s18
2210; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
2211; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
2212; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
2213; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
2214; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
2215; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
2216; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
2217; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
2218; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
2219; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
2220; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
2221; GPRIDX-NEXT:    v_mov_b32_e32 v11, s11
2222; GPRIDX-NEXT:    v_mov_b32_e32 v12, s12
2223; GPRIDX-NEXT:    v_mov_b32_e32 v13, s13
2224; GPRIDX-NEXT:    v_mov_b32_e32 v14, s14
2225; GPRIDX-NEXT:    v_mov_b32_e32 v15, s15
2226; GPRIDX-NEXT:    ; return to shader part epilog
2227;
2228; MOVREL-LABEL: dyn_insertelement_v16f32_s_s_s:
2229; MOVREL:       ; %bb.0: ; %entry
2230; MOVREL-NEXT:    s_mov_b32 s0, s2
2231; MOVREL-NEXT:    s_mov_b32 m0, s19
2232; MOVREL-NEXT:    s_mov_b32 s1, s3
2233; MOVREL-NEXT:    s_mov_b32 s2, s4
2234; MOVREL-NEXT:    s_mov_b32 s3, s5
2235; MOVREL-NEXT:    s_mov_b32 s4, s6
2236; MOVREL-NEXT:    s_mov_b32 s5, s7
2237; MOVREL-NEXT:    s_mov_b32 s6, s8
2238; MOVREL-NEXT:    s_mov_b32 s7, s9
2239; MOVREL-NEXT:    s_mov_b32 s8, s10
2240; MOVREL-NEXT:    s_mov_b32 s9, s11
2241; MOVREL-NEXT:    s_mov_b32 s10, s12
2242; MOVREL-NEXT:    s_mov_b32 s11, s13
2243; MOVREL-NEXT:    s_mov_b32 s12, s14
2244; MOVREL-NEXT:    s_mov_b32 s13, s15
2245; MOVREL-NEXT:    s_mov_b32 s14, s16
2246; MOVREL-NEXT:    s_mov_b32 s15, s17
2247; MOVREL-NEXT:    s_movreld_b32 s0, s18
2248; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
2249; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
2250; MOVREL-NEXT:    v_mov_b32_e32 v2, s2
2251; MOVREL-NEXT:    v_mov_b32_e32 v3, s3
2252; MOVREL-NEXT:    v_mov_b32_e32 v4, s4
2253; MOVREL-NEXT:    v_mov_b32_e32 v5, s5
2254; MOVREL-NEXT:    v_mov_b32_e32 v6, s6
2255; MOVREL-NEXT:    v_mov_b32_e32 v7, s7
2256; MOVREL-NEXT:    v_mov_b32_e32 v8, s8
2257; MOVREL-NEXT:    v_mov_b32_e32 v9, s9
2258; MOVREL-NEXT:    v_mov_b32_e32 v10, s10
2259; MOVREL-NEXT:    v_mov_b32_e32 v11, s11
2260; MOVREL-NEXT:    v_mov_b32_e32 v12, s12
2261; MOVREL-NEXT:    v_mov_b32_e32 v13, s13
2262; MOVREL-NEXT:    v_mov_b32_e32 v14, s14
2263; MOVREL-NEXT:    v_mov_b32_e32 v15, s15
2264; MOVREL-NEXT:    ; return to shader part epilog
2265entry:
2266  %insert = insertelement <16 x float> %vec, float %val, i32 %idx
2267  ret <16 x float> %insert
2268}
2269
2270define amdgpu_ps <32 x float> @dyn_insertelement_v32f32_s_s_s(<32 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
2271; GPRIDX-LABEL: dyn_insertelement_v32f32_s_s_s:
2272; GPRIDX:       ; %bb.0: ; %entry
2273; GPRIDX-NEXT:    s_mov_b32 s0, s2
2274; GPRIDX-NEXT:    s_mov_b32 s1, s3
2275; GPRIDX-NEXT:    s_mov_b32 s2, s4
2276; GPRIDX-NEXT:    s_mov_b32 s3, s5
2277; GPRIDX-NEXT:    s_mov_b32 s4, s6
2278; GPRIDX-NEXT:    s_mov_b32 s5, s7
2279; GPRIDX-NEXT:    s_mov_b32 s6, s8
2280; GPRIDX-NEXT:    s_mov_b32 s7, s9
2281; GPRIDX-NEXT:    s_mov_b32 s8, s10
2282; GPRIDX-NEXT:    s_mov_b32 s9, s11
2283; GPRIDX-NEXT:    s_mov_b32 s10, s12
2284; GPRIDX-NEXT:    s_mov_b32 s11, s13
2285; GPRIDX-NEXT:    s_mov_b32 s12, s14
2286; GPRIDX-NEXT:    s_mov_b32 s13, s15
2287; GPRIDX-NEXT:    s_mov_b32 s14, s16
2288; GPRIDX-NEXT:    s_mov_b32 s15, s17
2289; GPRIDX-NEXT:    s_mov_b32 s16, s18
2290; GPRIDX-NEXT:    s_mov_b32 s17, s19
2291; GPRIDX-NEXT:    s_mov_b32 s18, s20
2292; GPRIDX-NEXT:    s_mov_b32 s19, s21
2293; GPRIDX-NEXT:    s_mov_b32 s20, s22
2294; GPRIDX-NEXT:    s_mov_b32 s21, s23
2295; GPRIDX-NEXT:    s_mov_b32 s22, s24
2296; GPRIDX-NEXT:    s_mov_b32 s23, s25
2297; GPRIDX-NEXT:    s_mov_b32 s24, s26
2298; GPRIDX-NEXT:    s_mov_b32 s25, s27
2299; GPRIDX-NEXT:    s_mov_b32 s26, s28
2300; GPRIDX-NEXT:    s_mov_b32 s27, s29
2301; GPRIDX-NEXT:    s_mov_b32 s28, s30
2302; GPRIDX-NEXT:    s_mov_b32 s29, s31
2303; GPRIDX-NEXT:    s_mov_b32 s31, s33
2304; GPRIDX-NEXT:    s_mov_b32 s30, s32
2305; GPRIDX-NEXT:    s_mov_b32 m0, s35
2306; GPRIDX-NEXT:    s_nop 0
2307; GPRIDX-NEXT:    s_movreld_b32 s0, s34
2308; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
2309; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
2310; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
2311; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
2312; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
2313; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
2314; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
2315; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
2316; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
2317; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
2318; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
2319; GPRIDX-NEXT:    v_mov_b32_e32 v11, s11
2320; GPRIDX-NEXT:    v_mov_b32_e32 v12, s12
2321; GPRIDX-NEXT:    v_mov_b32_e32 v13, s13
2322; GPRIDX-NEXT:    v_mov_b32_e32 v14, s14
2323; GPRIDX-NEXT:    v_mov_b32_e32 v15, s15
2324; GPRIDX-NEXT:    v_mov_b32_e32 v16, s16
2325; GPRIDX-NEXT:    v_mov_b32_e32 v17, s17
2326; GPRIDX-NEXT:    v_mov_b32_e32 v18, s18
2327; GPRIDX-NEXT:    v_mov_b32_e32 v19, s19
2328; GPRIDX-NEXT:    v_mov_b32_e32 v20, s20
2329; GPRIDX-NEXT:    v_mov_b32_e32 v21, s21
2330; GPRIDX-NEXT:    v_mov_b32_e32 v22, s22
2331; GPRIDX-NEXT:    v_mov_b32_e32 v23, s23
2332; GPRIDX-NEXT:    v_mov_b32_e32 v24, s24
2333; GPRIDX-NEXT:    v_mov_b32_e32 v25, s25
2334; GPRIDX-NEXT:    v_mov_b32_e32 v26, s26
2335; GPRIDX-NEXT:    v_mov_b32_e32 v27, s27
2336; GPRIDX-NEXT:    v_mov_b32_e32 v28, s28
2337; GPRIDX-NEXT:    v_mov_b32_e32 v29, s29
2338; GPRIDX-NEXT:    v_mov_b32_e32 v30, s30
2339; GPRIDX-NEXT:    v_mov_b32_e32 v31, s31
2340; GPRIDX-NEXT:    ; return to shader part epilog
2341;
2342; MOVREL-LABEL: dyn_insertelement_v32f32_s_s_s:
2343; MOVREL:       ; %bb.0: ; %entry
2344; MOVREL-NEXT:    s_mov_b32 s0, s2
2345; MOVREL-NEXT:    s_mov_b32 m0, s35
2346; MOVREL-NEXT:    s_mov_b32 s1, s3
2347; MOVREL-NEXT:    s_mov_b32 s2, s4
2348; MOVREL-NEXT:    s_mov_b32 s3, s5
2349; MOVREL-NEXT:    s_mov_b32 s4, s6
2350; MOVREL-NEXT:    s_mov_b32 s5, s7
2351; MOVREL-NEXT:    s_mov_b32 s6, s8
2352; MOVREL-NEXT:    s_mov_b32 s7, s9
2353; MOVREL-NEXT:    s_mov_b32 s8, s10
2354; MOVREL-NEXT:    s_mov_b32 s9, s11
2355; MOVREL-NEXT:    s_mov_b32 s10, s12
2356; MOVREL-NEXT:    s_mov_b32 s11, s13
2357; MOVREL-NEXT:    s_mov_b32 s12, s14
2358; MOVREL-NEXT:    s_mov_b32 s13, s15
2359; MOVREL-NEXT:    s_mov_b32 s14, s16
2360; MOVREL-NEXT:    s_mov_b32 s15, s17
2361; MOVREL-NEXT:    s_mov_b32 s16, s18
2362; MOVREL-NEXT:    s_mov_b32 s17, s19
2363; MOVREL-NEXT:    s_mov_b32 s18, s20
2364; MOVREL-NEXT:    s_mov_b32 s19, s21
2365; MOVREL-NEXT:    s_mov_b32 s20, s22
2366; MOVREL-NEXT:    s_mov_b32 s21, s23
2367; MOVREL-NEXT:    s_mov_b32 s22, s24
2368; MOVREL-NEXT:    s_mov_b32 s23, s25
2369; MOVREL-NEXT:    s_mov_b32 s24, s26
2370; MOVREL-NEXT:    s_mov_b32 s25, s27
2371; MOVREL-NEXT:    s_mov_b32 s26, s28
2372; MOVREL-NEXT:    s_mov_b32 s27, s29
2373; MOVREL-NEXT:    s_mov_b32 s28, s30
2374; MOVREL-NEXT:    s_mov_b32 s29, s31
2375; MOVREL-NEXT:    s_mov_b32 s31, s33
2376; MOVREL-NEXT:    s_mov_b32 s30, s32
2377; MOVREL-NEXT:    s_movreld_b32 s0, s34
2378; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
2379; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
2380; MOVREL-NEXT:    v_mov_b32_e32 v2, s2
2381; MOVREL-NEXT:    v_mov_b32_e32 v3, s3
2382; MOVREL-NEXT:    v_mov_b32_e32 v4, s4
2383; MOVREL-NEXT:    v_mov_b32_e32 v5, s5
2384; MOVREL-NEXT:    v_mov_b32_e32 v6, s6
2385; MOVREL-NEXT:    v_mov_b32_e32 v7, s7
2386; MOVREL-NEXT:    v_mov_b32_e32 v8, s8
2387; MOVREL-NEXT:    v_mov_b32_e32 v9, s9
2388; MOVREL-NEXT:    v_mov_b32_e32 v10, s10
2389; MOVREL-NEXT:    v_mov_b32_e32 v11, s11
2390; MOVREL-NEXT:    v_mov_b32_e32 v12, s12
2391; MOVREL-NEXT:    v_mov_b32_e32 v13, s13
2392; MOVREL-NEXT:    v_mov_b32_e32 v14, s14
2393; MOVREL-NEXT:    v_mov_b32_e32 v15, s15
2394; MOVREL-NEXT:    v_mov_b32_e32 v16, s16
2395; MOVREL-NEXT:    v_mov_b32_e32 v17, s17
2396; MOVREL-NEXT:    v_mov_b32_e32 v18, s18
2397; MOVREL-NEXT:    v_mov_b32_e32 v19, s19
2398; MOVREL-NEXT:    v_mov_b32_e32 v20, s20
2399; MOVREL-NEXT:    v_mov_b32_e32 v21, s21
2400; MOVREL-NEXT:    v_mov_b32_e32 v22, s22
2401; MOVREL-NEXT:    v_mov_b32_e32 v23, s23
2402; MOVREL-NEXT:    v_mov_b32_e32 v24, s24
2403; MOVREL-NEXT:    v_mov_b32_e32 v25, s25
2404; MOVREL-NEXT:    v_mov_b32_e32 v26, s26
2405; MOVREL-NEXT:    v_mov_b32_e32 v27, s27
2406; MOVREL-NEXT:    v_mov_b32_e32 v28, s28
2407; MOVREL-NEXT:    v_mov_b32_e32 v29, s29
2408; MOVREL-NEXT:    v_mov_b32_e32 v30, s30
2409; MOVREL-NEXT:    v_mov_b32_e32 v31, s31
2410; MOVREL-NEXT:    ; return to shader part epilog
2411entry:
2412  %insert = insertelement <32 x float> %vec, float %val, i32 %idx
2413  ret <32 x float> %insert
2414}
2415
2416define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_s_s(<16 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) {
2417; GPRIDX-LABEL: dyn_insertelement_v16i64_s_s_s:
2418; GPRIDX:       ; %bb.0: ; %entry
2419; GPRIDX-NEXT:    s_mov_b32 s0, s2
2420; GPRIDX-NEXT:    s_mov_b32 s1, s3
2421; GPRIDX-NEXT:    s_mov_b32 s2, s4
2422; GPRIDX-NEXT:    s_mov_b32 s3, s5
2423; GPRIDX-NEXT:    s_mov_b32 s4, s6
2424; GPRIDX-NEXT:    s_mov_b32 s5, s7
2425; GPRIDX-NEXT:    s_mov_b32 s6, s8
2426; GPRIDX-NEXT:    s_mov_b32 s7, s9
2427; GPRIDX-NEXT:    s_mov_b32 s8, s10
2428; GPRIDX-NEXT:    s_mov_b32 s9, s11
2429; GPRIDX-NEXT:    s_mov_b32 s10, s12
2430; GPRIDX-NEXT:    s_mov_b32 s11, s13
2431; GPRIDX-NEXT:    s_mov_b32 s12, s14
2432; GPRIDX-NEXT:    s_mov_b32 s13, s15
2433; GPRIDX-NEXT:    s_mov_b32 s14, s16
2434; GPRIDX-NEXT:    s_mov_b32 s15, s17
2435; GPRIDX-NEXT:    s_mov_b32 s16, s18
2436; GPRIDX-NEXT:    s_mov_b32 s17, s19
2437; GPRIDX-NEXT:    s_mov_b32 s18, s20
2438; GPRIDX-NEXT:    s_mov_b32 s19, s21
2439; GPRIDX-NEXT:    s_mov_b32 s20, s22
2440; GPRIDX-NEXT:    s_mov_b32 s21, s23
2441; GPRIDX-NEXT:    s_mov_b32 s22, s24
2442; GPRIDX-NEXT:    s_mov_b32 s23, s25
2443; GPRIDX-NEXT:    s_mov_b32 s24, s26
2444; GPRIDX-NEXT:    s_mov_b32 s25, s27
2445; GPRIDX-NEXT:    s_mov_b32 s26, s28
2446; GPRIDX-NEXT:    s_mov_b32 s27, s29
2447; GPRIDX-NEXT:    s_mov_b32 s28, s30
2448; GPRIDX-NEXT:    s_mov_b32 s29, s31
2449; GPRIDX-NEXT:    s_mov_b32 s31, s33
2450; GPRIDX-NEXT:    s_mov_b32 s30, s32
2451; GPRIDX-NEXT:    s_mov_b32 m0, s36
2452; GPRIDX-NEXT:    s_nop 0
2453; GPRIDX-NEXT:    s_movreld_b64 s[0:1], s[34:35]
2454; GPRIDX-NEXT:    ; return to shader part epilog
2455;
2456; MOVREL-LABEL: dyn_insertelement_v16i64_s_s_s:
2457; MOVREL:       ; %bb.0: ; %entry
2458; MOVREL-NEXT:    s_mov_b32 s0, s2
2459; MOVREL-NEXT:    s_mov_b32 s1, s3
2460; MOVREL-NEXT:    s_mov_b32 m0, s36
2461; MOVREL-NEXT:    s_mov_b32 s2, s4
2462; MOVREL-NEXT:    s_mov_b32 s3, s5
2463; MOVREL-NEXT:    s_mov_b32 s4, s6
2464; MOVREL-NEXT:    s_mov_b32 s5, s7
2465; MOVREL-NEXT:    s_mov_b32 s6, s8
2466; MOVREL-NEXT:    s_mov_b32 s7, s9
2467; MOVREL-NEXT:    s_mov_b32 s8, s10
2468; MOVREL-NEXT:    s_mov_b32 s9, s11
2469; MOVREL-NEXT:    s_mov_b32 s10, s12
2470; MOVREL-NEXT:    s_mov_b32 s11, s13
2471; MOVREL-NEXT:    s_mov_b32 s12, s14
2472; MOVREL-NEXT:    s_mov_b32 s13, s15
2473; MOVREL-NEXT:    s_mov_b32 s14, s16
2474; MOVREL-NEXT:    s_mov_b32 s15, s17
2475; MOVREL-NEXT:    s_mov_b32 s16, s18
2476; MOVREL-NEXT:    s_mov_b32 s17, s19
2477; MOVREL-NEXT:    s_mov_b32 s18, s20
2478; MOVREL-NEXT:    s_mov_b32 s19, s21
2479; MOVREL-NEXT:    s_mov_b32 s20, s22
2480; MOVREL-NEXT:    s_mov_b32 s21, s23
2481; MOVREL-NEXT:    s_mov_b32 s22, s24
2482; MOVREL-NEXT:    s_mov_b32 s23, s25
2483; MOVREL-NEXT:    s_mov_b32 s24, s26
2484; MOVREL-NEXT:    s_mov_b32 s25, s27
2485; MOVREL-NEXT:    s_mov_b32 s26, s28
2486; MOVREL-NEXT:    s_mov_b32 s27, s29
2487; MOVREL-NEXT:    s_mov_b32 s28, s30
2488; MOVREL-NEXT:    s_mov_b32 s29, s31
2489; MOVREL-NEXT:    s_mov_b32 s31, s33
2490; MOVREL-NEXT:    s_mov_b32 s30, s32
2491; MOVREL-NEXT:    s_movreld_b64 s[0:1], s[34:35]
2492; MOVREL-NEXT:    ; return to shader part epilog
2493entry:
2494  %insert = insertelement <16 x i64> %vec, i64 %val, i32 %idx
2495  ret <16 x i64> %insert
2496}
2497
2498define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_s_s(<16 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
2499; GPRIDX-LABEL: dyn_insertelement_v16f64_s_s_s:
2500; GPRIDX:       ; %bb.0: ; %entry
2501; GPRIDX-NEXT:    s_mov_b32 s0, s2
2502; GPRIDX-NEXT:    s_mov_b32 s1, s3
2503; GPRIDX-NEXT:    s_mov_b32 s2, s4
2504; GPRIDX-NEXT:    s_mov_b32 s3, s5
2505; GPRIDX-NEXT:    s_mov_b32 s4, s6
2506; GPRIDX-NEXT:    s_mov_b32 s5, s7
2507; GPRIDX-NEXT:    s_mov_b32 s6, s8
2508; GPRIDX-NEXT:    s_mov_b32 s7, s9
2509; GPRIDX-NEXT:    s_mov_b32 s8, s10
2510; GPRIDX-NEXT:    s_mov_b32 s9, s11
2511; GPRIDX-NEXT:    s_mov_b32 s10, s12
2512; GPRIDX-NEXT:    s_mov_b32 s11, s13
2513; GPRIDX-NEXT:    s_mov_b32 s12, s14
2514; GPRIDX-NEXT:    s_mov_b32 s13, s15
2515; GPRIDX-NEXT:    s_mov_b32 s14, s16
2516; GPRIDX-NEXT:    s_mov_b32 s15, s17
2517; GPRIDX-NEXT:    s_mov_b32 s16, s18
2518; GPRIDX-NEXT:    s_mov_b32 s17, s19
2519; GPRIDX-NEXT:    s_mov_b32 s18, s20
2520; GPRIDX-NEXT:    s_mov_b32 s19, s21
2521; GPRIDX-NEXT:    s_mov_b32 s20, s22
2522; GPRIDX-NEXT:    s_mov_b32 s21, s23
2523; GPRIDX-NEXT:    s_mov_b32 s22, s24
2524; GPRIDX-NEXT:    s_mov_b32 s23, s25
2525; GPRIDX-NEXT:    s_mov_b32 s24, s26
2526; GPRIDX-NEXT:    s_mov_b32 s25, s27
2527; GPRIDX-NEXT:    s_mov_b32 s26, s28
2528; GPRIDX-NEXT:    s_mov_b32 s27, s29
2529; GPRIDX-NEXT:    s_mov_b32 s28, s30
2530; GPRIDX-NEXT:    s_mov_b32 s29, s31
2531; GPRIDX-NEXT:    s_mov_b32 s31, s33
2532; GPRIDX-NEXT:    s_mov_b32 s30, s32
2533; GPRIDX-NEXT:    s_mov_b32 m0, s36
2534; GPRIDX-NEXT:    s_nop 0
2535; GPRIDX-NEXT:    s_movreld_b64 s[0:1], s[34:35]
2536; GPRIDX-NEXT:    ; return to shader part epilog
2537;
2538; MOVREL-LABEL: dyn_insertelement_v16f64_s_s_s:
2539; MOVREL:       ; %bb.0: ; %entry
2540; MOVREL-NEXT:    s_mov_b32 s0, s2
2541; MOVREL-NEXT:    s_mov_b32 s1, s3
2542; MOVREL-NEXT:    s_mov_b32 m0, s36
2543; MOVREL-NEXT:    s_mov_b32 s2, s4
2544; MOVREL-NEXT:    s_mov_b32 s3, s5
2545; MOVREL-NEXT:    s_mov_b32 s4, s6
2546; MOVREL-NEXT:    s_mov_b32 s5, s7
2547; MOVREL-NEXT:    s_mov_b32 s6, s8
2548; MOVREL-NEXT:    s_mov_b32 s7, s9
2549; MOVREL-NEXT:    s_mov_b32 s8, s10
2550; MOVREL-NEXT:    s_mov_b32 s9, s11
2551; MOVREL-NEXT:    s_mov_b32 s10, s12
2552; MOVREL-NEXT:    s_mov_b32 s11, s13
2553; MOVREL-NEXT:    s_mov_b32 s12, s14
2554; MOVREL-NEXT:    s_mov_b32 s13, s15
2555; MOVREL-NEXT:    s_mov_b32 s14, s16
2556; MOVREL-NEXT:    s_mov_b32 s15, s17
2557; MOVREL-NEXT:    s_mov_b32 s16, s18
2558; MOVREL-NEXT:    s_mov_b32 s17, s19
2559; MOVREL-NEXT:    s_mov_b32 s18, s20
2560; MOVREL-NEXT:    s_mov_b32 s19, s21
2561; MOVREL-NEXT:    s_mov_b32 s20, s22
2562; MOVREL-NEXT:    s_mov_b32 s21, s23
2563; MOVREL-NEXT:    s_mov_b32 s22, s24
2564; MOVREL-NEXT:    s_mov_b32 s23, s25
2565; MOVREL-NEXT:    s_mov_b32 s24, s26
2566; MOVREL-NEXT:    s_mov_b32 s25, s27
2567; MOVREL-NEXT:    s_mov_b32 s26, s28
2568; MOVREL-NEXT:    s_mov_b32 s27, s29
2569; MOVREL-NEXT:    s_mov_b32 s28, s30
2570; MOVREL-NEXT:    s_mov_b32 s29, s31
2571; MOVREL-NEXT:    s_mov_b32 s31, s33
2572; MOVREL-NEXT:    s_mov_b32 s30, s32
2573; MOVREL-NEXT:    s_movreld_b64 s[0:1], s[34:35]
2574; MOVREL-NEXT:    ; return to shader part epilog
2575entry:
2576  %insert = insertelement <16 x double> %vec, double %val, i32 %idx
2577  ret <16 x double> %insert
2578}
2579
2580define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_v_s(<16 x i32> inreg %vec, i32 %val, i32 inreg %idx) {
2581; GPRIDX-LABEL: dyn_insertelement_v16i32_s_v_s:
2582; GPRIDX:       ; %bb.0: ; %entry
2583; GPRIDX-NEXT:    s_mov_b32 s1, s3
2584; GPRIDX-NEXT:    s_mov_b32 s3, s5
2585; GPRIDX-NEXT:    s_mov_b32 s5, s7
2586; GPRIDX-NEXT:    s_mov_b32 s7, s9
2587; GPRIDX-NEXT:    s_mov_b32 s9, s11
2588; GPRIDX-NEXT:    s_mov_b32 s11, s13
2589; GPRIDX-NEXT:    s_mov_b32 s13, s15
2590; GPRIDX-NEXT:    s_mov_b32 s15, s17
2591; GPRIDX-NEXT:    s_mov_b32 s0, s2
2592; GPRIDX-NEXT:    s_mov_b32 s2, s4
2593; GPRIDX-NEXT:    s_mov_b32 s4, s6
2594; GPRIDX-NEXT:    s_mov_b32 s6, s8
2595; GPRIDX-NEXT:    s_mov_b32 s8, s10
2596; GPRIDX-NEXT:    s_mov_b32 s10, s12
2597; GPRIDX-NEXT:    s_mov_b32 s12, s14
2598; GPRIDX-NEXT:    s_mov_b32 s14, s16
2599; GPRIDX-NEXT:    v_mov_b32_e32 v16, s15
2600; GPRIDX-NEXT:    v_mov_b32_e32 v15, s14
2601; GPRIDX-NEXT:    v_mov_b32_e32 v14, s13
2602; GPRIDX-NEXT:    v_mov_b32_e32 v13, s12
2603; GPRIDX-NEXT:    v_mov_b32_e32 v12, s11
2604; GPRIDX-NEXT:    v_mov_b32_e32 v11, s10
2605; GPRIDX-NEXT:    v_mov_b32_e32 v10, s9
2606; GPRIDX-NEXT:    v_mov_b32_e32 v9, s8
2607; GPRIDX-NEXT:    v_mov_b32_e32 v8, s7
2608; GPRIDX-NEXT:    v_mov_b32_e32 v7, s6
2609; GPRIDX-NEXT:    v_mov_b32_e32 v6, s5
2610; GPRIDX-NEXT:    v_mov_b32_e32 v5, s4
2611; GPRIDX-NEXT:    v_mov_b32_e32 v4, s3
2612; GPRIDX-NEXT:    v_mov_b32_e32 v3, s2
2613; GPRIDX-NEXT:    v_mov_b32_e32 v2, s1
2614; GPRIDX-NEXT:    v_mov_b32_e32 v1, s0
2615; GPRIDX-NEXT:    s_set_gpr_idx_on s18, gpr_idx(DST)
2616; GPRIDX-NEXT:    v_mov_b32_e32 v1, v0
2617; GPRIDX-NEXT:    s_set_gpr_idx_off
2618; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v1
2619; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v2
2620; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v3
2621; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v4
2622; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v5
2623; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v6
2624; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v7
2625; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v8
2626; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v9
2627; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v10
2628; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v11
2629; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v12
2630; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v13
2631; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v14
2632; GPRIDX-NEXT:    v_readfirstlane_b32 s14, v15
2633; GPRIDX-NEXT:    v_readfirstlane_b32 s15, v16
2634; GPRIDX-NEXT:    ; return to shader part epilog
2635;
2636; MOVREL-LABEL: dyn_insertelement_v16i32_s_v_s:
2637; MOVREL:       ; %bb.0: ; %entry
2638; MOVREL-NEXT:    s_mov_b32 s1, s3
2639; MOVREL-NEXT:    s_mov_b32 s3, s5
2640; MOVREL-NEXT:    s_mov_b32 s5, s7
2641; MOVREL-NEXT:    s_mov_b32 s7, s9
2642; MOVREL-NEXT:    s_mov_b32 s9, s11
2643; MOVREL-NEXT:    s_mov_b32 s11, s13
2644; MOVREL-NEXT:    s_mov_b32 s13, s15
2645; MOVREL-NEXT:    s_mov_b32 s15, s17
2646; MOVREL-NEXT:    s_mov_b32 s0, s2
2647; MOVREL-NEXT:    s_mov_b32 s2, s4
2648; MOVREL-NEXT:    s_mov_b32 s4, s6
2649; MOVREL-NEXT:    s_mov_b32 s6, s8
2650; MOVREL-NEXT:    s_mov_b32 s8, s10
2651; MOVREL-NEXT:    s_mov_b32 s10, s12
2652; MOVREL-NEXT:    s_mov_b32 s12, s14
2653; MOVREL-NEXT:    s_mov_b32 s14, s16
2654; MOVREL-NEXT:    v_mov_b32_e32 v16, s15
2655; MOVREL-NEXT:    v_mov_b32_e32 v1, s0
2656; MOVREL-NEXT:    s_mov_b32 m0, s18
2657; MOVREL-NEXT:    v_mov_b32_e32 v15, s14
2658; MOVREL-NEXT:    v_mov_b32_e32 v14, s13
2659; MOVREL-NEXT:    v_mov_b32_e32 v13, s12
2660; MOVREL-NEXT:    v_mov_b32_e32 v12, s11
2661; MOVREL-NEXT:    v_mov_b32_e32 v11, s10
2662; MOVREL-NEXT:    v_mov_b32_e32 v10, s9
2663; MOVREL-NEXT:    v_mov_b32_e32 v9, s8
2664; MOVREL-NEXT:    v_mov_b32_e32 v8, s7
2665; MOVREL-NEXT:    v_mov_b32_e32 v7, s6
2666; MOVREL-NEXT:    v_mov_b32_e32 v6, s5
2667; MOVREL-NEXT:    v_mov_b32_e32 v5, s4
2668; MOVREL-NEXT:    v_mov_b32_e32 v4, s3
2669; MOVREL-NEXT:    v_mov_b32_e32 v3, s2
2670; MOVREL-NEXT:    v_mov_b32_e32 v2, s1
2671; MOVREL-NEXT:    v_movreld_b32_e32 v1, v0
2672; MOVREL-NEXT:    v_readfirstlane_b32 s0, v1
2673; MOVREL-NEXT:    v_readfirstlane_b32 s1, v2
2674; MOVREL-NEXT:    v_readfirstlane_b32 s2, v3
2675; MOVREL-NEXT:    v_readfirstlane_b32 s3, v4
2676; MOVREL-NEXT:    v_readfirstlane_b32 s4, v5
2677; MOVREL-NEXT:    v_readfirstlane_b32 s5, v6
2678; MOVREL-NEXT:    v_readfirstlane_b32 s6, v7
2679; MOVREL-NEXT:    v_readfirstlane_b32 s7, v8
2680; MOVREL-NEXT:    v_readfirstlane_b32 s8, v9
2681; MOVREL-NEXT:    v_readfirstlane_b32 s9, v10
2682; MOVREL-NEXT:    v_readfirstlane_b32 s10, v11
2683; MOVREL-NEXT:    v_readfirstlane_b32 s11, v12
2684; MOVREL-NEXT:    v_readfirstlane_b32 s12, v13
2685; MOVREL-NEXT:    v_readfirstlane_b32 s13, v14
2686; MOVREL-NEXT:    v_readfirstlane_b32 s14, v15
2687; MOVREL-NEXT:    v_readfirstlane_b32 s15, v16
2688; MOVREL-NEXT:    ; return to shader part epilog
2689entry:
2690  %insert = insertelement <16 x i32> %vec, i32 %val, i32 %idx
2691  ret <16 x i32> %insert
2692}
2693
2694define amdgpu_ps <16 x float> @dyn_insertelement_v16f32_s_v_s(<16 x float> inreg %vec, float %val, i32 inreg %idx) {
2695; GPRIDX-LABEL: dyn_insertelement_v16f32_s_v_s:
2696; GPRIDX:       ; %bb.0: ; %entry
2697; GPRIDX-NEXT:    s_mov_b32 s0, s2
2698; GPRIDX-NEXT:    s_mov_b32 s1, s3
2699; GPRIDX-NEXT:    s_mov_b32 s2, s4
2700; GPRIDX-NEXT:    s_mov_b32 s3, s5
2701; GPRIDX-NEXT:    s_mov_b32 s4, s6
2702; GPRIDX-NEXT:    s_mov_b32 s5, s7
2703; GPRIDX-NEXT:    s_mov_b32 s6, s8
2704; GPRIDX-NEXT:    s_mov_b32 s7, s9
2705; GPRIDX-NEXT:    s_mov_b32 s8, s10
2706; GPRIDX-NEXT:    s_mov_b32 s9, s11
2707; GPRIDX-NEXT:    s_mov_b32 s10, s12
2708; GPRIDX-NEXT:    s_mov_b32 s11, s13
2709; GPRIDX-NEXT:    s_mov_b32 s12, s14
2710; GPRIDX-NEXT:    s_mov_b32 s13, s15
2711; GPRIDX-NEXT:    s_mov_b32 s14, s16
2712; GPRIDX-NEXT:    s_mov_b32 s15, s17
2713; GPRIDX-NEXT:    v_mov_b32_e32 v16, v0
2714; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
2715; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
2716; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
2717; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
2718; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
2719; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
2720; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
2721; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
2722; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
2723; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
2724; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
2725; GPRIDX-NEXT:    v_mov_b32_e32 v11, s11
2726; GPRIDX-NEXT:    v_mov_b32_e32 v12, s12
2727; GPRIDX-NEXT:    v_mov_b32_e32 v13, s13
2728; GPRIDX-NEXT:    v_mov_b32_e32 v14, s14
2729; GPRIDX-NEXT:    v_mov_b32_e32 v15, s15
2730; GPRIDX-NEXT:    s_set_gpr_idx_on s18, gpr_idx(DST)
2731; GPRIDX-NEXT:    v_mov_b32_e32 v0, v16
2732; GPRIDX-NEXT:    s_set_gpr_idx_off
2733; GPRIDX-NEXT:    ; return to shader part epilog
2734;
2735; MOVREL-LABEL: dyn_insertelement_v16f32_s_v_s:
2736; MOVREL:       ; %bb.0: ; %entry
2737; MOVREL-NEXT:    s_mov_b32 s0, s2
2738; MOVREL-NEXT:    s_mov_b32 s1, s3
2739; MOVREL-NEXT:    s_mov_b32 s2, s4
2740; MOVREL-NEXT:    s_mov_b32 s3, s5
2741; MOVREL-NEXT:    s_mov_b32 s4, s6
2742; MOVREL-NEXT:    s_mov_b32 s5, s7
2743; MOVREL-NEXT:    s_mov_b32 s6, s8
2744; MOVREL-NEXT:    s_mov_b32 s7, s9
2745; MOVREL-NEXT:    s_mov_b32 s8, s10
2746; MOVREL-NEXT:    s_mov_b32 s9, s11
2747; MOVREL-NEXT:    s_mov_b32 s10, s12
2748; MOVREL-NEXT:    s_mov_b32 s11, s13
2749; MOVREL-NEXT:    s_mov_b32 s12, s14
2750; MOVREL-NEXT:    s_mov_b32 s13, s15
2751; MOVREL-NEXT:    s_mov_b32 s14, s16
2752; MOVREL-NEXT:    s_mov_b32 s15, s17
2753; MOVREL-NEXT:    v_mov_b32_e32 v16, v0
2754; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
2755; MOVREL-NEXT:    s_mov_b32 m0, s18
2756; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
2757; MOVREL-NEXT:    v_mov_b32_e32 v2, s2
2758; MOVREL-NEXT:    v_mov_b32_e32 v3, s3
2759; MOVREL-NEXT:    v_mov_b32_e32 v4, s4
2760; MOVREL-NEXT:    v_mov_b32_e32 v5, s5
2761; MOVREL-NEXT:    v_mov_b32_e32 v6, s6
2762; MOVREL-NEXT:    v_mov_b32_e32 v7, s7
2763; MOVREL-NEXT:    v_mov_b32_e32 v8, s8
2764; MOVREL-NEXT:    v_mov_b32_e32 v9, s9
2765; MOVREL-NEXT:    v_mov_b32_e32 v10, s10
2766; MOVREL-NEXT:    v_mov_b32_e32 v11, s11
2767; MOVREL-NEXT:    v_mov_b32_e32 v12, s12
2768; MOVREL-NEXT:    v_mov_b32_e32 v13, s13
2769; MOVREL-NEXT:    v_mov_b32_e32 v14, s14
2770; MOVREL-NEXT:    v_mov_b32_e32 v15, s15
2771; MOVREL-NEXT:    v_movreld_b32_e32 v0, v16
2772; MOVREL-NEXT:    ; return to shader part epilog
2773entry:
2774  %insert = insertelement <16 x float> %vec, float %val, i32 %idx
2775  ret <16 x float> %insert
2776}
2777
2778define amdgpu_ps <32 x float> @dyn_insertelement_v32f32_s_v_s(<32 x float> inreg %vec, float %val, i32 inreg %idx) {
2779; GPRIDX-LABEL: dyn_insertelement_v32f32_s_v_s:
2780; GPRIDX:       ; %bb.0: ; %entry
2781; GPRIDX-NEXT:    s_mov_b32 s0, s2
2782; GPRIDX-NEXT:    s_mov_b32 s1, s3
2783; GPRIDX-NEXT:    s_mov_b32 s2, s4
2784; GPRIDX-NEXT:    s_mov_b32 s3, s5
2785; GPRIDX-NEXT:    s_mov_b32 s4, s6
2786; GPRIDX-NEXT:    s_mov_b32 s5, s7
2787; GPRIDX-NEXT:    s_mov_b32 s6, s8
2788; GPRIDX-NEXT:    s_mov_b32 s7, s9
2789; GPRIDX-NEXT:    s_mov_b32 s8, s10
2790; GPRIDX-NEXT:    s_mov_b32 s9, s11
2791; GPRIDX-NEXT:    s_mov_b32 s10, s12
2792; GPRIDX-NEXT:    s_mov_b32 s11, s13
2793; GPRIDX-NEXT:    s_mov_b32 s12, s14
2794; GPRIDX-NEXT:    s_mov_b32 s13, s15
2795; GPRIDX-NEXT:    s_mov_b32 s14, s16
2796; GPRIDX-NEXT:    s_mov_b32 s15, s17
2797; GPRIDX-NEXT:    s_mov_b32 s16, s18
2798; GPRIDX-NEXT:    s_mov_b32 s17, s19
2799; GPRIDX-NEXT:    s_mov_b32 s18, s20
2800; GPRIDX-NEXT:    s_mov_b32 s19, s21
2801; GPRIDX-NEXT:    s_mov_b32 s20, s22
2802; GPRIDX-NEXT:    s_mov_b32 s21, s23
2803; GPRIDX-NEXT:    s_mov_b32 s22, s24
2804; GPRIDX-NEXT:    s_mov_b32 s23, s25
2805; GPRIDX-NEXT:    s_mov_b32 s24, s26
2806; GPRIDX-NEXT:    s_mov_b32 s25, s27
2807; GPRIDX-NEXT:    s_mov_b32 s26, s28
2808; GPRIDX-NEXT:    s_mov_b32 s27, s29
2809; GPRIDX-NEXT:    s_mov_b32 s28, s30
2810; GPRIDX-NEXT:    s_mov_b32 s29, s31
2811; GPRIDX-NEXT:    s_mov_b32 s31, s33
2812; GPRIDX-NEXT:    v_mov_b32_e32 v32, v0
2813; GPRIDX-NEXT:    s_mov_b32 s30, s32
2814; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
2815; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
2816; GPRIDX-NEXT:    v_mov_b32_e32 v2, s2
2817; GPRIDX-NEXT:    v_mov_b32_e32 v3, s3
2818; GPRIDX-NEXT:    v_mov_b32_e32 v4, s4
2819; GPRIDX-NEXT:    v_mov_b32_e32 v5, s5
2820; GPRIDX-NEXT:    v_mov_b32_e32 v6, s6
2821; GPRIDX-NEXT:    v_mov_b32_e32 v7, s7
2822; GPRIDX-NEXT:    v_mov_b32_e32 v8, s8
2823; GPRIDX-NEXT:    v_mov_b32_e32 v9, s9
2824; GPRIDX-NEXT:    v_mov_b32_e32 v10, s10
2825; GPRIDX-NEXT:    v_mov_b32_e32 v11, s11
2826; GPRIDX-NEXT:    v_mov_b32_e32 v12, s12
2827; GPRIDX-NEXT:    v_mov_b32_e32 v13, s13
2828; GPRIDX-NEXT:    v_mov_b32_e32 v14, s14
2829; GPRIDX-NEXT:    v_mov_b32_e32 v15, s15
2830; GPRIDX-NEXT:    v_mov_b32_e32 v16, s16
2831; GPRIDX-NEXT:    v_mov_b32_e32 v17, s17
2832; GPRIDX-NEXT:    v_mov_b32_e32 v18, s18
2833; GPRIDX-NEXT:    v_mov_b32_e32 v19, s19
2834; GPRIDX-NEXT:    v_mov_b32_e32 v20, s20
2835; GPRIDX-NEXT:    v_mov_b32_e32 v21, s21
2836; GPRIDX-NEXT:    v_mov_b32_e32 v22, s22
2837; GPRIDX-NEXT:    v_mov_b32_e32 v23, s23
2838; GPRIDX-NEXT:    v_mov_b32_e32 v24, s24
2839; GPRIDX-NEXT:    v_mov_b32_e32 v25, s25
2840; GPRIDX-NEXT:    v_mov_b32_e32 v26, s26
2841; GPRIDX-NEXT:    v_mov_b32_e32 v27, s27
2842; GPRIDX-NEXT:    v_mov_b32_e32 v28, s28
2843; GPRIDX-NEXT:    v_mov_b32_e32 v29, s29
2844; GPRIDX-NEXT:    v_mov_b32_e32 v30, s30
2845; GPRIDX-NEXT:    v_mov_b32_e32 v31, s31
2846; GPRIDX-NEXT:    s_set_gpr_idx_on s34, gpr_idx(DST)
2847; GPRIDX-NEXT:    v_mov_b32_e32 v0, v32
2848; GPRIDX-NEXT:    s_set_gpr_idx_off
2849; GPRIDX-NEXT:    ; return to shader part epilog
2850;
2851; MOVREL-LABEL: dyn_insertelement_v32f32_s_v_s:
2852; MOVREL:       ; %bb.0: ; %entry
2853; MOVREL-NEXT:    s_mov_b32 s0, s2
2854; MOVREL-NEXT:    s_mov_b32 s1, s3
2855; MOVREL-NEXT:    s_mov_b32 s2, s4
2856; MOVREL-NEXT:    s_mov_b32 s3, s5
2857; MOVREL-NEXT:    s_mov_b32 s4, s6
2858; MOVREL-NEXT:    s_mov_b32 s5, s7
2859; MOVREL-NEXT:    s_mov_b32 s6, s8
2860; MOVREL-NEXT:    s_mov_b32 s7, s9
2861; MOVREL-NEXT:    s_mov_b32 s8, s10
2862; MOVREL-NEXT:    s_mov_b32 s9, s11
2863; MOVREL-NEXT:    s_mov_b32 s10, s12
2864; MOVREL-NEXT:    s_mov_b32 s11, s13
2865; MOVREL-NEXT:    s_mov_b32 s12, s14
2866; MOVREL-NEXT:    s_mov_b32 s13, s15
2867; MOVREL-NEXT:    s_mov_b32 s14, s16
2868; MOVREL-NEXT:    s_mov_b32 s15, s17
2869; MOVREL-NEXT:    s_mov_b32 s16, s18
2870; MOVREL-NEXT:    s_mov_b32 s17, s19
2871; MOVREL-NEXT:    s_mov_b32 s18, s20
2872; MOVREL-NEXT:    s_mov_b32 s19, s21
2873; MOVREL-NEXT:    s_mov_b32 s20, s22
2874; MOVREL-NEXT:    s_mov_b32 s21, s23
2875; MOVREL-NEXT:    s_mov_b32 s22, s24
2876; MOVREL-NEXT:    s_mov_b32 s23, s25
2877; MOVREL-NEXT:    s_mov_b32 s24, s26
2878; MOVREL-NEXT:    s_mov_b32 s25, s27
2879; MOVREL-NEXT:    s_mov_b32 s26, s28
2880; MOVREL-NEXT:    s_mov_b32 s27, s29
2881; MOVREL-NEXT:    s_mov_b32 s28, s30
2882; MOVREL-NEXT:    s_mov_b32 s29, s31
2883; MOVREL-NEXT:    s_mov_b32 s31, s33
2884; MOVREL-NEXT:    v_mov_b32_e32 v32, v0
2885; MOVREL-NEXT:    s_mov_b32 s30, s32
2886; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
2887; MOVREL-NEXT:    s_mov_b32 m0, s34
2888; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
2889; MOVREL-NEXT:    v_mov_b32_e32 v2, s2
2890; MOVREL-NEXT:    v_mov_b32_e32 v3, s3
2891; MOVREL-NEXT:    v_mov_b32_e32 v4, s4
2892; MOVREL-NEXT:    v_mov_b32_e32 v5, s5
2893; MOVREL-NEXT:    v_mov_b32_e32 v6, s6
2894; MOVREL-NEXT:    v_mov_b32_e32 v7, s7
2895; MOVREL-NEXT:    v_mov_b32_e32 v8, s8
2896; MOVREL-NEXT:    v_mov_b32_e32 v9, s9
2897; MOVREL-NEXT:    v_mov_b32_e32 v10, s10
2898; MOVREL-NEXT:    v_mov_b32_e32 v11, s11
2899; MOVREL-NEXT:    v_mov_b32_e32 v12, s12
2900; MOVREL-NEXT:    v_mov_b32_e32 v13, s13
2901; MOVREL-NEXT:    v_mov_b32_e32 v14, s14
2902; MOVREL-NEXT:    v_mov_b32_e32 v15, s15
2903; MOVREL-NEXT:    v_mov_b32_e32 v16, s16
2904; MOVREL-NEXT:    v_mov_b32_e32 v17, s17
2905; MOVREL-NEXT:    v_mov_b32_e32 v18, s18
2906; MOVREL-NEXT:    v_mov_b32_e32 v19, s19
2907; MOVREL-NEXT:    v_mov_b32_e32 v20, s20
2908; MOVREL-NEXT:    v_mov_b32_e32 v21, s21
2909; MOVREL-NEXT:    v_mov_b32_e32 v22, s22
2910; MOVREL-NEXT:    v_mov_b32_e32 v23, s23
2911; MOVREL-NEXT:    v_mov_b32_e32 v24, s24
2912; MOVREL-NEXT:    v_mov_b32_e32 v25, s25
2913; MOVREL-NEXT:    v_mov_b32_e32 v26, s26
2914; MOVREL-NEXT:    v_mov_b32_e32 v27, s27
2915; MOVREL-NEXT:    v_mov_b32_e32 v28, s28
2916; MOVREL-NEXT:    v_mov_b32_e32 v29, s29
2917; MOVREL-NEXT:    v_mov_b32_e32 v30, s30
2918; MOVREL-NEXT:    v_mov_b32_e32 v31, s31
2919; MOVREL-NEXT:    v_movreld_b32_e32 v0, v32
2920; MOVREL-NEXT:    ; return to shader part epilog
2921entry:
2922  %insert = insertelement <32 x float> %vec, float %val, i32 %idx
2923  ret <32 x float> %insert
2924}
2925
2926define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_v_s(<16 x i64> inreg %vec, i64 %val, i32 inreg %idx) {
2927; GPRIDX-LABEL: dyn_insertelement_v16i64_s_v_s:
2928; GPRIDX:       ; %bb.0: ; %entry
2929; GPRIDX-NEXT:    s_mov_b32 s1, s3
2930; GPRIDX-NEXT:    s_mov_b32 s3, s5
2931; GPRIDX-NEXT:    s_mov_b32 s5, s7
2932; GPRIDX-NEXT:    s_mov_b32 s7, s9
2933; GPRIDX-NEXT:    s_mov_b32 s9, s11
2934; GPRIDX-NEXT:    s_mov_b32 s11, s13
2935; GPRIDX-NEXT:    s_mov_b32 s13, s15
2936; GPRIDX-NEXT:    s_mov_b32 s15, s17
2937; GPRIDX-NEXT:    s_mov_b32 s17, s19
2938; GPRIDX-NEXT:    s_mov_b32 s19, s21
2939; GPRIDX-NEXT:    s_mov_b32 s21, s23
2940; GPRIDX-NEXT:    s_mov_b32 s23, s25
2941; GPRIDX-NEXT:    s_mov_b32 s25, s27
2942; GPRIDX-NEXT:    s_mov_b32 s27, s29
2943; GPRIDX-NEXT:    s_mov_b32 s29, s31
2944; GPRIDX-NEXT:    s_mov_b32 s31, s33
2945; GPRIDX-NEXT:    s_mov_b32 s0, s2
2946; GPRIDX-NEXT:    s_mov_b32 s2, s4
2947; GPRIDX-NEXT:    s_mov_b32 s4, s6
2948; GPRIDX-NEXT:    s_mov_b32 s6, s8
2949; GPRIDX-NEXT:    s_mov_b32 s8, s10
2950; GPRIDX-NEXT:    s_mov_b32 s10, s12
2951; GPRIDX-NEXT:    s_mov_b32 s12, s14
2952; GPRIDX-NEXT:    s_mov_b32 s14, s16
2953; GPRIDX-NEXT:    s_mov_b32 s16, s18
2954; GPRIDX-NEXT:    s_mov_b32 s18, s20
2955; GPRIDX-NEXT:    s_mov_b32 s20, s22
2956; GPRIDX-NEXT:    s_mov_b32 s22, s24
2957; GPRIDX-NEXT:    s_mov_b32 s24, s26
2958; GPRIDX-NEXT:    s_mov_b32 s26, s28
2959; GPRIDX-NEXT:    s_mov_b32 s28, s30
2960; GPRIDX-NEXT:    s_mov_b32 s30, s32
2961; GPRIDX-NEXT:    v_mov_b32_e32 v33, s31
2962; GPRIDX-NEXT:    s_lshl_b32 s33, s34, 1
2963; GPRIDX-NEXT:    v_mov_b32_e32 v32, s30
2964; GPRIDX-NEXT:    v_mov_b32_e32 v31, s29
2965; GPRIDX-NEXT:    v_mov_b32_e32 v30, s28
2966; GPRIDX-NEXT:    v_mov_b32_e32 v29, s27
2967; GPRIDX-NEXT:    v_mov_b32_e32 v28, s26
2968; GPRIDX-NEXT:    v_mov_b32_e32 v27, s25
2969; GPRIDX-NEXT:    v_mov_b32_e32 v26, s24
2970; GPRIDX-NEXT:    v_mov_b32_e32 v25, s23
2971; GPRIDX-NEXT:    v_mov_b32_e32 v24, s22
2972; GPRIDX-NEXT:    v_mov_b32_e32 v23, s21
2973; GPRIDX-NEXT:    v_mov_b32_e32 v22, s20
2974; GPRIDX-NEXT:    v_mov_b32_e32 v21, s19
2975; GPRIDX-NEXT:    v_mov_b32_e32 v20, s18
2976; GPRIDX-NEXT:    v_mov_b32_e32 v19, s17
2977; GPRIDX-NEXT:    v_mov_b32_e32 v18, s16
2978; GPRIDX-NEXT:    v_mov_b32_e32 v17, s15
2979; GPRIDX-NEXT:    v_mov_b32_e32 v16, s14
2980; GPRIDX-NEXT:    v_mov_b32_e32 v15, s13
2981; GPRIDX-NEXT:    v_mov_b32_e32 v14, s12
2982; GPRIDX-NEXT:    v_mov_b32_e32 v13, s11
2983; GPRIDX-NEXT:    v_mov_b32_e32 v12, s10
2984; GPRIDX-NEXT:    v_mov_b32_e32 v11, s9
2985; GPRIDX-NEXT:    v_mov_b32_e32 v10, s8
2986; GPRIDX-NEXT:    v_mov_b32_e32 v9, s7
2987; GPRIDX-NEXT:    v_mov_b32_e32 v8, s6
2988; GPRIDX-NEXT:    v_mov_b32_e32 v7, s5
2989; GPRIDX-NEXT:    v_mov_b32_e32 v6, s4
2990; GPRIDX-NEXT:    v_mov_b32_e32 v5, s3
2991; GPRIDX-NEXT:    v_mov_b32_e32 v4, s2
2992; GPRIDX-NEXT:    v_mov_b32_e32 v3, s1
2993; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
2994; GPRIDX-NEXT:    s_set_gpr_idx_on s33, gpr_idx(DST)
2995; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
2996; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
2997; GPRIDX-NEXT:    s_set_gpr_idx_off
2998; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v2
2999; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v3
3000; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v4
3001; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v5
3002; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v6
3003; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v7
3004; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v8
3005; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v9
3006; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v10
3007; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v11
3008; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v12
3009; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v13
3010; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v14
3011; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v15
3012; GPRIDX-NEXT:    v_readfirstlane_b32 s14, v16
3013; GPRIDX-NEXT:    v_readfirstlane_b32 s15, v17
3014; GPRIDX-NEXT:    v_readfirstlane_b32 s16, v18
3015; GPRIDX-NEXT:    v_readfirstlane_b32 s17, v19
3016; GPRIDX-NEXT:    v_readfirstlane_b32 s18, v20
3017; GPRIDX-NEXT:    v_readfirstlane_b32 s19, v21
3018; GPRIDX-NEXT:    v_readfirstlane_b32 s20, v22
3019; GPRIDX-NEXT:    v_readfirstlane_b32 s21, v23
3020; GPRIDX-NEXT:    v_readfirstlane_b32 s22, v24
3021; GPRIDX-NEXT:    v_readfirstlane_b32 s23, v25
3022; GPRIDX-NEXT:    v_readfirstlane_b32 s24, v26
3023; GPRIDX-NEXT:    v_readfirstlane_b32 s25, v27
3024; GPRIDX-NEXT:    v_readfirstlane_b32 s26, v28
3025; GPRIDX-NEXT:    v_readfirstlane_b32 s27, v29
3026; GPRIDX-NEXT:    v_readfirstlane_b32 s28, v30
3027; GPRIDX-NEXT:    v_readfirstlane_b32 s29, v31
3028; GPRIDX-NEXT:    v_readfirstlane_b32 s30, v32
3029; GPRIDX-NEXT:    v_readfirstlane_b32 s31, v33
3030; GPRIDX-NEXT:    ; return to shader part epilog
3031;
3032; MOVREL-LABEL: dyn_insertelement_v16i64_s_v_s:
3033; MOVREL:       ; %bb.0: ; %entry
3034; MOVREL-NEXT:    s_mov_b32 s1, s3
3035; MOVREL-NEXT:    s_mov_b32 s3, s5
3036; MOVREL-NEXT:    s_mov_b32 s5, s7
3037; MOVREL-NEXT:    s_mov_b32 s7, s9
3038; MOVREL-NEXT:    s_mov_b32 s9, s11
3039; MOVREL-NEXT:    s_mov_b32 s11, s13
3040; MOVREL-NEXT:    s_mov_b32 s13, s15
3041; MOVREL-NEXT:    s_mov_b32 s15, s17
3042; MOVREL-NEXT:    s_mov_b32 s17, s19
3043; MOVREL-NEXT:    s_mov_b32 s19, s21
3044; MOVREL-NEXT:    s_mov_b32 s21, s23
3045; MOVREL-NEXT:    s_mov_b32 s23, s25
3046; MOVREL-NEXT:    s_mov_b32 s25, s27
3047; MOVREL-NEXT:    s_mov_b32 s27, s29
3048; MOVREL-NEXT:    s_mov_b32 s29, s31
3049; MOVREL-NEXT:    s_mov_b32 s31, s33
3050; MOVREL-NEXT:    s_mov_b32 s0, s2
3051; MOVREL-NEXT:    s_mov_b32 s2, s4
3052; MOVREL-NEXT:    s_mov_b32 s4, s6
3053; MOVREL-NEXT:    s_mov_b32 s6, s8
3054; MOVREL-NEXT:    s_mov_b32 s8, s10
3055; MOVREL-NEXT:    s_mov_b32 s10, s12
3056; MOVREL-NEXT:    s_mov_b32 s12, s14
3057; MOVREL-NEXT:    s_mov_b32 s14, s16
3058; MOVREL-NEXT:    s_mov_b32 s16, s18
3059; MOVREL-NEXT:    s_mov_b32 s18, s20
3060; MOVREL-NEXT:    s_mov_b32 s20, s22
3061; MOVREL-NEXT:    s_mov_b32 s22, s24
3062; MOVREL-NEXT:    s_mov_b32 s24, s26
3063; MOVREL-NEXT:    s_mov_b32 s26, s28
3064; MOVREL-NEXT:    s_mov_b32 s28, s30
3065; MOVREL-NEXT:    s_mov_b32 s30, s32
3066; MOVREL-NEXT:    v_mov_b32_e32 v33, s31
3067; MOVREL-NEXT:    v_mov_b32_e32 v2, s0
3068; MOVREL-NEXT:    s_lshl_b32 m0, s34, 1
3069; MOVREL-NEXT:    v_mov_b32_e32 v32, s30
3070; MOVREL-NEXT:    v_mov_b32_e32 v31, s29
3071; MOVREL-NEXT:    v_mov_b32_e32 v30, s28
3072; MOVREL-NEXT:    v_mov_b32_e32 v29, s27
3073; MOVREL-NEXT:    v_mov_b32_e32 v28, s26
3074; MOVREL-NEXT:    v_mov_b32_e32 v27, s25
3075; MOVREL-NEXT:    v_mov_b32_e32 v26, s24
3076; MOVREL-NEXT:    v_mov_b32_e32 v25, s23
3077; MOVREL-NEXT:    v_mov_b32_e32 v24, s22
3078; MOVREL-NEXT:    v_mov_b32_e32 v23, s21
3079; MOVREL-NEXT:    v_mov_b32_e32 v22, s20
3080; MOVREL-NEXT:    v_mov_b32_e32 v21, s19
3081; MOVREL-NEXT:    v_mov_b32_e32 v20, s18
3082; MOVREL-NEXT:    v_mov_b32_e32 v19, s17
3083; MOVREL-NEXT:    v_mov_b32_e32 v18, s16
3084; MOVREL-NEXT:    v_mov_b32_e32 v17, s15
3085; MOVREL-NEXT:    v_mov_b32_e32 v16, s14
3086; MOVREL-NEXT:    v_mov_b32_e32 v15, s13
3087; MOVREL-NEXT:    v_mov_b32_e32 v14, s12
3088; MOVREL-NEXT:    v_mov_b32_e32 v13, s11
3089; MOVREL-NEXT:    v_mov_b32_e32 v12, s10
3090; MOVREL-NEXT:    v_mov_b32_e32 v11, s9
3091; MOVREL-NEXT:    v_mov_b32_e32 v10, s8
3092; MOVREL-NEXT:    v_mov_b32_e32 v9, s7
3093; MOVREL-NEXT:    v_mov_b32_e32 v8, s6
3094; MOVREL-NEXT:    v_mov_b32_e32 v7, s5
3095; MOVREL-NEXT:    v_mov_b32_e32 v6, s4
3096; MOVREL-NEXT:    v_mov_b32_e32 v5, s3
3097; MOVREL-NEXT:    v_mov_b32_e32 v4, s2
3098; MOVREL-NEXT:    v_mov_b32_e32 v3, s1
3099; MOVREL-NEXT:    v_movreld_b32_e32 v2, v0
3100; MOVREL-NEXT:    v_movreld_b32_e32 v3, v1
3101; MOVREL-NEXT:    v_readfirstlane_b32 s0, v2
3102; MOVREL-NEXT:    v_readfirstlane_b32 s1, v3
3103; MOVREL-NEXT:    v_readfirstlane_b32 s2, v4
3104; MOVREL-NEXT:    v_readfirstlane_b32 s3, v5
3105; MOVREL-NEXT:    v_readfirstlane_b32 s4, v6
3106; MOVREL-NEXT:    v_readfirstlane_b32 s5, v7
3107; MOVREL-NEXT:    v_readfirstlane_b32 s6, v8
3108; MOVREL-NEXT:    v_readfirstlane_b32 s7, v9
3109; MOVREL-NEXT:    v_readfirstlane_b32 s8, v10
3110; MOVREL-NEXT:    v_readfirstlane_b32 s9, v11
3111; MOVREL-NEXT:    v_readfirstlane_b32 s10, v12
3112; MOVREL-NEXT:    v_readfirstlane_b32 s11, v13
3113; MOVREL-NEXT:    v_readfirstlane_b32 s12, v14
3114; MOVREL-NEXT:    v_readfirstlane_b32 s13, v15
3115; MOVREL-NEXT:    v_readfirstlane_b32 s14, v16
3116; MOVREL-NEXT:    v_readfirstlane_b32 s15, v17
3117; MOVREL-NEXT:    v_readfirstlane_b32 s16, v18
3118; MOVREL-NEXT:    v_readfirstlane_b32 s17, v19
3119; MOVREL-NEXT:    v_readfirstlane_b32 s18, v20
3120; MOVREL-NEXT:    v_readfirstlane_b32 s19, v21
3121; MOVREL-NEXT:    v_readfirstlane_b32 s20, v22
3122; MOVREL-NEXT:    v_readfirstlane_b32 s21, v23
3123; MOVREL-NEXT:    v_readfirstlane_b32 s22, v24
3124; MOVREL-NEXT:    v_readfirstlane_b32 s23, v25
3125; MOVREL-NEXT:    v_readfirstlane_b32 s24, v26
3126; MOVREL-NEXT:    v_readfirstlane_b32 s25, v27
3127; MOVREL-NEXT:    v_readfirstlane_b32 s26, v28
3128; MOVREL-NEXT:    v_readfirstlane_b32 s27, v29
3129; MOVREL-NEXT:    v_readfirstlane_b32 s28, v30
3130; MOVREL-NEXT:    v_readfirstlane_b32 s29, v31
3131; MOVREL-NEXT:    v_readfirstlane_b32 s30, v32
3132; MOVREL-NEXT:    v_readfirstlane_b32 s31, v33
3133; MOVREL-NEXT:    ; return to shader part epilog
3134entry:
3135  %insert = insertelement <16 x i64> %vec, i64 %val, i32 %idx
3136  ret <16 x i64> %insert
3137}
3138
3139define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_v_s(<16 x double> inreg %vec, double %val, i32 inreg %idx) {
3140; GPRIDX-LABEL: dyn_insertelement_v16f64_s_v_s:
3141; GPRIDX:       ; %bb.0: ; %entry
3142; GPRIDX-NEXT:    s_mov_b32 s1, s3
3143; GPRIDX-NEXT:    s_mov_b32 s3, s5
3144; GPRIDX-NEXT:    s_mov_b32 s5, s7
3145; GPRIDX-NEXT:    s_mov_b32 s7, s9
3146; GPRIDX-NEXT:    s_mov_b32 s9, s11
3147; GPRIDX-NEXT:    s_mov_b32 s11, s13
3148; GPRIDX-NEXT:    s_mov_b32 s13, s15
3149; GPRIDX-NEXT:    s_mov_b32 s15, s17
3150; GPRIDX-NEXT:    s_mov_b32 s17, s19
3151; GPRIDX-NEXT:    s_mov_b32 s19, s21
3152; GPRIDX-NEXT:    s_mov_b32 s21, s23
3153; GPRIDX-NEXT:    s_mov_b32 s23, s25
3154; GPRIDX-NEXT:    s_mov_b32 s25, s27
3155; GPRIDX-NEXT:    s_mov_b32 s27, s29
3156; GPRIDX-NEXT:    s_mov_b32 s29, s31
3157; GPRIDX-NEXT:    s_mov_b32 s31, s33
3158; GPRIDX-NEXT:    s_mov_b32 s0, s2
3159; GPRIDX-NEXT:    s_mov_b32 s2, s4
3160; GPRIDX-NEXT:    s_mov_b32 s4, s6
3161; GPRIDX-NEXT:    s_mov_b32 s6, s8
3162; GPRIDX-NEXT:    s_mov_b32 s8, s10
3163; GPRIDX-NEXT:    s_mov_b32 s10, s12
3164; GPRIDX-NEXT:    s_mov_b32 s12, s14
3165; GPRIDX-NEXT:    s_mov_b32 s14, s16
3166; GPRIDX-NEXT:    s_mov_b32 s16, s18
3167; GPRIDX-NEXT:    s_mov_b32 s18, s20
3168; GPRIDX-NEXT:    s_mov_b32 s20, s22
3169; GPRIDX-NEXT:    s_mov_b32 s22, s24
3170; GPRIDX-NEXT:    s_mov_b32 s24, s26
3171; GPRIDX-NEXT:    s_mov_b32 s26, s28
3172; GPRIDX-NEXT:    s_mov_b32 s28, s30
3173; GPRIDX-NEXT:    s_mov_b32 s30, s32
3174; GPRIDX-NEXT:    v_mov_b32_e32 v33, s31
3175; GPRIDX-NEXT:    s_lshl_b32 s33, s34, 1
3176; GPRIDX-NEXT:    v_mov_b32_e32 v32, s30
3177; GPRIDX-NEXT:    v_mov_b32_e32 v31, s29
3178; GPRIDX-NEXT:    v_mov_b32_e32 v30, s28
3179; GPRIDX-NEXT:    v_mov_b32_e32 v29, s27
3180; GPRIDX-NEXT:    v_mov_b32_e32 v28, s26
3181; GPRIDX-NEXT:    v_mov_b32_e32 v27, s25
3182; GPRIDX-NEXT:    v_mov_b32_e32 v26, s24
3183; GPRIDX-NEXT:    v_mov_b32_e32 v25, s23
3184; GPRIDX-NEXT:    v_mov_b32_e32 v24, s22
3185; GPRIDX-NEXT:    v_mov_b32_e32 v23, s21
3186; GPRIDX-NEXT:    v_mov_b32_e32 v22, s20
3187; GPRIDX-NEXT:    v_mov_b32_e32 v21, s19
3188; GPRIDX-NEXT:    v_mov_b32_e32 v20, s18
3189; GPRIDX-NEXT:    v_mov_b32_e32 v19, s17
3190; GPRIDX-NEXT:    v_mov_b32_e32 v18, s16
3191; GPRIDX-NEXT:    v_mov_b32_e32 v17, s15
3192; GPRIDX-NEXT:    v_mov_b32_e32 v16, s14
3193; GPRIDX-NEXT:    v_mov_b32_e32 v15, s13
3194; GPRIDX-NEXT:    v_mov_b32_e32 v14, s12
3195; GPRIDX-NEXT:    v_mov_b32_e32 v13, s11
3196; GPRIDX-NEXT:    v_mov_b32_e32 v12, s10
3197; GPRIDX-NEXT:    v_mov_b32_e32 v11, s9
3198; GPRIDX-NEXT:    v_mov_b32_e32 v10, s8
3199; GPRIDX-NEXT:    v_mov_b32_e32 v9, s7
3200; GPRIDX-NEXT:    v_mov_b32_e32 v8, s6
3201; GPRIDX-NEXT:    v_mov_b32_e32 v7, s5
3202; GPRIDX-NEXT:    v_mov_b32_e32 v6, s4
3203; GPRIDX-NEXT:    v_mov_b32_e32 v5, s3
3204; GPRIDX-NEXT:    v_mov_b32_e32 v4, s2
3205; GPRIDX-NEXT:    v_mov_b32_e32 v3, s1
3206; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
3207; GPRIDX-NEXT:    s_set_gpr_idx_on s33, gpr_idx(DST)
3208; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
3209; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
3210; GPRIDX-NEXT:    s_set_gpr_idx_off
3211; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v2
3212; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v3
3213; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v4
3214; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v5
3215; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v6
3216; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v7
3217; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v8
3218; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v9
3219; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v10
3220; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v11
3221; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v12
3222; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v13
3223; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v14
3224; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v15
3225; GPRIDX-NEXT:    v_readfirstlane_b32 s14, v16
3226; GPRIDX-NEXT:    v_readfirstlane_b32 s15, v17
3227; GPRIDX-NEXT:    v_readfirstlane_b32 s16, v18
3228; GPRIDX-NEXT:    v_readfirstlane_b32 s17, v19
3229; GPRIDX-NEXT:    v_readfirstlane_b32 s18, v20
3230; GPRIDX-NEXT:    v_readfirstlane_b32 s19, v21
3231; GPRIDX-NEXT:    v_readfirstlane_b32 s20, v22
3232; GPRIDX-NEXT:    v_readfirstlane_b32 s21, v23
3233; GPRIDX-NEXT:    v_readfirstlane_b32 s22, v24
3234; GPRIDX-NEXT:    v_readfirstlane_b32 s23, v25
3235; GPRIDX-NEXT:    v_readfirstlane_b32 s24, v26
3236; GPRIDX-NEXT:    v_readfirstlane_b32 s25, v27
3237; GPRIDX-NEXT:    v_readfirstlane_b32 s26, v28
3238; GPRIDX-NEXT:    v_readfirstlane_b32 s27, v29
3239; GPRIDX-NEXT:    v_readfirstlane_b32 s28, v30
3240; GPRIDX-NEXT:    v_readfirstlane_b32 s29, v31
3241; GPRIDX-NEXT:    v_readfirstlane_b32 s30, v32
3242; GPRIDX-NEXT:    v_readfirstlane_b32 s31, v33
3243; GPRIDX-NEXT:    ; return to shader part epilog
3244;
3245; MOVREL-LABEL: dyn_insertelement_v16f64_s_v_s:
3246; MOVREL:       ; %bb.0: ; %entry
3247; MOVREL-NEXT:    s_mov_b32 s1, s3
3248; MOVREL-NEXT:    s_mov_b32 s3, s5
3249; MOVREL-NEXT:    s_mov_b32 s5, s7
3250; MOVREL-NEXT:    s_mov_b32 s7, s9
3251; MOVREL-NEXT:    s_mov_b32 s9, s11
3252; MOVREL-NEXT:    s_mov_b32 s11, s13
3253; MOVREL-NEXT:    s_mov_b32 s13, s15
3254; MOVREL-NEXT:    s_mov_b32 s15, s17
3255; MOVREL-NEXT:    s_mov_b32 s17, s19
3256; MOVREL-NEXT:    s_mov_b32 s19, s21
3257; MOVREL-NEXT:    s_mov_b32 s21, s23
3258; MOVREL-NEXT:    s_mov_b32 s23, s25
3259; MOVREL-NEXT:    s_mov_b32 s25, s27
3260; MOVREL-NEXT:    s_mov_b32 s27, s29
3261; MOVREL-NEXT:    s_mov_b32 s29, s31
3262; MOVREL-NEXT:    s_mov_b32 s31, s33
3263; MOVREL-NEXT:    s_mov_b32 s0, s2
3264; MOVREL-NEXT:    s_mov_b32 s2, s4
3265; MOVREL-NEXT:    s_mov_b32 s4, s6
3266; MOVREL-NEXT:    s_mov_b32 s6, s8
3267; MOVREL-NEXT:    s_mov_b32 s8, s10
3268; MOVREL-NEXT:    s_mov_b32 s10, s12
3269; MOVREL-NEXT:    s_mov_b32 s12, s14
3270; MOVREL-NEXT:    s_mov_b32 s14, s16
3271; MOVREL-NEXT:    s_mov_b32 s16, s18
3272; MOVREL-NEXT:    s_mov_b32 s18, s20
3273; MOVREL-NEXT:    s_mov_b32 s20, s22
3274; MOVREL-NEXT:    s_mov_b32 s22, s24
3275; MOVREL-NEXT:    s_mov_b32 s24, s26
3276; MOVREL-NEXT:    s_mov_b32 s26, s28
3277; MOVREL-NEXT:    s_mov_b32 s28, s30
3278; MOVREL-NEXT:    s_mov_b32 s30, s32
3279; MOVREL-NEXT:    v_mov_b32_e32 v33, s31
3280; MOVREL-NEXT:    v_mov_b32_e32 v2, s0
3281; MOVREL-NEXT:    s_lshl_b32 m0, s34, 1
3282; MOVREL-NEXT:    v_mov_b32_e32 v32, s30
3283; MOVREL-NEXT:    v_mov_b32_e32 v31, s29
3284; MOVREL-NEXT:    v_mov_b32_e32 v30, s28
3285; MOVREL-NEXT:    v_mov_b32_e32 v29, s27
3286; MOVREL-NEXT:    v_mov_b32_e32 v28, s26
3287; MOVREL-NEXT:    v_mov_b32_e32 v27, s25
3288; MOVREL-NEXT:    v_mov_b32_e32 v26, s24
3289; MOVREL-NEXT:    v_mov_b32_e32 v25, s23
3290; MOVREL-NEXT:    v_mov_b32_e32 v24, s22
3291; MOVREL-NEXT:    v_mov_b32_e32 v23, s21
3292; MOVREL-NEXT:    v_mov_b32_e32 v22, s20
3293; MOVREL-NEXT:    v_mov_b32_e32 v21, s19
3294; MOVREL-NEXT:    v_mov_b32_e32 v20, s18
3295; MOVREL-NEXT:    v_mov_b32_e32 v19, s17
3296; MOVREL-NEXT:    v_mov_b32_e32 v18, s16
3297; MOVREL-NEXT:    v_mov_b32_e32 v17, s15
3298; MOVREL-NEXT:    v_mov_b32_e32 v16, s14
3299; MOVREL-NEXT:    v_mov_b32_e32 v15, s13
3300; MOVREL-NEXT:    v_mov_b32_e32 v14, s12
3301; MOVREL-NEXT:    v_mov_b32_e32 v13, s11
3302; MOVREL-NEXT:    v_mov_b32_e32 v12, s10
3303; MOVREL-NEXT:    v_mov_b32_e32 v11, s9
3304; MOVREL-NEXT:    v_mov_b32_e32 v10, s8
3305; MOVREL-NEXT:    v_mov_b32_e32 v9, s7
3306; MOVREL-NEXT:    v_mov_b32_e32 v8, s6
3307; MOVREL-NEXT:    v_mov_b32_e32 v7, s5
3308; MOVREL-NEXT:    v_mov_b32_e32 v6, s4
3309; MOVREL-NEXT:    v_mov_b32_e32 v5, s3
3310; MOVREL-NEXT:    v_mov_b32_e32 v4, s2
3311; MOVREL-NEXT:    v_mov_b32_e32 v3, s1
3312; MOVREL-NEXT:    v_movreld_b32_e32 v2, v0
3313; MOVREL-NEXT:    v_movreld_b32_e32 v3, v1
3314; MOVREL-NEXT:    v_readfirstlane_b32 s0, v2
3315; MOVREL-NEXT:    v_readfirstlane_b32 s1, v3
3316; MOVREL-NEXT:    v_readfirstlane_b32 s2, v4
3317; MOVREL-NEXT:    v_readfirstlane_b32 s3, v5
3318; MOVREL-NEXT:    v_readfirstlane_b32 s4, v6
3319; MOVREL-NEXT:    v_readfirstlane_b32 s5, v7
3320; MOVREL-NEXT:    v_readfirstlane_b32 s6, v8
3321; MOVREL-NEXT:    v_readfirstlane_b32 s7, v9
3322; MOVREL-NEXT:    v_readfirstlane_b32 s8, v10
3323; MOVREL-NEXT:    v_readfirstlane_b32 s9, v11
3324; MOVREL-NEXT:    v_readfirstlane_b32 s10, v12
3325; MOVREL-NEXT:    v_readfirstlane_b32 s11, v13
3326; MOVREL-NEXT:    v_readfirstlane_b32 s12, v14
3327; MOVREL-NEXT:    v_readfirstlane_b32 s13, v15
3328; MOVREL-NEXT:    v_readfirstlane_b32 s14, v16
3329; MOVREL-NEXT:    v_readfirstlane_b32 s15, v17
3330; MOVREL-NEXT:    v_readfirstlane_b32 s16, v18
3331; MOVREL-NEXT:    v_readfirstlane_b32 s17, v19
3332; MOVREL-NEXT:    v_readfirstlane_b32 s18, v20
3333; MOVREL-NEXT:    v_readfirstlane_b32 s19, v21
3334; MOVREL-NEXT:    v_readfirstlane_b32 s20, v22
3335; MOVREL-NEXT:    v_readfirstlane_b32 s21, v23
3336; MOVREL-NEXT:    v_readfirstlane_b32 s22, v24
3337; MOVREL-NEXT:    v_readfirstlane_b32 s23, v25
3338; MOVREL-NEXT:    v_readfirstlane_b32 s24, v26
3339; MOVREL-NEXT:    v_readfirstlane_b32 s25, v27
3340; MOVREL-NEXT:    v_readfirstlane_b32 s26, v28
3341; MOVREL-NEXT:    v_readfirstlane_b32 s27, v29
3342; MOVREL-NEXT:    v_readfirstlane_b32 s28, v30
3343; MOVREL-NEXT:    v_readfirstlane_b32 s29, v31
3344; MOVREL-NEXT:    v_readfirstlane_b32 s30, v32
3345; MOVREL-NEXT:    v_readfirstlane_b32 s31, v33
3346; MOVREL-NEXT:    ; return to shader part epilog
3347entry:
3348  %insert = insertelement <16 x double> %vec, double %val, i32 %idx
3349  ret <16 x double> %insert
3350}
3351
3352define amdgpu_ps <7 x i32> @dyn_insertelement_v7i32_s_s_s(<7 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
3353; GPRIDX-LABEL: dyn_insertelement_v7i32_s_s_s:
3354; GPRIDX:       ; %bb.0: ; %entry
3355; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 0
3356; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s2
3357; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
3358; GPRIDX-NEXT:    s_cselect_b32 s1, s9, s3
3359; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
3360; GPRIDX-NEXT:    s_cselect_b32 s2, s9, s4
3361; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
3362; GPRIDX-NEXT:    s_cselect_b32 s3, s9, s5
3363; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
3364; GPRIDX-NEXT:    s_cselect_b32 s4, s9, s6
3365; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
3366; GPRIDX-NEXT:    s_cselect_b32 s5, s9, s7
3367; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
3368; GPRIDX-NEXT:    s_cselect_b32 s6, s9, s8
3369; GPRIDX-NEXT:    ; return to shader part epilog
3370;
3371; MOVREL-LABEL: dyn_insertelement_v7i32_s_s_s:
3372; MOVREL:       ; %bb.0: ; %entry
3373; MOVREL-NEXT:    s_cmp_eq_u32 s10, 0
3374; MOVREL-NEXT:    s_cselect_b32 s0, s9, s2
3375; MOVREL-NEXT:    s_cmp_eq_u32 s10, 1
3376; MOVREL-NEXT:    s_cselect_b32 s1, s9, s3
3377; MOVREL-NEXT:    s_cmp_eq_u32 s10, 2
3378; MOVREL-NEXT:    s_cselect_b32 s2, s9, s4
3379; MOVREL-NEXT:    s_cmp_eq_u32 s10, 3
3380; MOVREL-NEXT:    s_cselect_b32 s3, s9, s5
3381; MOVREL-NEXT:    s_cmp_eq_u32 s10, 4
3382; MOVREL-NEXT:    s_cselect_b32 s4, s9, s6
3383; MOVREL-NEXT:    s_cmp_eq_u32 s10, 5
3384; MOVREL-NEXT:    s_cselect_b32 s5, s9, s7
3385; MOVREL-NEXT:    s_cmp_eq_u32 s10, 6
3386; MOVREL-NEXT:    s_cselect_b32 s6, s9, s8
3387; MOVREL-NEXT:    ; return to shader part epilog
3388entry:
3389  %insert = insertelement <7 x i32> %vec, i32 %val, i32 %idx
3390  ret <7 x i32> %insert
3391}
3392
3393define amdgpu_ps <7 x i8 addrspace(3)*> @dyn_insertelement_v7p3i8_s_s_s(<7 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) {
3394; GPRIDX-LABEL: dyn_insertelement_v7p3i8_s_s_s:
3395; GPRIDX:       ; %bb.0: ; %entry
3396; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 0
3397; GPRIDX-NEXT:    s_cselect_b32 s0, s9, s2
3398; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 1
3399; GPRIDX-NEXT:    s_cselect_b32 s1, s9, s3
3400; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 2
3401; GPRIDX-NEXT:    s_cselect_b32 s2, s9, s4
3402; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 3
3403; GPRIDX-NEXT:    s_cselect_b32 s3, s9, s5
3404; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 4
3405; GPRIDX-NEXT:    s_cselect_b32 s4, s9, s6
3406; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 5
3407; GPRIDX-NEXT:    s_cselect_b32 s5, s9, s7
3408; GPRIDX-NEXT:    s_cmp_eq_u32 s10, 6
3409; GPRIDX-NEXT:    s_cselect_b32 s6, s9, s8
3410; GPRIDX-NEXT:    ; return to shader part epilog
3411;
3412; MOVREL-LABEL: dyn_insertelement_v7p3i8_s_s_s:
3413; MOVREL:       ; %bb.0: ; %entry
3414; MOVREL-NEXT:    s_cmp_eq_u32 s10, 0
3415; MOVREL-NEXT:    s_cselect_b32 s0, s9, s2
3416; MOVREL-NEXT:    s_cmp_eq_u32 s10, 1
3417; MOVREL-NEXT:    s_cselect_b32 s1, s9, s3
3418; MOVREL-NEXT:    s_cmp_eq_u32 s10, 2
3419; MOVREL-NEXT:    s_cselect_b32 s2, s9, s4
3420; MOVREL-NEXT:    s_cmp_eq_u32 s10, 3
3421; MOVREL-NEXT:    s_cselect_b32 s3, s9, s5
3422; MOVREL-NEXT:    s_cmp_eq_u32 s10, 4
3423; MOVREL-NEXT:    s_cselect_b32 s4, s9, s6
3424; MOVREL-NEXT:    s_cmp_eq_u32 s10, 5
3425; MOVREL-NEXT:    s_cselect_b32 s5, s9, s7
3426; MOVREL-NEXT:    s_cmp_eq_u32 s10, 6
3427; MOVREL-NEXT:    s_cselect_b32 s6, s9, s8
3428; MOVREL-NEXT:    ; return to shader part epilog
3429entry:
3430  %insert = insertelement <7 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx
3431  ret <7 x i8 addrspace(3)*> %insert
3432}
3433
3434define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_s(<7 x float> inreg %vec, float %val, i32 inreg %idx) {
3435; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_s:
3436; GPRIDX:       ; %bb.0: ; %entry
3437; GPRIDX-NEXT:    s_mov_b32 s0, s2
3438; GPRIDX-NEXT:    s_mov_b32 s1, s3
3439; GPRIDX-NEXT:    s_mov_b32 s2, s4
3440; GPRIDX-NEXT:    s_mov_b32 s3, s5
3441; GPRIDX-NEXT:    s_mov_b32 s4, s6
3442; GPRIDX-NEXT:    s_mov_b32 s5, s7
3443; GPRIDX-NEXT:    s_mov_b32 s6, s8
3444; GPRIDX-NEXT:    v_mov_b32_e32 v14, s7
3445; GPRIDX-NEXT:    v_mov_b32_e32 v7, s0
3446; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 0
3447; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc
3448; GPRIDX-NEXT:    v_mov_b32_e32 v8, s1
3449; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 1
3450; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v8, v0, vcc
3451; GPRIDX-NEXT:    v_mov_b32_e32 v9, s2
3452; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 2
3453; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v9, v0, vcc
3454; GPRIDX-NEXT:    v_mov_b32_e32 v10, s3
3455; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 3
3456; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v10, v0, vcc
3457; GPRIDX-NEXT:    v_mov_b32_e32 v11, s4
3458; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 4
3459; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v11, v0, vcc
3460; GPRIDX-NEXT:    v_mov_b32_e32 v12, s5
3461; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 5
3462; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v12, v0, vcc
3463; GPRIDX-NEXT:    v_mov_b32_e32 v13, s6
3464; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s9, 6
3465; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v13, v0, vcc
3466; GPRIDX-NEXT:    v_mov_b32_e32 v0, v7
3467; GPRIDX-NEXT:    ; return to shader part epilog
3468;
3469; MOVREL-LABEL: dyn_insertelement_v7f32_s_v_s:
3470; MOVREL:       ; %bb.0: ; %entry
3471; MOVREL-NEXT:    s_mov_b32 s0, s2
3472; MOVREL-NEXT:    s_mov_b32 s1, s3
3473; MOVREL-NEXT:    s_mov_b32 s2, s4
3474; MOVREL-NEXT:    s_mov_b32 s3, s5
3475; MOVREL-NEXT:    s_mov_b32 s4, s6
3476; MOVREL-NEXT:    s_mov_b32 s5, s7
3477; MOVREL-NEXT:    s_mov_b32 s6, s8
3478; MOVREL-NEXT:    v_mov_b32_e32 v16, s7
3479; MOVREL-NEXT:    v_mov_b32_e32 v9, s0
3480; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 0
3481; MOVREL-NEXT:    v_mov_b32_e32 v10, s1
3482; MOVREL-NEXT:    v_mov_b32_e32 v11, s2
3483; MOVREL-NEXT:    v_mov_b32_e32 v12, s3
3484; MOVREL-NEXT:    v_mov_b32_e32 v13, s4
3485; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v9, v0, vcc_lo
3486; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 1
3487; MOVREL-NEXT:    v_mov_b32_e32 v14, s5
3488; MOVREL-NEXT:    v_mov_b32_e32 v15, s6
3489; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v10, v0, vcc_lo
3490; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 2
3491; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v11, v0, vcc_lo
3492; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 3
3493; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v12, v0, vcc_lo
3494; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 4
3495; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v13, v0, vcc_lo
3496; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 5
3497; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v14, v0, vcc_lo
3498; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s9, 6
3499; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v15, v0, vcc_lo
3500; MOVREL-NEXT:    v_mov_b32_e32 v0, v7
3501; MOVREL-NEXT:    ; return to shader part epilog
3502entry:
3503  %insert = insertelement <7 x float> %vec, float %val, i32 %idx
3504  ret <7 x float> %insert
3505}
3506
3507define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_v(<7 x float> inreg %vec, float %val, i32 %idx) {
3508; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_v:
3509; GPRIDX:       ; %bb.0: ; %entry
3510; GPRIDX-NEXT:    s_mov_b32 s0, s2
3511; GPRIDX-NEXT:    s_mov_b32 s1, s3
3512; GPRIDX-NEXT:    s_mov_b32 s2, s4
3513; GPRIDX-NEXT:    s_mov_b32 s3, s5
3514; GPRIDX-NEXT:    s_mov_b32 s4, s6
3515; GPRIDX-NEXT:    s_mov_b32 s5, s7
3516; GPRIDX-NEXT:    s_mov_b32 s6, s8
3517; GPRIDX-NEXT:    v_mov_b32_e32 v15, s7
3518; GPRIDX-NEXT:    v_mov_b32_e32 v8, s0
3519; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
3520; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
3521; GPRIDX-NEXT:    v_mov_b32_e32 v9, s1
3522; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3523; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v9, v0, vcc
3524; GPRIDX-NEXT:    v_mov_b32_e32 v10, s2
3525; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
3526; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
3527; GPRIDX-NEXT:    v_mov_b32_e32 v11, s3
3528; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
3529; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc
3530; GPRIDX-NEXT:    v_mov_b32_e32 v12, s4
3531; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v1
3532; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc
3533; GPRIDX-NEXT:    v_mov_b32_e32 v13, s5
3534; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v1
3535; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc
3536; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v1
3537; GPRIDX-NEXT:    v_mov_b32_e32 v14, s6
3538; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc
3539; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
3540; GPRIDX-NEXT:    v_mov_b32_e32 v1, v7
3541; GPRIDX-NEXT:    ; return to shader part epilog
3542;
3543; MOVREL-LABEL: dyn_insertelement_v7f32_s_v_v:
3544; MOVREL:       ; %bb.0: ; %entry
3545; MOVREL-NEXT:    s_mov_b32 s0, s2
3546; MOVREL-NEXT:    s_mov_b32 s1, s3
3547; MOVREL-NEXT:    s_mov_b32 s2, s4
3548; MOVREL-NEXT:    s_mov_b32 s3, s5
3549; MOVREL-NEXT:    s_mov_b32 s4, s6
3550; MOVREL-NEXT:    s_mov_b32 s5, s7
3551; MOVREL-NEXT:    s_mov_b32 s6, s8
3552; MOVREL-NEXT:    v_mov_b32_e32 v16, s7
3553; MOVREL-NEXT:    v_mov_b32_e32 v9, s0
3554; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
3555; MOVREL-NEXT:    v_mov_b32_e32 v10, s1
3556; MOVREL-NEXT:    v_mov_b32_e32 v11, s2
3557; MOVREL-NEXT:    v_mov_b32_e32 v12, s3
3558; MOVREL-NEXT:    v_mov_b32_e32 v13, s4
3559; MOVREL-NEXT:    v_cndmask_b32_e32 v8, v9, v0, vcc_lo
3560; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
3561; MOVREL-NEXT:    v_mov_b32_e32 v14, s5
3562; MOVREL-NEXT:    v_mov_b32_e32 v15, s6
3563; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v10, v0, vcc_lo
3564; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
3565; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v11, v0, vcc_lo
3566; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
3567; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v12, v0, vcc_lo
3568; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
3569; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v13, v0, vcc_lo
3570; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
3571; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v14, v0, vcc_lo
3572; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
3573; MOVREL-NEXT:    v_mov_b32_e32 v1, v7
3574; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v15, v0, vcc_lo
3575; MOVREL-NEXT:    v_mov_b32_e32 v0, v8
3576; MOVREL-NEXT:    ; return to shader part epilog
3577entry:
3578  %insert = insertelement <7 x float> %vec, float %val, i32 %idx
3579  ret <7 x float> %insert
3580}
3581
3582define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_v_v_s(<7 x float> %vec, float %val, i32 inreg %idx) {
3583; GPRIDX-LABEL: dyn_insertelement_v7f32_v_v_s:
3584; GPRIDX:       ; %bb.0: ; %entry
3585; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
3586; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3587; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
3588; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
3589; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
3590; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
3591; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
3592; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
3593; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
3594; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
3595; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
3596; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
3597; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
3598; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
3599; GPRIDX-NEXT:    ; return to shader part epilog
3600;
3601; MOVREL-LABEL: dyn_insertelement_v7f32_v_v_s:
3602; MOVREL:       ; %bb.0: ; %entry
3603; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
3604; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3605; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
3606; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
3607; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
3608; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc_lo
3609; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
3610; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
3611; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
3612; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc_lo
3613; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
3614; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
3615; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 6
3616; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
3617; MOVREL-NEXT:    ; return to shader part epilog
3618entry:
3619  %insert = insertelement <7 x float> %vec, float %val, i32 %idx
3620  ret <7 x float> %insert
3621}
3622
3623define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_v_v_v(<7 x float> %vec, float %val, i32 %idx) {
3624; GPRIDX-LABEL: dyn_insertelement_v7f32_v_v_v:
3625; GPRIDX:       ; %bb.0: ; %entry
3626; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
3627; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc
3628; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
3629; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
3630; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
3631; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
3632; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
3633; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
3634; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
3635; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
3636; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
3637; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
3638; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
3639; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
3640; GPRIDX-NEXT:    ; return to shader part epilog
3641;
3642; MOVREL-LABEL: dyn_insertelement_v7f32_v_v_v:
3643; MOVREL:       ; %bb.0: ; %entry
3644; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v8
3645; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v7, vcc_lo
3646; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
3647; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc_lo
3648; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
3649; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc_lo
3650; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
3651; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
3652; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
3653; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc_lo
3654; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
3655; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
3656; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
3657; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc_lo
3658; MOVREL-NEXT:    ; return to shader part epilog
3659entry:
3660  %insert = insertelement <7 x float> %vec, float %val, i32 %idx
3661  ret <7 x float> %insert
3662}
3663
3664define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_s_s(<7 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
3665; GPRIDX-LABEL: dyn_insertelement_v7f64_s_s_s:
3666; GPRIDX:       ; %bb.0: ; %entry
3667; GPRIDX-NEXT:    s_mov_b32 s0, s2
3668; GPRIDX-NEXT:    s_mov_b32 s1, s3
3669; GPRIDX-NEXT:    s_mov_b32 s2, s4
3670; GPRIDX-NEXT:    s_mov_b32 s3, s5
3671; GPRIDX-NEXT:    s_mov_b32 s4, s6
3672; GPRIDX-NEXT:    s_mov_b32 s5, s7
3673; GPRIDX-NEXT:    s_mov_b32 s6, s8
3674; GPRIDX-NEXT:    s_mov_b32 s7, s9
3675; GPRIDX-NEXT:    s_mov_b32 s8, s10
3676; GPRIDX-NEXT:    s_mov_b32 s9, s11
3677; GPRIDX-NEXT:    s_mov_b32 s10, s12
3678; GPRIDX-NEXT:    s_mov_b32 s11, s13
3679; GPRIDX-NEXT:    s_mov_b32 s12, s14
3680; GPRIDX-NEXT:    s_mov_b32 s13, s15
3681; GPRIDX-NEXT:    s_mov_b32 m0, s18
3682; GPRIDX-NEXT:    s_nop 0
3683; GPRIDX-NEXT:    s_movreld_b64 s[0:1], s[16:17]
3684; GPRIDX-NEXT:    ; return to shader part epilog
3685;
3686; MOVREL-LABEL: dyn_insertelement_v7f64_s_s_s:
3687; MOVREL:       ; %bb.0: ; %entry
3688; MOVREL-NEXT:    s_mov_b32 s0, s2
3689; MOVREL-NEXT:    s_mov_b32 s1, s3
3690; MOVREL-NEXT:    s_mov_b32 m0, s18
3691; MOVREL-NEXT:    s_mov_b32 s2, s4
3692; MOVREL-NEXT:    s_mov_b32 s3, s5
3693; MOVREL-NEXT:    s_mov_b32 s4, s6
3694; MOVREL-NEXT:    s_mov_b32 s5, s7
3695; MOVREL-NEXT:    s_mov_b32 s6, s8
3696; MOVREL-NEXT:    s_mov_b32 s7, s9
3697; MOVREL-NEXT:    s_mov_b32 s8, s10
3698; MOVREL-NEXT:    s_mov_b32 s9, s11
3699; MOVREL-NEXT:    s_mov_b32 s10, s12
3700; MOVREL-NEXT:    s_mov_b32 s11, s13
3701; MOVREL-NEXT:    s_mov_b32 s12, s14
3702; MOVREL-NEXT:    s_mov_b32 s13, s15
3703; MOVREL-NEXT:    s_movreld_b64 s[0:1], s[16:17]
3704; MOVREL-NEXT:    ; return to shader part epilog
3705entry:
3706  %insert = insertelement <7 x double> %vec, double %val, i32 %idx
3707  ret <7 x double> %insert
3708}
3709
3710define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_v_s(<7 x double> inreg %vec, double %val, i32 inreg %idx) {
3711; GPRIDX-LABEL: dyn_insertelement_v7f64_s_v_s:
3712; GPRIDX:       ; %bb.0: ; %entry
3713; GPRIDX-NEXT:    s_mov_b32 s0, s2
3714; GPRIDX-NEXT:    s_mov_b32 s1, s3
3715; GPRIDX-NEXT:    s_mov_b32 s2, s4
3716; GPRIDX-NEXT:    s_mov_b32 s3, s5
3717; GPRIDX-NEXT:    s_mov_b32 s4, s6
3718; GPRIDX-NEXT:    s_mov_b32 s5, s7
3719; GPRIDX-NEXT:    s_mov_b32 s6, s8
3720; GPRIDX-NEXT:    s_mov_b32 s7, s9
3721; GPRIDX-NEXT:    s_mov_b32 s8, s10
3722; GPRIDX-NEXT:    s_mov_b32 s9, s11
3723; GPRIDX-NEXT:    s_mov_b32 s10, s12
3724; GPRIDX-NEXT:    s_mov_b32 s11, s13
3725; GPRIDX-NEXT:    s_mov_b32 s12, s14
3726; GPRIDX-NEXT:    s_mov_b32 s13, s15
3727; GPRIDX-NEXT:    v_mov_b32_e32 v17, s15
3728; GPRIDX-NEXT:    v_mov_b32_e32 v16, s14
3729; GPRIDX-NEXT:    v_mov_b32_e32 v15, s13
3730; GPRIDX-NEXT:    v_mov_b32_e32 v14, s12
3731; GPRIDX-NEXT:    v_mov_b32_e32 v13, s11
3732; GPRIDX-NEXT:    v_mov_b32_e32 v12, s10
3733; GPRIDX-NEXT:    v_mov_b32_e32 v11, s9
3734; GPRIDX-NEXT:    v_mov_b32_e32 v10, s8
3735; GPRIDX-NEXT:    v_mov_b32_e32 v9, s7
3736; GPRIDX-NEXT:    v_mov_b32_e32 v8, s6
3737; GPRIDX-NEXT:    v_mov_b32_e32 v7, s5
3738; GPRIDX-NEXT:    v_mov_b32_e32 v6, s4
3739; GPRIDX-NEXT:    v_mov_b32_e32 v5, s3
3740; GPRIDX-NEXT:    v_mov_b32_e32 v4, s2
3741; GPRIDX-NEXT:    v_mov_b32_e32 v3, s1
3742; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
3743; GPRIDX-NEXT:    s_lshl_b32 s0, s16, 1
3744; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
3745; GPRIDX-NEXT:    v_mov_b32_e32 v2, v0
3746; GPRIDX-NEXT:    v_mov_b32_e32 v3, v1
3747; GPRIDX-NEXT:    s_set_gpr_idx_off
3748; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v2
3749; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v3
3750; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v4
3751; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v5
3752; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v6
3753; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v7
3754; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v8
3755; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v9
3756; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v10
3757; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v11
3758; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v12
3759; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v13
3760; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v14
3761; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v15
3762; GPRIDX-NEXT:    ; return to shader part epilog
3763;
3764; MOVREL-LABEL: dyn_insertelement_v7f64_s_v_s:
3765; MOVREL:       ; %bb.0: ; %entry
3766; MOVREL-NEXT:    s_mov_b32 s0, s2
3767; MOVREL-NEXT:    s_mov_b32 s1, s3
3768; MOVREL-NEXT:    s_mov_b32 s2, s4
3769; MOVREL-NEXT:    s_mov_b32 s3, s5
3770; MOVREL-NEXT:    s_mov_b32 s4, s6
3771; MOVREL-NEXT:    s_mov_b32 s5, s7
3772; MOVREL-NEXT:    s_mov_b32 s6, s8
3773; MOVREL-NEXT:    s_mov_b32 s7, s9
3774; MOVREL-NEXT:    s_mov_b32 s8, s10
3775; MOVREL-NEXT:    s_mov_b32 s9, s11
3776; MOVREL-NEXT:    s_mov_b32 s10, s12
3777; MOVREL-NEXT:    s_mov_b32 s11, s13
3778; MOVREL-NEXT:    s_mov_b32 s12, s14
3779; MOVREL-NEXT:    s_mov_b32 s13, s15
3780; MOVREL-NEXT:    v_mov_b32_e32 v17, s15
3781; MOVREL-NEXT:    v_mov_b32_e32 v2, s0
3782; MOVREL-NEXT:    s_lshl_b32 m0, s16, 1
3783; MOVREL-NEXT:    v_mov_b32_e32 v16, s14
3784; MOVREL-NEXT:    v_mov_b32_e32 v15, s13
3785; MOVREL-NEXT:    v_mov_b32_e32 v14, s12
3786; MOVREL-NEXT:    v_mov_b32_e32 v13, s11
3787; MOVREL-NEXT:    v_mov_b32_e32 v12, s10
3788; MOVREL-NEXT:    v_mov_b32_e32 v11, s9
3789; MOVREL-NEXT:    v_mov_b32_e32 v10, s8
3790; MOVREL-NEXT:    v_mov_b32_e32 v9, s7
3791; MOVREL-NEXT:    v_mov_b32_e32 v8, s6
3792; MOVREL-NEXT:    v_mov_b32_e32 v7, s5
3793; MOVREL-NEXT:    v_mov_b32_e32 v6, s4
3794; MOVREL-NEXT:    v_mov_b32_e32 v5, s3
3795; MOVREL-NEXT:    v_mov_b32_e32 v4, s2
3796; MOVREL-NEXT:    v_mov_b32_e32 v3, s1
3797; MOVREL-NEXT:    v_movreld_b32_e32 v2, v0
3798; MOVREL-NEXT:    v_movreld_b32_e32 v3, v1
3799; MOVREL-NEXT:    v_readfirstlane_b32 s0, v2
3800; MOVREL-NEXT:    v_readfirstlane_b32 s1, v3
3801; MOVREL-NEXT:    v_readfirstlane_b32 s2, v4
3802; MOVREL-NEXT:    v_readfirstlane_b32 s3, v5
3803; MOVREL-NEXT:    v_readfirstlane_b32 s4, v6
3804; MOVREL-NEXT:    v_readfirstlane_b32 s5, v7
3805; MOVREL-NEXT:    v_readfirstlane_b32 s6, v8
3806; MOVREL-NEXT:    v_readfirstlane_b32 s7, v9
3807; MOVREL-NEXT:    v_readfirstlane_b32 s8, v10
3808; MOVREL-NEXT:    v_readfirstlane_b32 s9, v11
3809; MOVREL-NEXT:    v_readfirstlane_b32 s10, v12
3810; MOVREL-NEXT:    v_readfirstlane_b32 s11, v13
3811; MOVREL-NEXT:    v_readfirstlane_b32 s12, v14
3812; MOVREL-NEXT:    v_readfirstlane_b32 s13, v15
3813; MOVREL-NEXT:    ; return to shader part epilog
3814entry:
3815  %insert = insertelement <7 x double> %vec, double %val, i32 %idx
3816  ret <7 x double> %insert
3817}
3818
3819define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_v_v(<7 x double> inreg %vec, double %val, i32 %idx) {
3820; GPRIDX-LABEL: dyn_insertelement_v7f64_s_v_v:
3821; GPRIDX:       ; %bb.0: ; %entry
3822; GPRIDX-NEXT:    s_mov_b32 s0, s2
3823; GPRIDX-NEXT:    s_mov_b32 s1, s3
3824; GPRIDX-NEXT:    s_mov_b32 s2, s4
3825; GPRIDX-NEXT:    s_mov_b32 s3, s5
3826; GPRIDX-NEXT:    s_mov_b32 s4, s6
3827; GPRIDX-NEXT:    s_mov_b32 s5, s7
3828; GPRIDX-NEXT:    s_mov_b32 s6, s8
3829; GPRIDX-NEXT:    s_mov_b32 s7, s9
3830; GPRIDX-NEXT:    s_mov_b32 s8, s10
3831; GPRIDX-NEXT:    s_mov_b32 s9, s11
3832; GPRIDX-NEXT:    s_mov_b32 s10, s12
3833; GPRIDX-NEXT:    s_mov_b32 s11, s13
3834; GPRIDX-NEXT:    s_mov_b32 s12, s14
3835; GPRIDX-NEXT:    s_mov_b32 s13, s15
3836; GPRIDX-NEXT:    v_mov_b32_e32 v18, s15
3837; GPRIDX-NEXT:    v_mov_b32_e32 v17, s14
3838; GPRIDX-NEXT:    v_mov_b32_e32 v16, s13
3839; GPRIDX-NEXT:    v_mov_b32_e32 v15, s12
3840; GPRIDX-NEXT:    v_mov_b32_e32 v14, s11
3841; GPRIDX-NEXT:    v_mov_b32_e32 v13, s10
3842; GPRIDX-NEXT:    v_mov_b32_e32 v12, s9
3843; GPRIDX-NEXT:    v_mov_b32_e32 v11, s8
3844; GPRIDX-NEXT:    v_mov_b32_e32 v10, s7
3845; GPRIDX-NEXT:    v_mov_b32_e32 v9, s6
3846; GPRIDX-NEXT:    v_mov_b32_e32 v8, s5
3847; GPRIDX-NEXT:    v_mov_b32_e32 v7, s4
3848; GPRIDX-NEXT:    v_mov_b32_e32 v6, s3
3849; GPRIDX-NEXT:    v_mov_b32_e32 v5, s2
3850; GPRIDX-NEXT:    v_mov_b32_e32 v4, s1
3851; GPRIDX-NEXT:    v_mov_b32_e32 v3, s0
3852; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
3853; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 2, v2
3854; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[2:3], 3, v2
3855; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 4, v2
3856; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 1, v2
3857; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 5, v2
3858; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 6, v2
3859; GPRIDX-NEXT:    v_cndmask_b32_e64 v2, v5, v0, s[10:11]
3860; GPRIDX-NEXT:    v_cndmask_b32_e64 v5, v7, v0, s[0:1]
3861; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v9, v0, s[2:3]
3862; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v11, v0, s[4:5]
3863; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v13, v0, s[6:7]
3864; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
3865; GPRIDX-NEXT:    v_cndmask_b32_e64 v0, v15, v0, s[8:9]
3866; GPRIDX-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s[10:11]
3867; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s[2:3]
3868; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v14, v1, s[6:7]
3869; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[0:1]
3870; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v1, s[4:5]
3871; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
3872; GPRIDX-NEXT:    v_cndmask_b32_e64 v1, v16, v1, s[8:9]
3873; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v3
3874; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v4
3875; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v2
3876; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v6
3877; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v5
3878; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v8
3879; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v7
3880; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v10
3881; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v9
3882; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v12
3883; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v11
3884; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v13
3885; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v0
3886; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v1
3887; GPRIDX-NEXT:    ; return to shader part epilog
3888;
3889; MOVREL-LABEL: dyn_insertelement_v7f64_s_v_v:
3890; MOVREL:       ; %bb.0: ; %entry
3891; MOVREL-NEXT:    s_mov_b32 s0, s2
3892; MOVREL-NEXT:    s_mov_b32 s1, s3
3893; MOVREL-NEXT:    s_mov_b32 s2, s4
3894; MOVREL-NEXT:    s_mov_b32 s3, s5
3895; MOVREL-NEXT:    s_mov_b32 s4, s6
3896; MOVREL-NEXT:    s_mov_b32 s5, s7
3897; MOVREL-NEXT:    s_mov_b32 s6, s8
3898; MOVREL-NEXT:    s_mov_b32 s7, s9
3899; MOVREL-NEXT:    s_mov_b32 s8, s10
3900; MOVREL-NEXT:    s_mov_b32 s9, s11
3901; MOVREL-NEXT:    s_mov_b32 s10, s12
3902; MOVREL-NEXT:    s_mov_b32 s11, s13
3903; MOVREL-NEXT:    s_mov_b32 s12, s14
3904; MOVREL-NEXT:    s_mov_b32 s13, s15
3905; MOVREL-NEXT:    v_mov_b32_e32 v18, s15
3906; MOVREL-NEXT:    v_mov_b32_e32 v17, s14
3907; MOVREL-NEXT:    v_mov_b32_e32 v16, s13
3908; MOVREL-NEXT:    v_mov_b32_e32 v15, s12
3909; MOVREL-NEXT:    v_mov_b32_e32 v14, s11
3910; MOVREL-NEXT:    v_mov_b32_e32 v13, s10
3911; MOVREL-NEXT:    v_mov_b32_e32 v12, s9
3912; MOVREL-NEXT:    v_mov_b32_e32 v11, s8
3913; MOVREL-NEXT:    v_mov_b32_e32 v10, s7
3914; MOVREL-NEXT:    v_mov_b32_e32 v9, s6
3915; MOVREL-NEXT:    v_mov_b32_e32 v8, s5
3916; MOVREL-NEXT:    v_mov_b32_e32 v7, s4
3917; MOVREL-NEXT:    v_mov_b32_e32 v6, s3
3918; MOVREL-NEXT:    v_mov_b32_e32 v5, s2
3919; MOVREL-NEXT:    v_mov_b32_e32 v4, s1
3920; MOVREL-NEXT:    v_mov_b32_e32 v3, s0
3921; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
3922; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, 1, v2
3923; MOVREL-NEXT:    v_cmp_eq_u32_e64 s1, 6, v2
3924; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
3925; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
3926; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v2
3927; MOVREL-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s0
3928; MOVREL-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s0
3929; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, 3, v2
3930; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc_lo
3931; MOVREL-NEXT:    v_cndmask_b32_e32 v8, v8, v1, vcc_lo
3932; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v2
3933; MOVREL-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s0
3934; MOVREL-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s0
3935; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, 5, v2
3936; MOVREL-NEXT:    v_readfirstlane_b32 s2, v5
3937; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v12, v1, vcc_lo
3938; MOVREL-NEXT:    v_cndmask_b32_e32 v11, v11, v0, vcc_lo
3939; MOVREL-NEXT:    v_readfirstlane_b32 s3, v6
3940; MOVREL-NEXT:    v_cndmask_b32_e64 v12, v13, v0, s0
3941; MOVREL-NEXT:    v_cndmask_b32_e64 v13, v14, v1, s0
3942; MOVREL-NEXT:    v_cndmask_b32_e64 v0, v15, v0, s1
3943; MOVREL-NEXT:    v_cndmask_b32_e64 v1, v16, v1, s1
3944; MOVREL-NEXT:    v_readfirstlane_b32 s0, v3
3945; MOVREL-NEXT:    v_readfirstlane_b32 s1, v4
3946; MOVREL-NEXT:    v_readfirstlane_b32 s4, v7
3947; MOVREL-NEXT:    v_readfirstlane_b32 s5, v8
3948; MOVREL-NEXT:    v_readfirstlane_b32 s6, v9
3949; MOVREL-NEXT:    v_readfirstlane_b32 s7, v10
3950; MOVREL-NEXT:    v_readfirstlane_b32 s8, v11
3951; MOVREL-NEXT:    v_readfirstlane_b32 s9, v2
3952; MOVREL-NEXT:    v_readfirstlane_b32 s10, v12
3953; MOVREL-NEXT:    v_readfirstlane_b32 s11, v13
3954; MOVREL-NEXT:    v_readfirstlane_b32 s12, v0
3955; MOVREL-NEXT:    v_readfirstlane_b32 s13, v1
3956; MOVREL-NEXT:    ; return to shader part epilog
3957entry:
3958  %insert = insertelement <7 x double> %vec, double %val, i32 %idx
3959  ret <7 x double> %insert
3960}
3961
3962define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_s(<7 x double> %vec, double %val, i32 inreg %idx) {
3963; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_s:
3964; GPRIDX:       ; %bb.0: ; %entry
3965; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
3966; GPRIDX-NEXT:    v_mov_b32_e32 v16, v15
3967; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(DST)
3968; GPRIDX-NEXT:    v_mov_b32_e32 v0, v14
3969; GPRIDX-NEXT:    v_mov_b32_e32 v1, v16
3970; GPRIDX-NEXT:    s_set_gpr_idx_off
3971; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v0
3972; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v1
3973; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v2
3974; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v3
3975; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v4
3976; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v5
3977; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v6
3978; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v7
3979; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v8
3980; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v9
3981; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v10
3982; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v11
3983; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v12
3984; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v13
3985; GPRIDX-NEXT:    ; return to shader part epilog
3986;
3987; MOVREL-LABEL: dyn_insertelement_v7f64_v_v_s:
3988; MOVREL:       ; %bb.0: ; %entry
3989; MOVREL-NEXT:    v_mov_b32_e32 v16, v15
3990; MOVREL-NEXT:    s_lshl_b32 m0, s2, 1
3991; MOVREL-NEXT:    v_movreld_b32_e32 v0, v14
3992; MOVREL-NEXT:    v_movreld_b32_e32 v1, v16
3993; MOVREL-NEXT:    v_readfirstlane_b32 s0, v0
3994; MOVREL-NEXT:    v_readfirstlane_b32 s1, v1
3995; MOVREL-NEXT:    v_readfirstlane_b32 s2, v2
3996; MOVREL-NEXT:    v_readfirstlane_b32 s3, v3
3997; MOVREL-NEXT:    v_readfirstlane_b32 s4, v4
3998; MOVREL-NEXT:    v_readfirstlane_b32 s5, v5
3999; MOVREL-NEXT:    v_readfirstlane_b32 s6, v6
4000; MOVREL-NEXT:    v_readfirstlane_b32 s7, v7
4001; MOVREL-NEXT:    v_readfirstlane_b32 s8, v8
4002; MOVREL-NEXT:    v_readfirstlane_b32 s9, v9
4003; MOVREL-NEXT:    v_readfirstlane_b32 s10, v10
4004; MOVREL-NEXT:    v_readfirstlane_b32 s11, v11
4005; MOVREL-NEXT:    v_readfirstlane_b32 s12, v12
4006; MOVREL-NEXT:    v_readfirstlane_b32 s13, v13
4007; MOVREL-NEXT:    ; return to shader part epilog
4008entry:
4009  %insert = insertelement <7 x double> %vec, double %val, i32 %idx
4010  ret <7 x double> %insert
4011}
4012
4013define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_v(<7 x double> %vec, double %val, i32 %idx) {
4014; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_v:
4015; GPRIDX:       ; %bb.0: ; %entry
4016; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v16
4017; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v16
4018; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[2:3], 2, v16
4019; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v16
4020; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 4, v16
4021; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 5, v16
4022; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 6, v16
4023; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v14, s[10:11]
4024; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v13, v15, s[10:11]
4025; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v14, s[8:9]
4026; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v11, v15, s[8:9]
4027; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v14, s[6:7]
4028; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v15, s[6:7]
4029; GPRIDX-NEXT:    v_cndmask_b32_e64 v6, v6, v14, s[4:5]
4030; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v15, s[4:5]
4031; GPRIDX-NEXT:    v_cndmask_b32_e64 v4, v4, v14, s[2:3]
4032; GPRIDX-NEXT:    v_cndmask_b32_e64 v5, v5, v15, s[2:3]
4033; GPRIDX-NEXT:    v_cndmask_b32_e64 v2, v2, v14, s[0:1]
4034; GPRIDX-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s[0:1]
4035; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc
4036; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc
4037; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v0
4038; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v1
4039; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v2
4040; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v3
4041; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v4
4042; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v5
4043; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v6
4044; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v7
4045; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v8
4046; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v9
4047; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v10
4048; GPRIDX-NEXT:    v_readfirstlane_b32 s11, v11
4049; GPRIDX-NEXT:    v_readfirstlane_b32 s12, v12
4050; GPRIDX-NEXT:    v_readfirstlane_b32 s13, v13
4051; GPRIDX-NEXT:    ; return to shader part epilog
4052;
4053; MOVREL-LABEL: dyn_insertelement_v7f64_v_v_v:
4054; MOVREL:       ; %bb.0: ; %entry
4055; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v16
4056; MOVREL-NEXT:    v_cmp_eq_u32_e64 s1, 2, v16
4057; MOVREL-NEXT:    v_cmp_eq_u32_e64 s2, 3, v16
4058; MOVREL-NEXT:    v_cmp_eq_u32_e64 s3, 4, v16
4059; MOVREL-NEXT:    v_cmp_eq_u32_e64 s4, 5, v16
4060; MOVREL-NEXT:    v_cmp_eq_u32_e64 s5, 6, v16
4061; MOVREL-NEXT:    v_mov_b32_e32 v19, v2
4062; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, 1, v16
4063; MOVREL-NEXT:    v_mov_b32_e32 v18, v3
4064; MOVREL-NEXT:    v_cndmask_b32_e64 v6, v6, v14, s2
4065; MOVREL-NEXT:    v_cndmask_b32_e64 v8, v8, v14, s3
4066; MOVREL-NEXT:    v_cndmask_b32_e64 v10, v10, v14, s4
4067; MOVREL-NEXT:    v_cndmask_b32_e64 v12, v12, v14, s5
4068; MOVREL-NEXT:    v_cndmask_b32_e64 v7, v7, v15, s2
4069; MOVREL-NEXT:    v_cndmask_b32_e64 v2, v19, v14, s0
4070; MOVREL-NEXT:    v_cndmask_b32_e64 v9, v9, v15, s3
4071; MOVREL-NEXT:    v_cndmask_b32_e64 v3, v18, v15, s0
4072; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v14, vcc_lo
4073; MOVREL-NEXT:    v_cndmask_b32_e64 v11, v11, v15, s4
4074; MOVREL-NEXT:    v_cndmask_b32_e64 v4, v4, v14, s1
4075; MOVREL-NEXT:    v_cndmask_b32_e64 v13, v13, v15, s5
4076; MOVREL-NEXT:    v_cndmask_b32_e64 v5, v5, v15, s1
4077; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v15, vcc_lo
4078; MOVREL-NEXT:    v_readfirstlane_b32 s0, v0
4079; MOVREL-NEXT:    v_readfirstlane_b32 s2, v2
4080; MOVREL-NEXT:    v_readfirstlane_b32 s3, v3
4081; MOVREL-NEXT:    v_readfirstlane_b32 s4, v4
4082; MOVREL-NEXT:    v_readfirstlane_b32 s1, v1
4083; MOVREL-NEXT:    v_readfirstlane_b32 s5, v5
4084; MOVREL-NEXT:    v_readfirstlane_b32 s6, v6
4085; MOVREL-NEXT:    v_readfirstlane_b32 s7, v7
4086; MOVREL-NEXT:    v_readfirstlane_b32 s8, v8
4087; MOVREL-NEXT:    v_readfirstlane_b32 s9, v9
4088; MOVREL-NEXT:    v_readfirstlane_b32 s10, v10
4089; MOVREL-NEXT:    v_readfirstlane_b32 s11, v11
4090; MOVREL-NEXT:    v_readfirstlane_b32 s12, v12
4091; MOVREL-NEXT:    v_readfirstlane_b32 s13, v13
4092; MOVREL-NEXT:    ; return to shader part epilog
4093entry:
4094  %insert = insertelement <7 x double> %vec, double %val, i32 %idx
4095  ret <7 x double> %insert
4096}
4097
4098define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_s_s(<5 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
4099; GPRIDX-LABEL: dyn_insertelement_v5f64_s_s_s:
4100; GPRIDX:       ; %bb.0: ; %entry
4101; GPRIDX-NEXT:    s_cmp_eq_u32 s14, 0
4102; GPRIDX-NEXT:    s_cselect_b64 s[0:1], s[12:13], s[2:3]
4103; GPRIDX-NEXT:    s_cmp_eq_u32 s14, 1
4104; GPRIDX-NEXT:    s_cselect_b64 s[2:3], s[12:13], s[4:5]
4105; GPRIDX-NEXT:    s_cmp_eq_u32 s14, 2
4106; GPRIDX-NEXT:    s_cselect_b64 s[4:5], s[12:13], s[6:7]
4107; GPRIDX-NEXT:    s_cmp_eq_u32 s14, 3
4108; GPRIDX-NEXT:    s_cselect_b64 s[6:7], s[12:13], s[8:9]
4109; GPRIDX-NEXT:    s_cmp_eq_u32 s14, 4
4110; GPRIDX-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[10:11]
4111; GPRIDX-NEXT:    ; return to shader part epilog
4112;
4113; MOVREL-LABEL: dyn_insertelement_v5f64_s_s_s:
4114; MOVREL:       ; %bb.0: ; %entry
4115; MOVREL-NEXT:    s_cmp_eq_u32 s14, 0
4116; MOVREL-NEXT:    s_cselect_b64 s[0:1], s[12:13], s[2:3]
4117; MOVREL-NEXT:    s_cmp_eq_u32 s14, 1
4118; MOVREL-NEXT:    s_cselect_b64 s[2:3], s[12:13], s[4:5]
4119; MOVREL-NEXT:    s_cmp_eq_u32 s14, 2
4120; MOVREL-NEXT:    s_cselect_b64 s[4:5], s[12:13], s[6:7]
4121; MOVREL-NEXT:    s_cmp_eq_u32 s14, 3
4122; MOVREL-NEXT:    s_cselect_b64 s[6:7], s[12:13], s[8:9]
4123; MOVREL-NEXT:    s_cmp_eq_u32 s14, 4
4124; MOVREL-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[10:11]
4125; MOVREL-NEXT:    ; return to shader part epilog
4126entry:
4127  %insert = insertelement <5 x double> %vec, double %val, i32 %idx
4128  ret <5 x double> %insert
4129}
4130
4131define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_s(<5 x double> inreg %vec, double %val, i32 inreg %idx) {
4132; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_s:
4133; GPRIDX:       ; %bb.0: ; %entry
4134; GPRIDX-NEXT:    s_mov_b32 s0, s2
4135; GPRIDX-NEXT:    s_mov_b32 s1, s3
4136; GPRIDX-NEXT:    s_mov_b32 s2, s4
4137; GPRIDX-NEXT:    s_mov_b32 s3, s5
4138; GPRIDX-NEXT:    s_mov_b32 s4, s6
4139; GPRIDX-NEXT:    s_mov_b32 s5, s7
4140; GPRIDX-NEXT:    s_mov_b32 s6, s8
4141; GPRIDX-NEXT:    s_mov_b32 s7, s9
4142; GPRIDX-NEXT:    s_mov_b32 s8, s10
4143; GPRIDX-NEXT:    s_mov_b32 s9, s11
4144; GPRIDX-NEXT:    v_mov_b32_e32 v17, s15
4145; GPRIDX-NEXT:    v_mov_b32_e32 v16, s14
4146; GPRIDX-NEXT:    v_mov_b32_e32 v15, s13
4147; GPRIDX-NEXT:    v_mov_b32_e32 v14, s12
4148; GPRIDX-NEXT:    v_mov_b32_e32 v13, s11
4149; GPRIDX-NEXT:    v_mov_b32_e32 v12, s10
4150; GPRIDX-NEXT:    v_mov_b32_e32 v11, s9
4151; GPRIDX-NEXT:    v_mov_b32_e32 v10, s8
4152; GPRIDX-NEXT:    v_mov_b32_e32 v9, s7
4153; GPRIDX-NEXT:    v_mov_b32_e32 v8, s6
4154; GPRIDX-NEXT:    v_mov_b32_e32 v7, s5
4155; GPRIDX-NEXT:    v_mov_b32_e32 v6, s4
4156; GPRIDX-NEXT:    v_mov_b32_e32 v5, s3
4157; GPRIDX-NEXT:    v_mov_b32_e32 v4, s2
4158; GPRIDX-NEXT:    v_mov_b32_e32 v3, s1
4159; GPRIDX-NEXT:    v_mov_b32_e32 v2, s0
4160; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s12, 0
4161; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], s12, 1
4162; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[2:3], s12, 3
4163; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], s12, 2
4164; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], s12, 4
4165; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
4166; GPRIDX-NEXT:    v_cndmask_b32_e64 v4, v4, v0, s[0:1]
4167; GPRIDX-NEXT:    v_cndmask_b32_e64 v5, v5, v1, s[0:1]
4168; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
4169; GPRIDX-NEXT:    v_cndmask_b32_e64 v6, v6, v0, s[6:7]
4170; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v0, s[2:3]
4171; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v1, s[2:3]
4172; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v1, s[6:7]
4173; GPRIDX-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
4174; GPRIDX-NEXT:    v_cndmask_b32_e64 v1, v11, v1, s[4:5]
4175; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v2
4176; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v3
4177; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v4
4178; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v5
4179; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v6
4180; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v7
4181; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v8
4182; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v9
4183; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v0
4184; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v1
4185; GPRIDX-NEXT:    ; return to shader part epilog
4186;
4187; MOVREL-LABEL: dyn_insertelement_v5f64_s_v_s:
4188; MOVREL:       ; %bb.0: ; %entry
4189; MOVREL-NEXT:    s_mov_b32 s0, s2
4190; MOVREL-NEXT:    s_mov_b32 s1, s3
4191; MOVREL-NEXT:    s_mov_b32 s2, s4
4192; MOVREL-NEXT:    s_mov_b32 s3, s5
4193; MOVREL-NEXT:    s_mov_b32 s4, s6
4194; MOVREL-NEXT:    s_mov_b32 s5, s7
4195; MOVREL-NEXT:    s_mov_b32 s6, s8
4196; MOVREL-NEXT:    s_mov_b32 s7, s9
4197; MOVREL-NEXT:    s_mov_b32 s8, s10
4198; MOVREL-NEXT:    s_mov_b32 s9, s11
4199; MOVREL-NEXT:    v_mov_b32_e32 v20, s15
4200; MOVREL-NEXT:    v_mov_b32_e32 v19, s14
4201; MOVREL-NEXT:    v_mov_b32_e32 v18, s13
4202; MOVREL-NEXT:    v_mov_b32_e32 v17, s12
4203; MOVREL-NEXT:    v_mov_b32_e32 v16, s11
4204; MOVREL-NEXT:    v_mov_b32_e32 v15, s10
4205; MOVREL-NEXT:    v_mov_b32_e32 v14, s9
4206; MOVREL-NEXT:    v_mov_b32_e32 v13, s8
4207; MOVREL-NEXT:    v_mov_b32_e32 v12, s7
4208; MOVREL-NEXT:    v_mov_b32_e32 v11, s6
4209; MOVREL-NEXT:    v_mov_b32_e32 v10, s5
4210; MOVREL-NEXT:    v_mov_b32_e32 v9, s4
4211; MOVREL-NEXT:    v_mov_b32_e32 v8, s3
4212; MOVREL-NEXT:    v_mov_b32_e32 v7, s2
4213; MOVREL-NEXT:    v_mov_b32_e32 v6, s1
4214; MOVREL-NEXT:    v_mov_b32_e32 v5, s0
4215; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s12, 0
4216; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, s12, 1
4217; MOVREL-NEXT:    v_cmp_eq_u32_e64 s1, s12, 4
4218; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v5, v0, vcc_lo
4219; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v6, v1, vcc_lo
4220; MOVREL-NEXT:    v_cndmask_b32_e64 v4, v7, v0, s0
4221; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s12, 2
4222; MOVREL-NEXT:    v_cndmask_b32_e64 v5, v8, v1, s0
4223; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, s12, 3
4224; MOVREL-NEXT:    v_readfirstlane_b32 s2, v4
4225; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v9, v0, vcc_lo
4226; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v10, v1, vcc_lo
4227; MOVREL-NEXT:    v_cndmask_b32_e64 v8, v11, v0, s0
4228; MOVREL-NEXT:    v_cndmask_b32_e64 v9, v12, v1, s0
4229; MOVREL-NEXT:    v_cndmask_b32_e64 v0, v13, v0, s1
4230; MOVREL-NEXT:    v_cndmask_b32_e64 v1, v14, v1, s1
4231; MOVREL-NEXT:    v_readfirstlane_b32 s0, v2
4232; MOVREL-NEXT:    v_readfirstlane_b32 s1, v3
4233; MOVREL-NEXT:    v_readfirstlane_b32 s3, v5
4234; MOVREL-NEXT:    v_readfirstlane_b32 s4, v6
4235; MOVREL-NEXT:    v_readfirstlane_b32 s5, v7
4236; MOVREL-NEXT:    v_readfirstlane_b32 s6, v8
4237; MOVREL-NEXT:    v_readfirstlane_b32 s7, v9
4238; MOVREL-NEXT:    v_readfirstlane_b32 s8, v0
4239; MOVREL-NEXT:    v_readfirstlane_b32 s9, v1
4240; MOVREL-NEXT:    ; return to shader part epilog
4241entry:
4242  %insert = insertelement <5 x double> %vec, double %val, i32 %idx
4243  ret <5 x double> %insert
4244}
4245
4246define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_v(<5 x double> inreg %vec, double %val, i32 %idx) {
4247; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_v:
4248; GPRIDX:       ; %bb.0: ; %entry
4249; GPRIDX-NEXT:    s_mov_b32 s0, s2
4250; GPRIDX-NEXT:    s_mov_b32 s1, s3
4251; GPRIDX-NEXT:    s_mov_b32 s2, s4
4252; GPRIDX-NEXT:    s_mov_b32 s3, s5
4253; GPRIDX-NEXT:    s_mov_b32 s4, s6
4254; GPRIDX-NEXT:    s_mov_b32 s5, s7
4255; GPRIDX-NEXT:    s_mov_b32 s6, s8
4256; GPRIDX-NEXT:    s_mov_b32 s7, s9
4257; GPRIDX-NEXT:    s_mov_b32 s8, s10
4258; GPRIDX-NEXT:    s_mov_b32 s9, s11
4259; GPRIDX-NEXT:    v_mov_b32_e32 v18, s15
4260; GPRIDX-NEXT:    v_mov_b32_e32 v17, s14
4261; GPRIDX-NEXT:    v_mov_b32_e32 v16, s13
4262; GPRIDX-NEXT:    v_mov_b32_e32 v15, s12
4263; GPRIDX-NEXT:    v_mov_b32_e32 v14, s11
4264; GPRIDX-NEXT:    v_mov_b32_e32 v13, s10
4265; GPRIDX-NEXT:    v_mov_b32_e32 v12, s9
4266; GPRIDX-NEXT:    v_mov_b32_e32 v11, s8
4267; GPRIDX-NEXT:    v_mov_b32_e32 v10, s7
4268; GPRIDX-NEXT:    v_mov_b32_e32 v9, s6
4269; GPRIDX-NEXT:    v_mov_b32_e32 v8, s5
4270; GPRIDX-NEXT:    v_mov_b32_e32 v7, s4
4271; GPRIDX-NEXT:    v_mov_b32_e32 v6, s3
4272; GPRIDX-NEXT:    v_mov_b32_e32 v5, s2
4273; GPRIDX-NEXT:    v_mov_b32_e32 v4, s1
4274; GPRIDX-NEXT:    v_mov_b32_e32 v3, s0
4275; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
4276; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 2, v2
4277; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 1, v2
4278; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[2:3], 3, v2
4279; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 4, v2
4280; GPRIDX-NEXT:    v_cndmask_b32_e64 v2, v5, v0, s[6:7]
4281; GPRIDX-NEXT:    v_cndmask_b32_e64 v5, v7, v0, s[0:1]
4282; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v9, v0, s[2:3]
4283; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc
4284; GPRIDX-NEXT:    v_cndmask_b32_e64 v0, v11, v0, s[4:5]
4285; GPRIDX-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s[6:7]
4286; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v10, v1, s[2:3]
4287; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[0:1]
4288; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc
4289; GPRIDX-NEXT:    v_cndmask_b32_e64 v1, v12, v1, s[4:5]
4290; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v3
4291; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v4
4292; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v2
4293; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v6
4294; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v5
4295; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v8
4296; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v7
4297; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v9
4298; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v0
4299; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v1
4300; GPRIDX-NEXT:    ; return to shader part epilog
4301;
4302; MOVREL-LABEL: dyn_insertelement_v5f64_s_v_v:
4303; MOVREL:       ; %bb.0: ; %entry
4304; MOVREL-NEXT:    s_mov_b32 s0, s2
4305; MOVREL-NEXT:    s_mov_b32 s1, s3
4306; MOVREL-NEXT:    s_mov_b32 s2, s4
4307; MOVREL-NEXT:    s_mov_b32 s3, s5
4308; MOVREL-NEXT:    s_mov_b32 s4, s6
4309; MOVREL-NEXT:    s_mov_b32 s5, s7
4310; MOVREL-NEXT:    s_mov_b32 s6, s8
4311; MOVREL-NEXT:    s_mov_b32 s7, s9
4312; MOVREL-NEXT:    s_mov_b32 s8, s10
4313; MOVREL-NEXT:    s_mov_b32 s9, s11
4314; MOVREL-NEXT:    v_mov_b32_e32 v18, s15
4315; MOVREL-NEXT:    v_mov_b32_e32 v17, s14
4316; MOVREL-NEXT:    v_mov_b32_e32 v16, s13
4317; MOVREL-NEXT:    v_mov_b32_e32 v15, s12
4318; MOVREL-NEXT:    v_mov_b32_e32 v14, s11
4319; MOVREL-NEXT:    v_mov_b32_e32 v13, s10
4320; MOVREL-NEXT:    v_mov_b32_e32 v12, s9
4321; MOVREL-NEXT:    v_mov_b32_e32 v11, s8
4322; MOVREL-NEXT:    v_mov_b32_e32 v10, s7
4323; MOVREL-NEXT:    v_mov_b32_e32 v9, s6
4324; MOVREL-NEXT:    v_mov_b32_e32 v8, s5
4325; MOVREL-NEXT:    v_mov_b32_e32 v7, s4
4326; MOVREL-NEXT:    v_mov_b32_e32 v6, s3
4327; MOVREL-NEXT:    v_mov_b32_e32 v5, s2
4328; MOVREL-NEXT:    v_mov_b32_e32 v4, s1
4329; MOVREL-NEXT:    v_mov_b32_e32 v3, s0
4330; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
4331; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, 1, v2
4332; MOVREL-NEXT:    v_cmp_eq_u32_e64 s1, 4, v2
4333; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
4334; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
4335; MOVREL-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s0
4336; MOVREL-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s0
4337; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v2
4338; MOVREL-NEXT:    v_cmp_eq_u32_e64 s0, 3, v2
4339; MOVREL-NEXT:    v_readfirstlane_b32 s2, v5
4340; MOVREL-NEXT:    v_readfirstlane_b32 s3, v6
4341; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v8, v1, vcc_lo
4342; MOVREL-NEXT:    v_cndmask_b32_e64 v8, v9, v0, s0
4343; MOVREL-NEXT:    v_cndmask_b32_e64 v9, v10, v1, s0
4344; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v0, vcc_lo
4345; MOVREL-NEXT:    v_cndmask_b32_e64 v0, v11, v0, s1
4346; MOVREL-NEXT:    v_cndmask_b32_e64 v1, v12, v1, s1
4347; MOVREL-NEXT:    v_readfirstlane_b32 s0, v3
4348; MOVREL-NEXT:    v_readfirstlane_b32 s1, v4
4349; MOVREL-NEXT:    v_readfirstlane_b32 s4, v7
4350; MOVREL-NEXT:    v_readfirstlane_b32 s5, v2
4351; MOVREL-NEXT:    v_readfirstlane_b32 s6, v8
4352; MOVREL-NEXT:    v_readfirstlane_b32 s7, v9
4353; MOVREL-NEXT:    v_readfirstlane_b32 s8, v0
4354; MOVREL-NEXT:    v_readfirstlane_b32 s9, v1
4355; MOVREL-NEXT:    ; return to shader part epilog
4356entry:
4357  %insert = insertelement <5 x double> %vec, double %val, i32 %idx
4358  ret <5 x double> %insert
4359}
4360
4361define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_s(<5 x double> %vec, double %val, i32 inreg %idx) {
4362; GPRIDX-LABEL: dyn_insertelement_v5f64_v_v_s:
4363; GPRIDX:       ; %bb.0: ; %entry
4364; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
4365; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], s2, 1
4366; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], s2, 2
4367; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], s2, 3
4368; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], s2, 4
4369; GPRIDX-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[0:1]
4370; GPRIDX-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[0:1]
4371; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
4372; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
4373; GPRIDX-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[8:9]
4374; GPRIDX-NEXT:    v_cndmask_b32_e64 v6, v6, v10, s[4:5]
4375; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s[4:5]
4376; GPRIDX-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[8:9]
4377; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[6:7]
4378; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[6:7]
4379; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v0
4380; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v1
4381; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v2
4382; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v3
4383; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v4
4384; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v5
4385; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v6
4386; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v7
4387; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v8
4388; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v9
4389; GPRIDX-NEXT:    ; return to shader part epilog
4390;
4391; MOVREL-LABEL: dyn_insertelement_v5f64_v_v_s:
4392; MOVREL:       ; %bb.0: ; %entry
4393; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
4394; MOVREL-NEXT:    v_mov_b32_e32 v15, v2
4395; MOVREL-NEXT:    v_mov_b32_e32 v14, v3
4396; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
4397; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
4398; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
4399; MOVREL-NEXT:    v_readfirstlane_b32 s0, v0
4400; MOVREL-NEXT:    v_readfirstlane_b32 s1, v1
4401; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v14, v11, vcc_lo
4402; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v15, v10, vcc_lo
4403; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
4404; MOVREL-NEXT:    v_readfirstlane_b32 s3, v3
4405; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc_lo
4406; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc_lo
4407; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
4408; MOVREL-NEXT:    v_readfirstlane_b32 s4, v4
4409; MOVREL-NEXT:    v_readfirstlane_b32 s5, v5
4410; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc_lo
4411; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc_lo
4412; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
4413; MOVREL-NEXT:    v_readfirstlane_b32 s2, v2
4414; MOVREL-NEXT:    v_readfirstlane_b32 s6, v6
4415; MOVREL-NEXT:    v_readfirstlane_b32 s7, v7
4416; MOVREL-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc_lo
4417; MOVREL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc_lo
4418; MOVREL-NEXT:    v_readfirstlane_b32 s8, v8
4419; MOVREL-NEXT:    v_readfirstlane_b32 s9, v9
4420; MOVREL-NEXT:    ; return to shader part epilog
4421entry:
4422  %insert = insertelement <5 x double> %vec, double %val, i32 %idx
4423  ret <5 x double> %insert
4424}
4425
4426define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_v(<5 x double> %vec, double %val, i32 %idx) {
4427; GPRIDX-LABEL: dyn_insertelement_v5f64_v_v_v:
4428; GPRIDX:       ; %bb.0: ; %entry
4429; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v12
4430; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v12
4431; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[2:3], 2, v12
4432; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v12
4433; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 4, v12
4434; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[6:7]
4435; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[6:7]
4436; GPRIDX-NEXT:    v_cndmask_b32_e64 v6, v6, v10, s[4:5]
4437; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s[4:5]
4438; GPRIDX-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[2:3]
4439; GPRIDX-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[2:3]
4440; GPRIDX-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[0:1]
4441; GPRIDX-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[0:1]
4442; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
4443; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
4444; GPRIDX-NEXT:    v_readfirstlane_b32 s0, v0
4445; GPRIDX-NEXT:    v_readfirstlane_b32 s1, v1
4446; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v2
4447; GPRIDX-NEXT:    v_readfirstlane_b32 s3, v3
4448; GPRIDX-NEXT:    v_readfirstlane_b32 s4, v4
4449; GPRIDX-NEXT:    v_readfirstlane_b32 s5, v5
4450; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v6
4451; GPRIDX-NEXT:    v_readfirstlane_b32 s7, v7
4452; GPRIDX-NEXT:    v_readfirstlane_b32 s8, v8
4453; GPRIDX-NEXT:    v_readfirstlane_b32 s9, v9
4454; GPRIDX-NEXT:    ; return to shader part epilog
4455;
4456; MOVREL-LABEL: dyn_insertelement_v5f64_v_v_v:
4457; MOVREL:       ; %bb.0: ; %entry
4458; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v12
4459; MOVREL-NEXT:    v_mov_b32_e32 v15, v2
4460; MOVREL-NEXT:    v_mov_b32_e32 v14, v3
4461; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
4462; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc_lo
4463; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
4464; MOVREL-NEXT:    v_readfirstlane_b32 s0, v0
4465; MOVREL-NEXT:    v_readfirstlane_b32 s1, v1
4466; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v15, v10, vcc_lo
4467; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v14, v11, vcc_lo
4468; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v12
4469; MOVREL-NEXT:    v_readfirstlane_b32 s2, v2
4470; MOVREL-NEXT:    v_readfirstlane_b32 s3, v3
4471; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc_lo
4472; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc_lo
4473; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v12
4474; MOVREL-NEXT:    v_readfirstlane_b32 s4, v4
4475; MOVREL-NEXT:    v_readfirstlane_b32 s5, v5
4476; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc_lo
4477; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc_lo
4478; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v12
4479; MOVREL-NEXT:    v_readfirstlane_b32 s6, v6
4480; MOVREL-NEXT:    v_readfirstlane_b32 s7, v7
4481; MOVREL-NEXT:    v_cndmask_b32_e32 v8, v8, v10, vcc_lo
4482; MOVREL-NEXT:    v_cndmask_b32_e32 v9, v9, v11, vcc_lo
4483; MOVREL-NEXT:    v_readfirstlane_b32 s8, v8
4484; MOVREL-NEXT:    v_readfirstlane_b32 s9, v9
4485; MOVREL-NEXT:    ; return to shader part epilog
4486entry:
4487  %insert = insertelement <5 x double> %vec, double %val, i32 %idx
4488  ret <5 x double> %insert
4489}
4490