1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
3; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
4
; Splat of a scalar i32 argument into all 16 lanes, written as an insertelement
; into lane 0 followed by a shufflevector with an all-zero mask.
; Expected lowering: a single vpbroadcastd taking the GPR (%edi) as its source.
5define   <16 x i32> @_inreg16xi32(i32 %a) {
6; ALL-LABEL: _inreg16xi32:
7; ALL:       # %bb.0:
8; ALL-NEXT:    vpbroadcastd %edi, %zmm0
9; ALL-NEXT:    retq
10  %b = insertelement <16 x i32> undef, i32 %a, i32 0
11  %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
12  ret <16 x i32> %c
13}
14
; Splat of a scalar i64 argument into all 8 lanes (insertelement into lane 0,
; then an all-zero shuffle mask).
; Expected lowering: a single vpbroadcastq taking the GPR (%rdi) as its source.
15define   <8 x i64> @_inreg8xi64(i64 %a) {
16; ALL-LABEL: _inreg8xi64:
17; ALL:       # %bb.0:
18; ALL-NEXT:    vpbroadcastq %rdi, %zmm0
19; ALL-NEXT:    retq
20  %b = insertelement <8 x i64> undef, i64 %a, i32 0
21  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
22  ret <8 x i64> %c
23}
24
; Broadcast of lane 0 of a <4 x float> argument to a 16-lane result, written as
; a single widening shufflevector with an all-zero mask.
; Expected lowering: vbroadcastss from %xmm0 into %zmm0.
25define   <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
26; ALL-LABEL: _ss16xfloat_v4:
27; ALL:       # %bb.0:
28; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
29; ALL-NEXT:    retq
30  %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer
31  ret <16 x float> %b
32}
33
; Splat of a scalar float argument into all 16 lanes (insertelement into lane 0,
; then an all-zero shuffle mask).
; Expected lowering: vbroadcastss from %xmm0 into %zmm0.
34define   <16 x float> @_inreg16xfloat(float %a) {
35; ALL-LABEL: _inreg16xfloat:
36; ALL:       # %bb.0:
37; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
38; ALL-NEXT:    retq
39  %b = insertelement <16 x float> undef, float %a, i32 0
40  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
41  ret <16 x float> %c
42}
43
; Merge-masked float splat: lanes where %mask1 is non-zero take the splat of %a,
; all other lanes keep the corresponding lane of %i (select on an icmp ne).
; Expected lowering: vptestmd forms %k1 from the mask vector, the broadcast is
; merge-masked into %zmm1 under %k1, and the result is then copied to %zmm0.
44define   <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
45; ALL-LABEL: _ss16xfloat_mask:
46; ALL:       # %bb.0:
47; ALL-NEXT:    vptestmd %zmm2, %zmm2, %k1
48; ALL-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
49; ALL-NEXT:    vmovaps %zmm1, %zmm0
50; ALL-NEXT:    retq
51  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
52  %b = insertelement <16 x float> undef, float %a, i32 0
53  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
54  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
55  ret <16 x float> %r
56}
57
; Zero-masked float splat: lanes where %mask1 is non-zero take the splat of %a,
; all other lanes are zero (select against zeroinitializer).
; Expected lowering: vptestmd forms %k1, then a vbroadcastss with {%k1} {z}.
58define   <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
59; ALL-LABEL: _ss16xfloat_maskz:
60; ALL:       # %bb.0:
61; ALL-NEXT:    vptestmd %zmm1, %zmm1, %k1
62; ALL-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
63; ALL-NEXT:    retq
64  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
65  %b = insertelement <16 x float> undef, float %a, i32 0
66  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
67  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
68  ret <16 x float> %r
69}
70
; Splat of a float loaded from memory into all 16 lanes.
; Expected lowering: the scalar load is folded into vbroadcastss as a memory
; operand ((%rdi)), with no separate load instruction.
71define   <16 x float> @_ss16xfloat_load(float* %a.ptr) {
72; ALL-LABEL: _ss16xfloat_load:
73; ALL:       # %bb.0:
74; ALL-NEXT:    vbroadcastss (%rdi), %zmm0
75; ALL-NEXT:    retq
76  %a = load float, float* %a.ptr
77  %b = insertelement <16 x float> undef, float %a, i32 0
78  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
79  ret <16 x float> %c
80}
81
; Merge-masked splat of a float loaded from memory: lanes where %mask1 is
; non-zero take the loaded value, the remaining lanes keep %i.
; Expected lowering: vptestmd forms %k1, then a merge-masked vbroadcastss with
; the load folded in as a memory operand.
82define   <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
83; ALL-LABEL: _ss16xfloat_mask_load:
84; ALL:       # %bb.0:
85; ALL-NEXT:    vptestmd %zmm1, %zmm1, %k1
86; ALL-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1}
87; ALL-NEXT:    retq
88  %a = load float, float* %a.ptr
89  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
90  %b = insertelement <16 x float> undef, float %a, i32 0
91  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
92  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
93  ret <16 x float> %r
94}
95
; Zero-masked splat of a float loaded from memory: lanes where %mask1 is
; non-zero take the loaded value, the remaining lanes are zero.
; Expected lowering: vptestmd forms %k1, then a vbroadcastss from memory with
; {%k1} {z}.
96define   <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
97; ALL-LABEL: _ss16xfloat_maskz_load:
98; ALL:       # %bb.0:
99; ALL-NEXT:    vptestmd %zmm0, %zmm0, %k1
100; ALL-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z}
101; ALL-NEXT:    retq
102  %a = load float, float* %a.ptr
103  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
104  %b = insertelement <16 x float> undef, float %a, i32 0
105  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
106  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
107  ret <16 x float> %r
108}
109
; Splat of a scalar double argument into all 8 lanes (insertelement into lane 0,
; then an all-zero shuffle mask).
; Expected lowering: vbroadcastsd from %xmm0 into %zmm0.
110define   <8 x double> @_inreg8xdouble(double %a) {
111; ALL-LABEL: _inreg8xdouble:
112; ALL:       # %bb.0:
113; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
114; ALL-NEXT:    retq
115  %b = insertelement <8 x double> undef, double %a, i32 0
116  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
117  ret <8 x double> %c
118}
119
; Merge-masked double splat: lanes where %mask1 is non-zero take the splat of
; %a, the remaining lanes keep %i. The mask source is only <8 x i32>, so the
; expected output contains a "kill" annotation treating %ymm2 as %zmm2 before
; the 512-bit vptestmd that forms %k1. The broadcast is then merge-masked into
; %zmm1 and the result copied to %zmm0.
120define   <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
121; ALL-LABEL: _sd8xdouble_mask:
122; ALL:       # %bb.0:
123; ALL-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
124; ALL-NEXT:    vptestmd %zmm2, %zmm2, %k1
125; ALL-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
126; ALL-NEXT:    vmovapd %zmm1, %zmm0
127; ALL-NEXT:    retq
128  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
129  %b = insertelement <8 x double> undef, double %a, i32 0
130  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
131  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
132  ret <8 x double> %r
133}
134
; Zero-masked double splat: lanes where %mask1 is non-zero take the splat of %a,
; the remaining lanes are zero. The <8 x i32> mask source is treated as a zmm
; register (see the "kill" annotation) for the 512-bit vptestmd that forms %k1.
; Expected lowering ends with vbroadcastsd using {%k1} {z}.
135define   <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
136; ALL-LABEL: _sd8xdouble_maskz:
137; ALL:       # %bb.0:
138; ALL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
139; ALL-NEXT:    vptestmd %zmm1, %zmm1, %k1
140; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
141; ALL-NEXT:    retq
142  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
143  %b = insertelement <8 x double> undef, double %a, i32 0
144  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
145  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
146  ret <8 x double> %r
147}
148
; Splat of a double loaded from memory into all 8 lanes.
; Expected lowering: the scalar load is folded into vbroadcastsd as a memory
; operand ((%rdi)), with no separate load instruction.
149define   <8 x double> @_sd8xdouble_load(double* %a.ptr) {
150; ALL-LABEL: _sd8xdouble_load:
151; ALL:       # %bb.0:
152; ALL-NEXT:    vbroadcastsd (%rdi), %zmm0
153; ALL-NEXT:    retq
154  %a = load double, double* %a.ptr
155  %b = insertelement <8 x double> undef, double %a, i32 0
156  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
157  ret <8 x double> %c
158}
159
; Merge-masked splat of a double loaded from memory: lanes where %mask1 is
; non-zero take the loaded value, the remaining lanes keep %i. The <8 x i32>
; mask source is treated as a zmm register (see the "kill" annotation) for the
; 512-bit vptestmd. Expected lowering ends with a merge-masked vbroadcastsd
; that has the load folded in as a memory operand.
160define   <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
161; ALL-LABEL: _sd8xdouble_mask_load:
162; ALL:       # %bb.0:
163; ALL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
164; ALL-NEXT:    vptestmd %zmm1, %zmm1, %k1
165; ALL-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1}
166; ALL-NEXT:    retq
167  %a = load double, double* %a.ptr
168  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
169  %b = insertelement <8 x double> undef, double %a, i32 0
170  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
171  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
172  ret <8 x double> %r
173}
174
; Zero-masked splat of a double loaded from memory: lanes where %mask1 is
; non-zero take the loaded value, the remaining lanes are zero. The <8 x i32>
; mask source is treated as a zmm register (see the "kill" annotation) for the
; 512-bit vptestmd. Expected lowering ends with vbroadcastsd from memory using
; {%k1} {z}.
175define   <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
176; ALL-LABEL: _sd8xdouble_maskz_load:
177; ALL:       # %bb.0:
178; ALL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
179; ALL-NEXT:    vptestmd %zmm0, %zmm0, %k1
180; ALL-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
181; ALL-NEXT:    retq
182  %a = load double, double* %a.ptr
183  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
184  %b = insertelement <8 x double> undef, double %a, i32 0
185  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
186  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
187  ret <8 x double> %r
188}
189
; Broadcast of lane 0 of a full <16 x i32> vector to all 16 lanes (all-zero
; shuffle mask). Note that the expected instruction is vbroadcastss even though
; the element type is an integer.
190define   <16 x i32> @_xmm16xi32(<16 x i32> %a) {
191; ALL-LABEL: _xmm16xi32:
192; ALL:       # %bb.0:
193; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
194; ALL-NEXT:    retq
195  %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
196  ret <16 x i32> %b
197}
198
; Broadcast of lane 0 of a full <16 x float> vector to all 16 lanes (all-zero
; shuffle mask). Expected lowering: vbroadcastss reading only %xmm0.
199define   <16 x float> @_xmm16xfloat(<16 x float> %a) {
200; ALL-LABEL: _xmm16xfloat:
201; ALL:       # %bb.0:
202; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
203; ALL-NEXT:    retq
204  %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
205  ret <16 x float> %b
206}
207
; Select keyed by an "fcmp uno against zero" mask, choosing between the
; sign-extension of a constant all-false mask (%0) and the sign-extension of
; the compare result itself (%2).
; Pinned lowering: vcmpunordps into %k1, a zero-masked vpternlogd $255
; (all-ones in the selected lanes), knotw to invert the mask, then a zero-masked
; vmovdqa32.
; NOTE(review): lanes where %1 is true pick %0 (zero) and lanes where %1 is
; false pick sext(false) (also zero), so the IR result looks like a constant
; zero vector. Confirm whether this test intentionally pins a missed fold.
208define <16 x i32> @test_vbroadcast(<16 x float> %a0) {
209; ALL-LABEL: test_vbroadcast:
210; ALL:       # %bb.0: # %entry
211; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
212; ALL-NEXT:    vcmpunordps %zmm1, %zmm0, %k1
213; ALL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
214; ALL-NEXT:    knotw %k1, %k1
215; ALL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
216; ALL-NEXT:    retq
217entry:
218  %0 = sext <16 x i1> zeroinitializer to <16 x i32>
219  %1 = fcmp uno <16 x float> %a0, zeroinitializer
220  %2 = sext <16 x i1> %1 to <16 x i32>
221  %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2
222  ret <16 x i32> %3
223}
224
225; We implement the set1 intrinsics with vector initializers.  Verify that the
226; IR generated will produce broadcasts at the end.
; Double case: the same scalar %d is inserted into each of the 8 lanes by a
; chain of insertelement instructions (no shufflevector).
; Expected lowering: the whole chain collapses to one vbroadcastsd.
227define <8 x double> @test_set1_pd(double %d) #2 {
228; ALL-LABEL: test_set1_pd:
229; ALL:       # %bb.0: # %entry
230; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
231; ALL-NEXT:    retq
232entry:
233  %vecinit.i = insertelement <8 x double> undef, double %d, i32 0
234  %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1
235  %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %d, i32 2
236  %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %d, i32 3
237  %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %d, i32 4
238  %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %d, i32 5
239  %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %d, i32 6
240  %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %d, i32 7
241  ret <8 x double> %vecinit7.i
242}
243
; i64 case: the same scalar %d is inserted into each of the 8 lanes by a chain
; of insertelement instructions.
; Expected lowering: one vpbroadcastq taking the GPR (%rdi) as its source.
244define <8 x i64> @test_set1_epi64(i64 %d) #2 {
245; ALL-LABEL: test_set1_epi64:
246; ALL:       # %bb.0: # %entry
247; ALL-NEXT:    vpbroadcastq %rdi, %zmm0
248; ALL-NEXT:    retq
249entry:
250  %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0
251  %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1
252  %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %d, i32 2
253  %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %d, i32 3
254  %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %d, i32 4
255  %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %d, i32 5
256  %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %d, i32 6
257  %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %d, i32 7
258  ret <8 x i64> %vecinit7.i
259}
260
; Float case: the same scalar %f is inserted into each of the 16 lanes by a
; chain of insertelement instructions.
; Expected lowering: the whole chain collapses to one vbroadcastss.
261define <16 x float> @test_set1_ps(float %f) #2 {
262; ALL-LABEL: test_set1_ps:
263; ALL:       # %bb.0: # %entry
264; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
265; ALL-NEXT:    retq
266entry:
267  %vecinit.i = insertelement <16 x float> undef, float %f, i32 0
268  %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1
269  %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %f, i32 2
270  %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %f, i32 3
271  %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %f, i32 4
272  %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %f, i32 5
273  %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %f, i32 6
274  %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %f, i32 7
275  %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %f, i32 8
276  %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %f, i32 9
277  %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %f, i32 10
278  %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %f, i32 11
279  %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %f, i32 12
280  %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %f, i32 13
281  %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %f, i32 14
282  %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %f, i32 15
283  ret <16 x float> %vecinit15.i
284}
285
; i32 case: the same scalar %f is inserted into each of the 16 lanes by a chain
; of insertelement instructions.
; Expected lowering: one vpbroadcastd taking the GPR (%edi) as its source.
286define <16 x i32> @test_set1_epi32(i32 %f) #2 {
287; ALL-LABEL: test_set1_epi32:
288; ALL:       # %bb.0: # %entry
289; ALL-NEXT:    vpbroadcastd %edi, %zmm0
290; ALL-NEXT:    retq
291entry:
292  %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0
293  %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1
294  %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %f, i32 2
295  %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %f, i32 3
296  %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %f, i32 4
297  %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %f, i32 5
298  %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %f, i32 6
299  %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %f, i32 7
300  %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %f, i32 8
301  %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %f, i32 9
302  %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %f, i32 10
303  %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %f, i32 11
304  %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %f, i32 12
305  %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %f, i32 13
306  %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %f, i32 14
307  %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %f, i32 15
308  ret <16 x i32> %vecinit15.i
309}
310
311; We implement the scalar broadcast intrinsics with vector initializers.
312; Verify that the IR generated will produce the broadcast at the end.
; Lane 0 is extracted from a <2 x double> argument and then inserted into each
; of the 8 result lanes by a chain of insertelement instructions.
; Expected lowering: extract plus insert chain collapse to one vbroadcastsd
; reading %xmm0.
313define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
314; ALL-LABEL: test_mm512_broadcastsd_pd:
315; ALL:       # %bb.0: # %entry
316; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
317; ALL-NEXT:    retq
318entry:
319  %0 = extractelement <2 x double> %a, i32 0
320  %vecinit.i = insertelement <8 x double> undef, double %0, i32 0
321  %vecinit1.i = insertelement <8 x double> %vecinit.i, double %0, i32 1
322  %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %0, i32 2
323  %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %0, i32 3
324  %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %0, i32 4
325  %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %0, i32 5
326  %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %0, i32 6
327  %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %0, i32 7
328  ret <8 x double> %vecinit7.i
329}
330
; Broadcast of lane 0 of a <8 x float> argument to a 16-lane result via a
; widening shufflevector with an all-zero mask.
; Expected lowering: vbroadcastss reading only %xmm0.
331define <16 x float> @test1(<8 x float>%a)  {
332; ALL-LABEL: test1:
333; ALL:       # %bb.0:
334; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
335; ALL-NEXT:    retq
336  %res = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> zeroinitializer
337  ret <16 x float>%res
338}
339
; Broadcast of lane 0 of a <4 x double> argument to an 8-lane result via a
; widening shufflevector with an all-zero mask.
; Expected lowering: vbroadcastsd reading only %xmm0.
340define <8 x double> @test2(<4 x double>%a)  {
341; ALL-LABEL: test2:
342; ALL:       # %bb.0:
343; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
344; ALL-NEXT:    retq
345  %res = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer
346  ret <8 x double>%res
347}
348
; Broadcast of byte lane 0 of a <32 x i8> argument to a 64-lane result.
; The expected code differs per feature set, so the assertions use the
; per-configuration prefixes AVX512F and AVX512BW rather than the shared one;
; each FileCheck invocation must enable the matching prefix for these
; assertions to be evaluated.
;   - AVX512F prefix:  broadcast to a ymm, then vinserti64x4 to duplicate it
;                      into the upper half of the zmm.
;   - AVX512BW prefix: a single vpbroadcastb directly into the zmm.
349define <64 x i8> @_invec32xi8(<32 x i8>%a)  {
350; AVX512F-LABEL: _invec32xi8:
351; AVX512F:       # %bb.0:
352; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
353; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
354; AVX512F-NEXT:    retq
355;
356; AVX512BW-LABEL: _invec32xi8:
357; AVX512BW:       # %bb.0:
358; AVX512BW-NEXT:    vpbroadcastb %xmm0, %zmm0
359; AVX512BW-NEXT:    retq
360  %res = shufflevector <32 x i8> %a, <32 x i8> undef, <64 x i32> zeroinitializer
361  ret <64 x i8>%res
362}
363
; Broadcast of word lane 0 of a <16 x i16> argument to a 32-lane result.
; The expected code differs per feature set, so the assertions use the
; per-configuration prefixes AVX512F and AVX512BW rather than the shared one;
; each FileCheck invocation must enable the matching prefix for these
; assertions to be evaluated.
;   - AVX512F prefix:  broadcast to a ymm, then vinserti64x4 to duplicate it
;                      into the upper half of the zmm.
;   - AVX512BW prefix: a single vpbroadcastw directly into the zmm.
364define <32 x i16> @_invec16xi16(<16 x i16>%a)  {
365; AVX512F-LABEL: _invec16xi16:
366; AVX512F:       # %bb.0:
367; AVX512F-NEXT:    vpbroadcastw %xmm0, %ymm0
368; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
369; AVX512F-NEXT:    retq
370;
371; AVX512BW-LABEL: _invec16xi16:
372; AVX512BW:       # %bb.0:
373; AVX512BW-NEXT:    vpbroadcastw %xmm0, %zmm0
374; AVX512BW-NEXT:    retq
375  %res = shufflevector <16 x i16> %a, <16 x i16> undef, <32 x i32> zeroinitializer
376  ret <32 x i16>%res
377}
378
; Broadcast of lane 0 of a <8 x i32> argument to a 16-lane result via a
; widening all-zero shuffle mask. The expected instruction is vbroadcastss even
; though the element type is an integer.
379define <16 x i32> @_invec8xi32(<8 x i32>%a)  {
380; ALL-LABEL: _invec8xi32:
381; ALL:       # %bb.0:
382; ALL-NEXT:    vbroadcastss %xmm0, %zmm0
383; ALL-NEXT:    retq
384  %res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer
385  ret <16 x i32>%res
386}
387
; Broadcast of lane 0 of a <4 x i64> argument to an 8-lane result via a
; widening all-zero shuffle mask. The expected instruction is vbroadcastsd even
; though the element type is an integer.
388define <8 x i64> @_invec4xi64(<4 x i64>%a)  {
389; ALL-LABEL: _invec4xi64:
390; ALL:       # %bb.0:
391; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0
392; ALL-NEXT:    retq
393  %res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer
394  ret <8 x i64>%res
395}
396
397declare void @func_f32(float)
; A float is computed (%x + %x), passed to the external @func_f32, and only
; splatted to 16 lanes after the call returns, so the value has to survive the
; call. Expected lowering: the value is spilled to the stack before the call,
; and the reload is folded into vbroadcastss as a memory operand ((%rsp))
; instead of being reloaded into a register first.
398define <16 x float> @broadcast_ss_spill(float %x) {
399; ALL-LABEL: broadcast_ss_spill:
400; ALL:       # %bb.0:
401; ALL-NEXT:    subq $24, %rsp
402; ALL-NEXT:    .cfi_def_cfa_offset 32
403; ALL-NEXT:    vaddss %xmm0, %xmm0, %xmm0
404; ALL-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
405; ALL-NEXT:    callq func_f32
406; ALL-NEXT:    vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload
407; ALL-NEXT:    addq $24, %rsp
408; ALL-NEXT:    .cfi_def_cfa_offset 8
409; ALL-NEXT:    retq
410  %a  = fadd float %x, %x
411  call void @func_f32(float %a)
412  %b = insertelement <16 x float> undef, float %a, i32 0
413  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
414  ret <16 x float> %c
415}
416
417declare void @func_f64(double)
; A double is computed (%x + %x), passed to the external @func_f64, and only
; splatted to 8 lanes after the call returns, so the value has to survive the
; call. Expected lowering: the value is spilled to the stack before the call,
; and the reload is folded into vbroadcastsd as a memory operand ((%rsp))
; instead of being reloaded into a register first.
418define <8 x double> @broadcast_sd_spill(double %x) {
419; ALL-LABEL: broadcast_sd_spill:
420; ALL:       # %bb.0:
421; ALL-NEXT:    subq $24, %rsp
422; ALL-NEXT:    .cfi_def_cfa_offset 32
423; ALL-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
424; ALL-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
425; ALL-NEXT:    callq func_f64
426; ALL-NEXT:    vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload
427; ALL-NEXT:    addq $24, %rsp
428; ALL-NEXT:    .cfi_def_cfa_offset 8
429; ALL-NEXT:    retq
430  %a  = fadd double %x, %x
431  call void @func_f64(double %a)
432  %b = insertelement <8 x double> undef, double %a, i32 0
433  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
434  ret <8 x double> %c
435}
436