1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X32
3; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64
4
; Splats a scalar i64 load across all four lanes of a <4 x i64> using an
; insertelement chain. The x86_64 run expects a single vbroadcastsd from
; memory; the i686 run expects the value to be assembled from two 32-bit
; loads with vmovd/vpinsrd and then copied into the upper half of the ymm
; register with vinsertf128.
5define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
6; X32-LABEL: A:
7; X32:       ## BB#0: ## %entry
8; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
9; X32-NEXT:    movl (%eax), %ecx
10; X32-NEXT:    movl 4(%eax), %eax
11; X32-NEXT:    vmovd %ecx, %xmm0
12; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
13; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
14; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
15; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
16; X32-NEXT:    retl
17;
18; X64-LABEL: A:
19; X64:       ## BB#0: ## %entry
20; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
21; X64-NEXT:    retq
22entry:
23  %q = load i64, i64* %ptr, align 8
24  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
25  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
26  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
27  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
28  ret <4 x i64> %vecinit6.i
29}
30
; Inserts a scalar i32 load into lanes 0-3 of an <8 x i32>; lanes 4-7 are
; never written by this IR and stay undef. Both runs expect the whole thing
; to collapse into one vbroadcastss from memory into %ymm0.
31define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
32; X32-LABEL: B:
33; X32:       ## BB#0: ## %entry
34; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
35; X32-NEXT:    vbroadcastss (%eax), %ymm0
36; X32-NEXT:    retl
37;
38; X64-LABEL: B:
39; X64:       ## BB#0: ## %entry
40; X64-NEXT:    vbroadcastss (%rdi), %ymm0
41; X64-NEXT:    retq
42entry:
43  %q = load i32, i32* %ptr, align 4
44  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
45  %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
46  %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
47  %vecinit6.i = insertelement <8 x i32> %vecinit4.i, i32 %q, i32 3
48  ret <8 x i32> %vecinit6.i
49}
50
; Splats a scalar double load across all four lanes of a <4 x double>.
; Both runs expect a single vbroadcastsd from memory into %ymm0.
51define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
52; X32-LABEL: C:
53; X32:       ## BB#0: ## %entry
54; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
55; X32-NEXT:    vbroadcastsd (%eax), %ymm0
56; X32-NEXT:    retl
57;
58; X64-LABEL: C:
59; X64:       ## BB#0: ## %entry
60; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
61; X64-NEXT:    retq
62entry:
63  %q = load double, double* %ptr, align 8
64  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
65  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
66  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
67  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
68  ret <4 x double> %vecinit6.i
69}
70
; Inserts a scalar float load into lanes 0-3 of an <8 x float>; lanes 4-7
; are never written by this IR and stay undef. Both runs expect a single
; vbroadcastss from memory into %ymm0.
71define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
72; X32-LABEL: D:
73; X32:       ## BB#0: ## %entry
74; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
75; X32-NEXT:    vbroadcastss (%eax), %ymm0
76; X32-NEXT:    retl
77;
78; X64-LABEL: D:
79; X64:       ## BB#0: ## %entry
80; X64-NEXT:    vbroadcastss (%rdi), %ymm0
81; X64-NEXT:    retq
82entry:
83  %q = load float, float* %ptr, align 4
84  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
85  %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
86  %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
87  %vecinit6.i = insertelement <8 x float> %vecinit4.i, float %q, i32 3
88  ret <8 x float> %vecinit6.i
89}
90
91;;;; 128-bit versions
92
; Splats a scalar float load across all four lanes of a <4 x float>.
; Both runs expect a single vbroadcastss from memory into %xmm0.
93define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
94; X32-LABEL: e:
95; X32:       ## BB#0: ## %entry
96; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
97; X32-NEXT:    vbroadcastss (%eax), %xmm0
98; X32-NEXT:    retl
99;
100; X64-LABEL: e:
101; X64:       ## BB#0: ## %entry
102; X64-NEXT:    vbroadcastss (%rdi), %xmm0
103; X64-NEXT:    retq
104entry:
105  %q = load float, float* %ptr, align 4
106  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
107  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
108  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
109  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
110  ret <4 x float> %vecinit6.i
111}
112
113; Don't broadcast constants on pre-AVX2 hardware.
; Builds a <4 x float> whose four lanes are the same float constant
; (0xbf80000000000000, printed as -7.812500e-03 in the checks). %ptr is not
; used. Both runs expect a plain vmovaps of a constant vector and no
; broadcast instruction.
114define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
115; X32-LABEL: _e2:
116; X32:       ## BB#0: ## %entry
117; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
118; X32-NEXT:    retl
119;
120; X64-LABEL: _e2:
121; X64:       ## BB#0: ## %entry
122; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
123; X64-NEXT:    retq
124entry:
125   %vecinit.i = insertelement <4 x float> undef, float       0xbf80000000000000, i32 0
126  %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
127  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
128  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
129  ret <4 x float> %vecinit6.i
130}
131
132
; Splats a scalar i32 load across all four lanes of a <4 x i32>. Although
; the element type is an integer, both runs expect vbroadcastss from memory
; into %xmm0.
133define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
134; X32-LABEL: F:
135; X32:       ## BB#0: ## %entry
136; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
137; X32-NEXT:    vbroadcastss (%eax), %xmm0
138; X32-NEXT:    retl
139;
140; X64-LABEL: F:
141; X64:       ## BB#0: ## %entry
142; X64-NEXT:    vbroadcastss (%rdi), %xmm0
143; X64-NEXT:    retq
144entry:
145  %q = load i32, i32* %ptr, align 4
146  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
147  %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
148  %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
149  %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %q, i32 3
150  ret <4 x i32> %vecinit6.i
151}
152
153; FIXME: Pointer adjusted broadcasts
154
; Loads a whole <4 x i32> and splats element 1 with a shufflevector. The
; checks expect a vpshufd with a memory source (mem[1,1,1,1]), not a
; broadcast; the operand text before the shuffle comment is skipped by the
; {{.*#+}} pattern, so the addressing mode is not pinned here.
155define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
156; X32-LABEL: load_splat_4i32_4i32_1111:
157; X32:       ## BB#0: ## %entry
158; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
159; X32-NEXT:    vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
160; X32-NEXT:    retl
161;
162; X64-LABEL: load_splat_4i32_4i32_1111:
163; X64:       ## BB#0: ## %entry
164; X64-NEXT:    vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
165; X64-NEXT:    retq
166entry:
167  %ld = load <4 x i32>, <4 x i32>* %ptr
168  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
169  ret <4 x i32> %ret
170}
171
; Loads a <4 x i32> and splats element 3 into an 8-lane result. Both runs
; expect the vector load to be narrowed to a vbroadcastss of the single
; element at byte offset 12 from the pointer.
172define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
173; X32-LABEL: load_splat_8i32_4i32_33333333:
174; X32:       ## BB#0: ## %entry
175; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
176; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
177; X32-NEXT:    retl
178;
179; X64-LABEL: load_splat_8i32_4i32_33333333:
180; X64:       ## BB#0: ## %entry
181; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
182; X64-NEXT:    retq
183entry:
184  %ld = load <4 x i32>, <4 x i32>* %ptr
185  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
186  ret <8 x i32> %ret
187}
188
; Loads an <8 x i32> and splats element 5 across all eight lanes. Both runs
; expect a vbroadcastss of the single element at byte offset 20 from the
; pointer.
189define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
190; X32-LABEL: load_splat_8i32_8i32_55555555:
191; X32:       ## BB#0: ## %entry
192; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
193; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
194; X32-NEXT:    retl
195;
196; X64-LABEL: load_splat_8i32_8i32_55555555:
197; X64:       ## BB#0: ## %entry
198; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
199; X64-NEXT:    retq
200entry:
201  %ld = load <8 x i32>, <8 x i32>* %ptr
202  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
203  ret <8 x i32> %ret
204}
205
; Loads a <4 x float> and splats element 1. Both runs expect a vbroadcastss
; of the single element at byte offset 4 from the pointer into %xmm0.
206define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
207; X32-LABEL: load_splat_4f32_4f32_1111:
208; X32:       ## BB#0: ## %entry
209; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
210; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
211; X32-NEXT:    retl
212;
213; X64-LABEL: load_splat_4f32_4f32_1111:
214; X64:       ## BB#0: ## %entry
215; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
216; X64-NEXT:    retq
217entry:
218  %ld = load <4 x float>, <4 x float>* %ptr
219  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
220  ret <4 x float> %ret
221}
222
; Loads a <4 x float> and splats element 3 into an 8-lane result. Both runs
; expect a vbroadcastss of the single element at byte offset 12 from the
; pointer into %ymm0.
223define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
224; X32-LABEL: load_splat_8f32_4f32_33333333:
225; X32:       ## BB#0: ## %entry
226; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
227; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
228; X32-NEXT:    retl
229;
230; X64-LABEL: load_splat_8f32_4f32_33333333:
231; X64:       ## BB#0: ## %entry
232; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
233; X64-NEXT:    retq
234entry:
235  %ld = load <4 x float>, <4 x float>* %ptr
236  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
237  ret <8 x float> %ret
238}
239
; Loads an <8 x float> and splats element 5 across all eight lanes. Both
; runs expect a vbroadcastss of the single element at byte offset 20 from
; the pointer into %ymm0.
240define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
241; X32-LABEL: load_splat_8f32_8f32_55555555:
242; X32:       ## BB#0: ## %entry
243; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
244; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
245; X32-NEXT:    retl
246;
247; X64-LABEL: load_splat_8f32_8f32_55555555:
248; X64:       ## BB#0: ## %entry
249; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
250; X64-NEXT:    retq
251entry:
252  %ld = load <8 x float>, <8 x float>* %ptr
253  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
254  ret <8 x float> %ret
255}
256
; Loads a <2 x i64> and splats element 1. The checks expect a vpshufd with
; a memory source selecting dwords [2,3,2,3] (i.e. the upper 64-bit element
; twice), not a broadcast; the operand text is skipped by the {{.*#+}}
; pattern.
257define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
258; X32-LABEL: load_splat_2i64_2i64_1111:
259; X32:       ## BB#0: ## %entry
260; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
261; X32-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
262; X32-NEXT:    retl
263;
264; X64-LABEL: load_splat_2i64_2i64_1111:
265; X64:       ## BB#0: ## %entry
266; X64-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
267; X64-NEXT:    retq
268entry:
269  %ld = load <2 x i64>, <2 x i64>* %ptr
270  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
271  ret <2 x i64> %ret
272}
273
; Loads a <2 x i64> and splats element 1 into a 4-lane result. Both runs
; expect a vbroadcastsd of the single element at byte offset 8 from the
; pointer into %ymm0.
274define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
275; X32-LABEL: load_splat_4i64_2i64_1111:
276; X32:       ## BB#0: ## %entry
277; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
278; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
279; X32-NEXT:    retl
280;
281; X64-LABEL: load_splat_4i64_2i64_1111:
282; X64:       ## BB#0: ## %entry
283; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
284; X64-NEXT:    retq
285entry:
286  %ld = load <2 x i64>, <2 x i64>* %ptr
287  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
288  ret <4 x i64> %ret
289}
290
; Loads a <4 x i64> and splats element 2 across all four lanes. Both runs
; expect a vbroadcastsd of the single element at byte offset 16 from the
; pointer into %ymm0.
291define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
292; X32-LABEL: load_splat_4i64_4i64_2222:
293; X32:       ## BB#0: ## %entry
294; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
295; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
296; X32-NEXT:    retl
297;
298; X64-LABEL: load_splat_4i64_4i64_2222:
299; X64:       ## BB#0: ## %entry
300; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
301; X64-NEXT:    retq
302entry:
303  %ld = load <4 x i64>, <4 x i64>* %ptr
304  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
305  ret <4 x i64> %ret
306}
307
; Loads a <2 x double> and splats element 1. Both runs expect a vmovddup
; with a memory source (mem[0,0]); the operand text, including any
; displacement, is skipped by the {{.*#+}} pattern and so is not pinned by
; these checks.
308define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
309; X32-LABEL: load_splat_2f64_2f64_1111:
310; X32:       ## BB#0: ## %entry
311; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
312; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
313; X32-NEXT:    retl
314;
315; X64-LABEL: load_splat_2f64_2f64_1111:
316; X64:       ## BB#0: ## %entry
317; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
318; X64-NEXT:    retq
319entry:
320  %ld = load <2 x double>, <2 x double>* %ptr
321  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
322  ret <2 x double> %ret
323}
324
; Loads a <2 x double> and splats element 1 into a 4-lane result. Both runs
; expect a vbroadcastsd of the single element at byte offset 8 from the
; pointer into %ymm0.
325define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
326; X32-LABEL: load_splat_4f64_2f64_1111:
327; X32:       ## BB#0: ## %entry
328; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
329; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
330; X32-NEXT:    retl
331;
332; X64-LABEL: load_splat_4f64_2f64_1111:
333; X64:       ## BB#0: ## %entry
334; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
335; X64-NEXT:    retq
336entry:
337  %ld = load <2 x double>, <2 x double>* %ptr
338  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
339  ret <4 x double> %ret
340}
341
; Loads a <4 x double> and splats element 2 across all four lanes. Both
; runs expect a vbroadcastsd of the single element at byte offset 16 from
; the pointer into %ymm0.
342define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
343; X32-LABEL: load_splat_4f64_4f64_2222:
344; X32:       ## BB#0: ## %entry
345; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
346; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
347; X32-NEXT:    retl
348;
349; X64-LABEL: load_splat_4f64_4f64_2222:
350; X64:       ## BB#0: ## %entry
351; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
352; X64-NEXT:    retq
353entry:
354  %ld = load <4 x double>, <4 x double>* %ptr
355  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
356  ret <4 x double> %ret
357}
358
359; Unsupported vbroadcasts
360
; Splats a scalar i64 load into both lanes of a <2 x i64>. No broadcast
; instruction is expected: the x86_64 run expects a vmovq load followed by
; a vpshufd [0,1,0,1]; the i686 run expects two 32-bit loads combined with
; vmovd and three vpinsrd instructions.
361define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
362; X32-LABEL: G:
363; X32:       ## BB#0: ## %entry
364; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
365; X32-NEXT:    movl (%eax), %ecx
366; X32-NEXT:    movl 4(%eax), %eax
367; X32-NEXT:    vmovd %ecx, %xmm0
368; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
369; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
370; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
371; X32-NEXT:    retl
372;
373; X64-LABEL: G:
374; X64:       ## BB#0: ## %entry
375; X64-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
376; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
377; X64-NEXT:    retq
378entry:
379  %q = load i64, i64* %ptr, align 8
380  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
381  %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
382  ret <2 x i64> %vecinit2.i
383}
384
; Register-to-register case: moves element 1 of the incoming <4 x i32> into
; lane 0 and leaves lanes 1-3 undef in the shuffle mask. Both runs expect a
; single vpshufd xmm0[1,1,2,3] and no broadcast.
385define <4 x i32> @H(<4 x i32> %a) {
386; X32-LABEL: H:
387; X32:       ## BB#0: ## %entry
388; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
389; X32-NEXT:    retl
390;
391; X64-LABEL: H:
392; X64:       ## BB#0: ## %entry
393; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
394; X64-NEXT:    retq
395entry:
396  %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
397  ret <4 x i32> %x
398}
399
; Splats a scalar double load (declared with align 4) into both lanes of a
; <2 x double>. Both runs expect a vmovddup with a memory source rather
; than a vbroadcast instruction.
400define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
401; X32-LABEL: I:
402; X32:       ## BB#0: ## %entry
403; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
404; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
405; X32-NEXT:    retl
406;
407; X64-LABEL: I:
408; X64:       ## BB#0: ## %entry
409; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
410; X64-NEXT:    retq
411entry:
412  %q = load double, double* %ptr, align 4
413  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
414  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
415  ret <2 x double> %vecinit2.i
416}
417
; Same float splat as @e, but followed by an unrelated i32 load from %k and
; a store of that value through an undef pointer, so the function contains
; other memory operations (the "force a chain" note below). Both runs still
; expect the splat to become a vbroadcastss from memory, alongside the
; movl load and store.
418define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
419; X32-LABEL: _RR:
420; X32:       ## BB#0: ## %entry
421; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
422; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
423; X32-NEXT:    vbroadcastss (%ecx), %xmm0
424; X32-NEXT:    movl (%eax), %eax
425; X32-NEXT:    movl %eax, (%eax)
426; X32-NEXT:    retl
427;
428; X64-LABEL: _RR:
429; X64:       ## BB#0: ## %entry
430; X64-NEXT:    vbroadcastss (%rdi), %xmm0
431; X64-NEXT:    movl (%rsi), %eax
432; X64-NEXT:    movl %eax, (%rax)
433; X64-NEXT:    retq
434entry:
435  %q = load float, float* %ptr, align 4
436  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
437  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
438  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
439  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
440  ; force a chain
441  %j = load i32, i32* %k, align 4
442  store i32 %j, i32* undef
443  ret <4 x float> %vecinit6.i
444}
445
; Float splat written as a single insertelement into lane 0 followed by a
; shufflevector with an all-zero mask, instead of an insertelement chain.
; %k is not used. Both runs expect a vbroadcastss from memory into %xmm0.
446define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
447; X32-LABEL: _RR2:
448; X32:       ## BB#0: ## %entry
449; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
450; X32-NEXT:    vbroadcastss (%eax), %xmm0
451; X32-NEXT:    retl
452;
453; X64-LABEL: _RR2:
454; X64:       ## BB#0: ## %entry
455; X64-NEXT:    vbroadcastss (%rdi), %xmm0
456; X64-NEXT:    retq
457entry:
458  %q = load float, float* %ptr, align 4
459  %v = insertelement <4 x float> undef, float %q, i32 0
460  %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
461  ret <4 x float> %t
462}
463
464; These tests check that a vbroadcast instruction is used when we have a splat
465; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
466; (via the insertelements).
467
; Builds one 4-lane float splat with an insertelement chain, then widens it
; to eight lanes with a shufflevector that repeats lanes 0-3 twice. Both
; runs expect a single vbroadcastss from memory into %ymm0.
468define <8 x float> @splat_concat1(float* %p) {
469; X32-LABEL: splat_concat1:
470; X32:       ## BB#0:
471; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
472; X32-NEXT:    vbroadcastss (%eax), %ymm0
473; X32-NEXT:    retl
474;
475; X64-LABEL: splat_concat1:
476; X64:       ## BB#0:
477; X64-NEXT:    vbroadcastss (%rdi), %ymm0
478; X64-NEXT:    retq
479  %1 = load float, float* %p, align 4
480  %2 = insertelement <4 x float> undef, float %1, i32 0
481  %3 = insertelement <4 x float> %2, float %1, i32 1
482  %4 = insertelement <4 x float> %3, float %1, i32 2
483  %5 = insertelement <4 x float> %4, float %1, i32 3
484  %6 = shufflevector <4 x float> %5, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
485  ret <8 x float> %6
486}
487
; Builds two separate 4-lane float splats of the same loaded scalar (two
; independent insertelement chains, %2-%5 and %6-%9) and concatenates them
; with an identity-order shufflevector. Both runs expect a single
; vbroadcastss from memory into %ymm0.
488define <8 x float> @splat_concat2(float* %p) {
489; X32-LABEL: splat_concat2:
490; X32:       ## BB#0:
491; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
492; X32-NEXT:    vbroadcastss (%eax), %ymm0
493; X32-NEXT:    retl
494;
495; X64-LABEL: splat_concat2:
496; X64:       ## BB#0:
497; X64-NEXT:    vbroadcastss (%rdi), %ymm0
498; X64-NEXT:    retq
499  %1 = load float, float* %p, align 4
500  %2 = insertelement <4 x float> undef, float %1, i32 0
501  %3 = insertelement <4 x float> %2, float %1, i32 1
502  %4 = insertelement <4 x float> %3, float %1, i32 2
503  %5 = insertelement <4 x float> %4, float %1, i32 3
504  %6 = insertelement <4 x float> undef, float %1, i32 0
505  %7 = insertelement <4 x float> %6, float %1, i32 1
506  %8 = insertelement <4 x float> %7, float %1, i32 2
507  %9 = insertelement <4 x float> %8, float %1, i32 3
508  %10 = shufflevector <4 x float> %5, <4 x float> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
509  ret <8 x float> %10
510}
511
; Builds one 2-lane double splat with insertelements, then widens it to
; four lanes with a shufflevector mask of <0,1,0,1>. Both runs expect a
; single vbroadcastsd from memory into %ymm0.
512define <4 x double> @splat_concat3(double* %p) {
513; X32-LABEL: splat_concat3:
514; X32:       ## BB#0:
515; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
516; X32-NEXT:    vbroadcastsd (%eax), %ymm0
517; X32-NEXT:    retl
518;
519; X64-LABEL: splat_concat3:
520; X64:       ## BB#0:
521; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
522; X64-NEXT:    retq
523  %1 = load double, double* %p, align 8
524  %2 = insertelement <2 x double> undef, double %1, i32 0
525  %3 = insertelement <2 x double> %2, double %1, i32 1
526  %4 = shufflevector <2 x double> %3, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
527  ret <4 x double> %4
528}
529
; Builds two separate 2-lane double splats of the same loaded scalar (two
; independent insertelement chains, %2-%3 and %4-%5) and concatenates them
; with an identity-order shufflevector, mirroring @splat_concat2. Both runs
; expect a single vbroadcastsd from memory into %ymm0.
; The second chain continues from %4; building %5 on top of %2 would leave
; %4 dead and the second vector would not be an independent BUILD_VECTOR.
530define <4 x double> @splat_concat4(double* %p) {
531; X32-LABEL: splat_concat4:
532; X32:       ## BB#0:
533; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
534; X32-NEXT:    vbroadcastsd (%eax), %ymm0
535; X32-NEXT:    retl
536;
537; X64-LABEL: splat_concat4:
538; X64:       ## BB#0:
539; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
540; X64-NEXT:    retq
541  %1 = load double, double* %p, align 8
542  %2 = insertelement <2 x double> undef, double %1, i32 0
543  %3 = insertelement <2 x double> %2, double %1, i32 1
544  %4 = insertelement <2 x double> undef, double %1, i32 0
545  %5 = insertelement <2 x double> %4, double %1, i32 1
546  %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
547  ret <4 x double> %6
548}
549