; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
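; The X32 prefix checks the i686 run and X64 the x86_64 run; plain CHECK lines
; further down apply to both targets.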

define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: BB16:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: BB16:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastb (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i8, i8* %ptr, align 4
  %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
  %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
  %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
  %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
  %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
  %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
  %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
  %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
  %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
  %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
  %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
  %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
  %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
  %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
  %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
  %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
  ret <16 x i8> %qf
}

define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: BB32:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: BB32:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastb (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i8, i8* %ptr, align 4
  %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
  %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
  %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
  %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
  %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
  %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
  %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
  %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
  %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
  %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
  %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
  %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
  %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
  %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
  %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
  %qf = insertelement <32 x i8> %qe, i8 %q, i32 15

  %q20 = insertelement <32 x i8> %qf, i8 %q,  i32 16
  %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
  %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
  %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
  %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
  %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
  %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
  %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
  %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
  %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
  %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
  %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
  %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
  %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
  %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
  %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
  ret <32 x i8> %q2f
}

define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: W16:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: W16:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastw (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i16, i16* %ptr, align 4
  %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
  %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
  %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
  %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
  %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
  %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
  %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
  %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
  ret <8 x i16> %q7
}

define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: WW16:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: WW16:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastw (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i16, i16* %ptr, align 4
  %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
  %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
  %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
  %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
  %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
  %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
  %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
  %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
  %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
  %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
  %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
  %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
  %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
  %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
  %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
  %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
  ret <16 x i16> %qf
}

define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: D32:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: D32:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
  %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
  %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
  %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
  ret <4 x i32> %q3
}

define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: DD32:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: DD32:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i32, i32* %ptr, align 4
  %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
  %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
  %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
  %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
  %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
  %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
  %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
  %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
  ret <8 x i32> %q7
}

define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: Q64:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: Q64:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastq (%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 4
  %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
  %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
  ret <2 x i64> %q1
}

define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: QQ64:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: QQ64:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %q = load i64, i64* %ptr, align 4
  %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
  %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
  %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
  %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
  ret <4 x i64> %q3
}

; FIXME: Pointer adjusted broadcasts
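; The splats below use a non-zero lane of a loaded vector; the expected
; lowering folds the element load at an adjusted offset, e.g. splatting lane 1
; of a <16 x i8> load is checked to become vpbroadcastb 1(%eax) / 1(%rdi).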

define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i8_16i8_1111111111111111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb 1(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_16i8_16i8_1111111111111111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastb 1(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %ret
}

define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb 1(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastb 1(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <16 x i8>, <16 x i8>* %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i8> %ret
}

define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastb 1(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastb 1(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <32 x i8>, <32 x i8>* %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <32 x i8> %ret
}

define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i16_8i16_11111111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw 2(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i16_8i16_11111111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastw 2(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %ret
}

define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i16_8i16_1111111111111111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw 2(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_16i16_8i16_1111111111111111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i16>, <8 x i16>* %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %ret
}

define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_16i16_16i16_1111111111111111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpbroadcastw 2(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_16i16_16i16_1111111111111111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastw 2(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <16 x i16>, <16 x i16>* %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i16> %ret
}

define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i32_4i32_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i32_4i32_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %ret
}

define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_4i32_33333333:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_4i32_33333333:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i32>, <4 x i32>* %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %ret
}

define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_8i32_55555555:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8i32_8i32_55555555:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x i32>, <8 x i32>* %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %ret
}

define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f32_4f32_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f32_4f32_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 4(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %ret
}

define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_4f32_33333333:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_4f32_33333333:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x float>, <4 x float>* %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x float> %ret
}

define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8f32_8f32_55555555:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_8f32_8f32_55555555:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss 20(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <8 x float>, <8 x float>* %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %ret
}

define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2i64_2i64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2i64_2i64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastq 8(%rdi), %xmm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %ret
}

define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_2i64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_2i64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x i64>, <2 x i64>* %ptr
  %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i64> %ret
}

define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_4i64_2222:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4i64_4i64_2222:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x i64>, <4 x i64>* %ptr
  %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i64> %ret
}

define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_2f64_2f64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_2f64_2f64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %ret
}

define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_2f64_1111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_2f64_1111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <2 x double>, <2 x double>* %ptr
  %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x double> %ret
}

define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4f64_4f64_2222:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: load_splat_4f64_4f64_2222:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
entry:
  %ld = load <4 x double>, <4 x double>* %ptr
  %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %ret
}

; Make sure that we still don't support broadcasting a double into a 128-bit
; vector; this used to crash.
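; There is no xmm-destination form of vbroadcastsd, so the 128-bit double
; splat below is expected to lower to vmovddup, as the CHECK lines show.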
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: I:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: I:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
entry:
  %q = load double, double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}

define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
; X32-LABEL: V111:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    vpbroadcastd LCPI27_0, %ymm1
; X32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: V111:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; X64-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
entry:
  %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %g
}

define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
; X32-LABEL: V113:
; X32:       ## BB#0: ## %entry
; X32-NEXT:    vbroadcastss LCPI28_0, %ymm1
; X32-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: V113:
; X64:       ## BB#0: ## %entry
; X64-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
entry:
  %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
  ret <8 x float> %g
}

define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e2:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss LCPI29_0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _e2:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %vecinit.i = insertelement <4 x float> undef, float        0xbf80000000000000, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
  ret <4 x float> %vecinit6.i
}

define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: _e4:
; X32:       ## BB#0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
; X32-NEXT:    retl
;
; X64-LABEL: _e4:
; X64:       ## BB#0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
; X64-NEXT:    retq
  %vecinit0.i = insertelement <8 x i8> undef, i8       52, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
  ret <8 x i8> %vecinit7.i
}

define void @crash() nounwind alwaysinline {
; X32-LABEL: crash:
; X32:       ## BB#0: ## %WGLoopsEntry
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    testb %al, %al
; X32-NEXT:    je LBB31_1
; X32-NEXT:  ## BB#2: ## %ret
; X32-NEXT:    retl
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  LBB31_1: ## %footer349VF
; X32-NEXT:    ## =>This Inner Loop Header: Depth=1
; X32-NEXT:    jmp LBB31_1
;
; X64-LABEL: crash:
; X64:       ## BB#0: ## %WGLoopsEntry
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    testb %al, %al
; X64-NEXT:    je LBB31_1
; X64-NEXT:  ## BB#2: ## %ret
; X64-NEXT:    retq
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  LBB31_1: ## %footer349VF
; X64-NEXT:    ## =>This Inner Loop Header: Depth=1
; X64-NEXT:    jmp LBB31_1
WGLoopsEntry:
  br i1 undef, label %ret, label %footer329VF

footer329VF:
  %A.0.inVF = fmul float undef, 6.553600e+04
  %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
  %A.0VF = fptosi float %A.0.inVF to i32
  %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
  %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %1 = and i32 %A.0VF, 65535
  %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
  %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
  br i1 undef, label %preload1201VF, label %footer349VF

preload1201VF:
  br label %footer349VF

footer349VF:
  %2 = mul nsw <8 x i32> undef, %0
  %3 = mul nsw <8 x i32> undef, %vector1099VF
  br label %footer329VF

ret:
  ret void
}

define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg0:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg0:
; X64:       ## BB#0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
  %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %wide
}

define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg1:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg1:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %in = insertelement <8 x float> undef, float %scalar, i32 0
  %wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %wide
}

define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg2:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg2:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %in = insertelement <4 x float> undef, float %scalar, i32 0
  %wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %wide
}

define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
; X32-LABEL: _inreg3:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg3:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %in = insertelement <4 x double> undef, double %scalar, i32 0
  %wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %wide
}

define   <8 x float> @_inreg8xfloat(<8 x float> %a) {
; X32-LABEL: _inreg8xfloat:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg8xfloat:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %b
}

define   <4 x float> @_inreg4xfloat(<4 x float> %a) {
; X32-LABEL: _inreg4xfloat:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg4xfloat:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %b
}

define   <16 x i16> @_inreg16xi16(<16 x i16> %a) {
; X32-LABEL: _inreg16xi16:
; X32:       ## BB#0:
; X32-NEXT:    vpbroadcastw %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg16xi16:
; X64:       ## BB#0:
; X64-NEXT:    vpbroadcastw %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %b
}

define   <8 x i16> @_inreg8xi16(<8 x i16> %a) {
; X32-LABEL: _inreg8xi16:
; X32:       ## BB#0:
; X32-NEXT:    vpbroadcastw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg8xi16:
; X64:       ## BB#0:
; X64-NEXT:    vpbroadcastw %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %b
}

define   <4 x i64> @_inreg4xi64(<4 x i64> %a) {
; X32-LABEL: _inreg4xi64:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg4xi64:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %b
}

define   <2 x i64> @_inreg2xi64(<2 x i64> %a) {
; X32-LABEL: _inreg2xi64:
; X32:       ## BB#0:
; X32-NEXT:    vpbroadcastq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg2xi64:
; X64:       ## BB#0:
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %b
}

define   <4 x double> @_inreg4xdouble(<4 x double> %a) {
; X32-LABEL: _inreg4xdouble:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg4xdouble:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %b
}

define   <2 x double> @_inreg2xdouble(<2 x double> %a) {
; X32-LABEL: _inreg2xdouble:
; X32:       ## BB#0:
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: _inreg2xdouble:
; X64:       ## BB#0:
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    retq
  %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %b
}

define   <8 x i32> @_inreg8xi32(<8 x i32> %a) {
; X32-LABEL: _inreg8xi32:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg8xi32:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %b
}

define   <4 x i32> @_inreg4xi32(<4 x i32> %a) {
; X32-LABEL: _inreg4xi32:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg4xi32:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %b
}

define   <32 x i8> @_inreg32xi8(<32 x i8> %a) {
; X32-LABEL: _inreg32xi8:
; X32:       ## BB#0:
; X32-NEXT:    vpbroadcastb %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg32xi8:
; X64:       ## BB#0:
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0
; X64-NEXT:    retq
  %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %b
}

define   <16 x i8> @_inreg16xi8(<16 x i8> %a) {
; X32-LABEL: _inreg16xi8:
; X32:       ## BB#0:
; X32-NEXT:    vpbroadcastb %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: _inreg16xi8:
; X64:       ## BB#0:
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0
; X64-NEXT:    retq
  %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %b
}

; These tests check that a vbroadcast instruction is used when we have a splat
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).
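; A C-level sketch of source that can produce this shape (illustrative only;
; the intrinsics and variable names are an assumption, not part of this test):
;   __m128 lo = _mm_set1_ps(f);            // BUILD_VECTOR splat of f
;   __m256 v  = _mm256_set_m128(lo, lo);   // concat_vectors of the two copies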

define <8 x float> @splat_concat1(float %f) {
; X32-LABEL: splat_concat1:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat1:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = insertelement <4 x float> undef, float %f, i32 0
  %2 = insertelement <4 x float> %1, float %f, i32 1
  %3 = insertelement <4 x float> %2, float %f, i32 2
  %4 = insertelement <4 x float> %3, float %f, i32 3
  %5 = shufflevector <4 x float> %4, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x float> %5
}

define <8 x float> @splat_concat2(float %f) {
; X32-LABEL: splat_concat2:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat2:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = insertelement <4 x float> undef, float %f, i32 0
  %2 = insertelement <4 x float> %1, float %f, i32 1
  %3 = insertelement <4 x float> %2, float %f, i32 2
  %4 = insertelement <4 x float> %3, float %f, i32 3
  %5 = insertelement <4 x float> undef, float %f, i32 0
  %6 = insertelement <4 x float> %5, float %f, i32 1
  %7 = insertelement <4 x float> %6, float %f, i32 2
  %8 = insertelement <4 x float> %7, float %f, i32 3
  %9 = shufflevector <4 x float> %4, <4 x float> %8, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %9
}

define <4 x double> @splat_concat3(double %d) {
; X32-LABEL: splat_concat3:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat3:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = insertelement <2 x double> undef, double %d, i32 0
  %2 = insertelement <2 x double> %1, double %d, i32 1
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %3
}

define <4 x double> @splat_concat4(double %d) {
; X32-LABEL: splat_concat4:
; X32:       ## BB#0:
; X32-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: splat_concat4:
; X64:       ## BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = insertelement <2 x double> undef, double %d, i32 0
  %2 = insertelement <2 x double> %1, double %d, i32 1
  %3 = insertelement <2 x double> undef, double %d, i32 0
  %4 = insertelement <2 x double> %3, double %d, i32 1
  %5 = shufflevector <2 x double> %2, <2 x double> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %5
}

; Test cases for <rdar://problem/16074331>.
; Instruction selection for the broadcast instruction fails if
; the load cannot be folded into the broadcast.
; This happens if the load initially has one use but other uses are
; created later, or if the selection DAG cannot prove that folding the
; load will not create a cycle in the DAG.
; These test cases exercise the latter.
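; In each function below, the splatted scalar is loaded from %cV_R.addr while
; stores to stack slots keep other memory nodes live in the same DAG, so it is
; not obvious that folding the scalar load into the broadcast is cycle-free;
; the CHECK lines verify that a broadcast is still selected. A C-level sketch
; of the pattern (illustrative only; the intrinsic and variable names are an
; assumption, not part of this test):
;   __m128i a = vCr;                  // live vector value, stored afterwards
;   __m128i b = _mm_set1_epi8(*p);    // splat of a loaded scalar, also stored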

; CHECK-LABEL: isel_crash_16b
; CHECK: vpbroadcastb {{[^,]+}}, %xmm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_16b(i8* %cV_R.addr) {
eintry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i8, i8* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0
  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
  %tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64>
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_32b
; CHECK: vpbroadcastb {{[^,]+}}, %ymm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_32b(i8* %cV_R.addr) {
eintry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i8, i8* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0
  %splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer
  %tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64>
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_8w
; CHECK: vpbroadcastw {{[^,]+}}, %xmm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_8w(i16* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i16, i16* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0
  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64>
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_16w
; CHECK: vpbroadcastw {{[^,]+}}, %ymm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_16w(i16* %cV_R.addr) {
eintry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i16, i16* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0
  %splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer
  %tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64>
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_4d
; CHECK: vbroadcastss {{[^,]+}}, %xmm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_4d(i32* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i32, i32* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0
  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64>
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %tmp3, <2 x i64>* %__b.addr.i, align 16
  ret void
}

; CHECK-LABEL: isel_crash_8d
; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}}
; CHECK: ret
define void @isel_crash_8d(i32* %cV_R.addr) {
eintry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i32, i32* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0
  %splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
  %tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64>
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %tmp3, <4 x i64>* %__b.addr.i, align 16
  ret void
}

; X64-LABEL: isel_crash_2q
; X64: vpbroadcastq {{[^,]+}}, %xmm{{[0-9]+}}
; X64: ret
define void @isel_crash_2q(i64* %cV_R.addr) {
entry:
  %__a.addr.i = alloca <2 x i64>, align 16
  %__b.addr.i = alloca <2 x i64>, align 16
  %vCr = alloca <2 x i64>, align 16
  store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
  %tmp2 = load i64, i64* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0
  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
  store <2 x i64> %splat.splat, <2 x i64>* %__b.addr.i, align 16
  ret void
}

; X64-LABEL: isel_crash_4q
; X64: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
; X64: ret
define void @isel_crash_4q(i64* %cV_R.addr) {
eintry:
  %__a.addr.i = alloca <4 x i64>, align 16
  %__b.addr.i = alloca <4 x i64>, align 16
  %vCr = alloca <4 x i64>, align 16
  store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
  %tmp2 = load i64, i64* %cV_R.addr, align 4
  %splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0
  %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
  store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
  store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16
  ret void
}