1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
3
; funcA: splat byte element 5 of a <32 x i8> vector into all 32 lanes.
; The expected output below shuffles the low 128 bits with vpshufb so every
; byte holds element 5, then copies that xmm into the upper half of ymm0 with
; vinsertf128.
4define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
5; CHECK-LABEL: funcA:
6; CHECK:       ## BB#0: ## %entry
7; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
8; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
9; CHECK-NEXT:    retq
10entry:
; All 32 mask indices are 5; the second shuffle operand is undef and unused.
11  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
12  ret <32 x i8> %shuffle
13}
14
; funcB: splat 16-bit element 5 of a <16 x i16> vector into all 16 lanes.
; Element 5 sits in the high four words of the low 128 bits, so the expected
; output first replicates it across those four words (vpshufhw), then picks
; dwords 2,2,3,3 (vpshufd) so the whole xmm holds it, and finally duplicates
; the xmm into the upper half of ymm0 (vinsertf128).
15define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
16; CHECK-LABEL: funcB:
17; CHECK:       ## BB#0: ## %entry
18; CHECK-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
19; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
20; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
21; CHECK-NEXT:    retq
22entry:
; All 16 mask indices are 5; the second shuffle operand is undef and unused.
23  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
24  ret <16 x i16> %shuffle
25}
26
; funcC: build a <4 x i64> whose four lanes all hold the scalar argument %q.
; The expected output moves the scalar from %rdi into xmm0 (vmovq), duplicates
; the low qword within the xmm (vpshufd with dwords 0,1,0,1), and copies the
; xmm into the upper half of ymm0 (vinsertf128).
27define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
28; CHECK-LABEL: funcC:
29; CHECK:       ## BB#0: ## %entry
30; CHECK-NEXT:    vmovq %rdi, %xmm0
31; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
32; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
33; CHECK-NEXT:    retq
34entry:
; The splat is written as a chain of four insertelement operations, one per
; lane, starting from undef.
35  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
36  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
37  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
38  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
39  ret <4 x i64> %vecinit6.i
40}
41
; funcD: floating-point counterpart of funcC; build a <4 x double> whose four
; lanes all hold the scalar argument %q.
; The expected output duplicates the low double of xmm0 (vmovddup) and copies
; the xmm into the upper half of ymm0 (vinsertf128); no scalar-to-vector move
; appears in the expected sequence.
42define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
43; CHECK-LABEL: funcD:
44; CHECK:       ## BB#0: ## %entry
45; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
46; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
47; CHECK-NEXT:    retq
48entry:
; The splat is written as a chain of four insertelement operations, one per
; lane, starting from undef.
49  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
50  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
51  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
52  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
53  ret <4 x double> %vecinit6.i
54}
55
56; Test that this turns into a broadcast:
57;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
58;
; funcE: load one 32-bit value from a stack array and insert it into lanes 6
; and 7 of an otherwise-undef <8 x i32>, then return it as <8 x float>.
; The expected output performs the load and the lane fill with a single
; vbroadcastss from the stack slot into ymm0, inside a frame that is realigned
; to 32 bytes for the align-32 alloca.
; Both conditional branches test undef, so the control flow is scaffolding
; only (presumably left over from reducing a larger test case).
59define <8 x float> @funcE() nounwind {
60; CHECK-LABEL: funcE:
61; CHECK:       ## BB#0: ## %for_exit499
62; CHECK-NEXT:    xorl %eax, %eax
63; CHECK-NEXT:    ## implicit-def: %YMM0
64; CHECK-NEXT:    testb %al, %al
65; CHECK-NEXT:    jne LBB4_2
66; CHECK-NEXT:  ## BB#1: ## %load.i1247
67; CHECK-NEXT:    pushq %rbp
68; CHECK-NEXT:    movq %rsp, %rbp
69; CHECK-NEXT:    andq $-32, %rsp
70; CHECK-NEXT:    subq $1312, %rsp ## imm = 0x520
71; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
72; CHECK-NEXT:    movq %rbp, %rsp
73; CHECK-NEXT:    popq %rbp
74; CHECK-NEXT:  LBB4_2: ## %__load_and_broadcast_32.exit1249
75; CHECK-NEXT:    retq
76allocas:
; 18x18 float array on the stack, 32-byte aligned; it is never written here.
77  %udx495 = alloca [18 x [18 x float]], align 32
78  br label %for_test505.preheader
79
80for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
81  br i1 undef, label %for_exit499, label %for_test505.preheader
82
83for_exit499:                                      ; preds = %for_test505.preheader
84  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
85
86load.i1247:                                       ; preds = %for_exit499
; Address of element [1][1] of the array, reinterpreted as an i32 pointer.
87  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
88  %ptr.i1237 = bitcast float* %ptr1227 to i32*
89  %val.i1238 = load i32, i32* %ptr.i1237, align 4
; Only lanes 6 and 7 receive the loaded value; lanes 0-5 remain undef.
90  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
91  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
92  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
93  br label %__load_and_broadcast_32.exit1249
94
95__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
; The result is undef when the load block was skipped.
96  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
97  ret <8 x float> %load_broadcast12281250
98}
99
; funcF: insert the scalar argument %val into lanes 6 and 7 of an
; otherwise-undef <8 x i32> and return it as <8 x float>.
; Unlike funcE the value comes from a register, not memory, so the expected
; output is vmovd + vpshufd + vinsertf128 with no load to fold into a
; broadcast. The vpshufd places element 0 in dwords 2 and 3 of the xmm, which
; become lanes 6 and 7 once the xmm is inserted as the upper half of ymm0.
100define <8 x float> @funcF(i32 %val) nounwind {
101; CHECK-LABEL: funcF:
102; CHECK:       ## BB#0:
103; CHECK-NEXT:    vmovd %edi, %xmm0
104; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
105; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
106; CHECK-NEXT:    retq
; Only lanes 6 and 7 are defined; lanes 0-5 remain undef.
107  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
108  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
109  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
110  ret <8 x float> %tmp
111}
112
; funcG: splat float element 0 of a <8 x float> vector into all 8 lanes.
; Element 0 is in the low 128 bits, so the expected output splats it within
; xmm0 (vpermilps) and copies the xmm into the upper half of ymm0
; (vinsertf128).
113define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
114; CHECK-LABEL: funcG:
115; CHECK:       ## BB#0: ## %entry
116; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
117; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
118; CHECK-NEXT:    retq
119entry:
; All 8 mask indices are 0; the second shuffle operand is undef and unused.
120  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
121  ret <8 x float> %shuffle
122}
123
; funcH: splat float element 5 of a <8 x float> vector into all 8 lanes.
; Element 5 is in the upper 128 bits, so the expected output differs from
; funcG: an in-lane vpermilps on the full ymm fills the upper half with
; element 5, then vperm2f128 copies that upper half into both halves.
124define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
125; CHECK-LABEL: funcH:
126; CHECK:       ## BB#0: ## %entry
127; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
128; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
129; CHECK-NEXT:    retq
130entry:
; All 8 mask indices are 5; the second shuffle operand is undef and unused.
131  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
132  ret <8 x float> %shuffle
133}
134
; splat_load_2f64_11: load a <2 x double> and splat its element 1 into both
; lanes. The expected output folds the load into a single vmovddup with a
; memory source; the address operand is hidden behind the asm-comment regex,
; so the exact offset is not pinned by this test.
135define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
136; CHECK-LABEL: splat_load_2f64_11:
137; CHECK:       ## BB#0:
138; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
139; CHECK-NEXT:    retq
140  %x = load <2 x double>, <2 x double>* %ptr
141  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
142  ret <2 x double> %x1
143}
144
; splat_load_4f64_2222: load a <4 x double> and splat its element 2 into all
; four lanes. The expected output is a single vbroadcastsd reading directly
; from byte offset 16 (element 2 * 8 bytes) of the pointer argument, instead
; of a full vector load followed by a shuffle.
145define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
146; CHECK-LABEL: splat_load_4f64_2222:
147; CHECK:       ## BB#0:
148; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
149; CHECK-NEXT:    retq
150  %x = load <4 x double>, <4 x double>* %ptr
151  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
152  ret <4 x double> %x1
153}
154
; splat_load_4f32_0000: load a <4 x float> and splat its element 0 into all
; four lanes. The expected output is a single 128-bit vbroadcastss reading
; directly from the pointer argument (offset 0), instead of a full vector
; load followed by a shuffle.
155define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
156; CHECK-LABEL: splat_load_4f32_0000:
157; CHECK:       ## BB#0:
158; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
159; CHECK-NEXT:    retq
160  %x = load <4 x float>, <4 x float>* %ptr
161  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
162  ret <4 x float> %x1
163}
164
; splat_load_8f32_77777777: load a <8 x float> and splat its element 7 into
; all eight lanes. The expected output is a single 256-bit vbroadcastss
; reading directly from byte offset 28 (element 7 * 4 bytes) of the pointer
; argument, instead of a full vector load followed by a shuffle.
165define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
166; CHECK-LABEL: splat_load_8f32_77777777:
167; CHECK:       ## BB#0:
168; CHECK-NEXT:    vbroadcastss 28(%rdi), %ymm0
169; CHECK-NEXT:    retq
170  %x = load <8 x float>, <8 x float>* %ptr
171  %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
172  ret <8 x float> %x1
173}
174