; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_mem_shuffle
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64

; funcA: splat byte element 5 of a <32 x i8> argument into all 32 lanes.
; The expected code shuffles byte 5 across the low 128-bit half with vpshufb
; and then copies that half into the upper half with vinsertf128.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}

; funcB: splat 16-bit element 5 of a <16 x i16> argument into all 16 lanes.
; The expected code first replicates word 5 across the high four words
; (vpshufhw), then broadcasts dword 2 (which now holds two copies of word 5)
; with vpshufd, and finally duplicates the low half into the upper half with
; vinsertf128.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}

; funcC: build a <4 x i64> whose four lanes all hold the scalar argument %q,
; using a chain of insertelement instructions.
; On the i686 run the expected code is a single vbroadcastsd from the stack
; slot holding %q; on the x86_64 run %q is moved from rdi into xmm0, duplicated
; with vmovddup and copied into the upper half with vinsertf128.
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; X86-LABEL: funcC:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: funcC:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovq %rdi, %xmm0
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; funcD: build a <4 x double> whose four lanes all hold the scalar argument
; %q, using a chain of insertelement instructions.
; On the i686 run the expected code is a single vbroadcastsd from the stack
; slot holding %q; on the x86_64 run the checks operate directly on xmm0
; (vmovddup, then vinsertf128 to fill the upper half).
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; X86-LABEL: funcD:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: funcD:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Test this turns into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; funcE: loads element [1][1] of a stack-allocated [18 x [18 x float]] array
; as an i32, inserts it into lanes 6 and 7 of an otherwise undef <8 x i32>,
; bitcasts the result to <8 x float> and merges it with undef through a phi.
; Both runs expect the load and the inserts to be emitted as one vbroadcastss
; from the stack inside the %load.i1247 block.
; The branch conditions are undef; the control flow only exists to place the
; load in its own block.
define <8 x float> @funcE() nounwind {
; X86-LABEL: funcE:
; X86:       # %bb.0: # %allocas
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    testb %al, %al
; X86-NEXT:    # implicit-def: $ymm0
; X86-NEXT:    jne .LBB4_2
; X86-NEXT:  # %bb.1: # %load.i1247
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-32, %esp
; X86-NEXT:    subl $1312, %esp # imm = 0x520
; X86-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:  .LBB4_2: # %__load_and_broadcast_32.exit1249
; X86-NEXT:    retl
;
; X64-LABEL: funcE:
; X64:       # %bb.0: # %allocas
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    testb %al, %al
; X64-NEXT:    # implicit-def: $ymm0
; X64-NEXT:    jne .LBB4_2
; X64-NEXT:  # %bb.1: # %load.i1247
; X64-NEXT:    pushq %rbp
; X64-NEXT:    movq %rsp, %rbp
; X64-NEXT:    andq $-32, %rsp
; X64-NEXT:    subq $1312, %rsp # imm = 0x520
; X64-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; X64-NEXT:    movq %rbp, %rsp
; X64-NEXT:    popq %rbp
; X64-NEXT:  .LBB4_2: # %__load_and_broadcast_32.exit1249
; X64-NEXT:    retq
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32, i32* %ptr.i1237, align 4
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}

; funcF: inserts the i32 argument %val into lanes 6 and 7 of an otherwise
; undef <8 x i32> and returns it bitcast to <8 x float>. Lanes 0-5 are left
; undef in the IR, and the checks show the value being splatted to every lane.
; The i686 run expects vbroadcastss straight from the stack argument; the
; x86_64 run expects vmovd from edi, vpshufd to fill the low half, and
; vinsertf128 to fill the upper half.
define <8 x float> @funcF(i32 %val) nounwind {
; X86-LABEL: funcF:
; X86:       # %bb.0:
; X86-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: funcF:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}

; funcG: splat float element 0 of an <8 x float> argument into all 8 lanes.
; The expected code broadcasts element 0 within the low 128-bit half with
; vpermilps and then copies that half into the upper half with vinsertf128.
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; funcH: splat float element 5 of an <8 x float> argument into all 8 lanes.
; Element 5 lives in the upper 128-bit half, so the expected code first
; replicates element 1 of each half in place (vpermilps, which puts element 5
; across the upper half) and then copies the upper half into both halves with
; vperm2f128.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}

; splat_load_2f64_11: load a <2 x double> from %ptr and splat element 1.
; Both runs expect the vector load and the shuffle to be folded into a single
; vmovddup that reads the scalar at byte offset 8 from the pointer.
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; X86-LABEL: splat_load_2f64_11:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovddup 8(%eax), %xmm0 # xmm0 = mem[0,0]
; X86-NEXT:    retl
;
; X64-LABEL: splat_load_2f64_11:
; X64:       # %bb.0:
; X64-NEXT:    vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]
; X64-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %ptr
  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %x1
}

; splat_load_4f64_2222: load a <4 x double> from %ptr and splat element 2.
; Both runs expect the vector load and the shuffle to be folded into a single
; vbroadcastsd that reads the scalar at byte offset 16 from the pointer.
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; X86-LABEL: splat_load_4f64_2222:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: splat_load_4f64_2222:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
  %x = load <4 x double>, <4 x double>* %ptr
  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %x1
}

; splat_load_4f32_0000: load a <4 x float> from %ptr and splat element 0.
; Both runs expect the vector load and the shuffle to be folded into a single
; 128-bit vbroadcastss that reads the scalar at offset 0 from the pointer.
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; X86-LABEL: splat_load_4f32_0000:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vbroadcastss (%eax), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: splat_load_4f32_0000:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %ptr
  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %x1
}

; splat_load_8f32_77777777: load an <8 x float> from %ptr and splat element 7.
; Both runs expect the vector load and the shuffle to be folded into a single
; 256-bit vbroadcastss that reads the scalar at byte offset 28 from the
; pointer.
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; X86-LABEL: splat_load_8f32_77777777:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vbroadcastss 28(%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: splat_load_8f32_77777777:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss 28(%rdi), %ymm0
; X64-NEXT:    retq
  %x = load <8 x float>, <8 x float>* %ptr
  %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x float> %x1
}
