1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
4
5; Check constant loads of every 128-bit and 256-bit vector type
6; for size optimization using splat ops available with AVX and AVX2.
7
; 128-bit f64 splat, compiled with attribute group #0.
; AVX offers no broadcast from a double into a 128-bit register because movddup
; has covered that case since SSE3, so the constant is expected to be
; materialized with vmovddup.
define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
; CHECK-LABEL: splat_v2f64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %sum = fadd <2 x double> %x, <double 1.0, double 1.0>
  ret <2 x double> %sum
}
18
; 256-bit f64 splat, compiled with attribute group #1.
; Both RUN configurations are expected to load the constant with a single
; vbroadcastsd into a ymm register.
define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
; CHECK-LABEL: splat_v4f64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm1
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %sum = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %sum
}
28
; 128-bit f32 splat, compiled with attribute group #0.
; Both RUN configurations are expected to load the constant with vbroadcastss
; into an xmm register.
define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %sum = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  ret <4 x float> %sum
}
38
; 256-bit f32 splat, compiled with attribute group #1.
; Both RUN configurations are expected to load the constant with vbroadcastss
; into a ymm register.
define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
; CHECK-LABEL: splat_v8f32:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %sum = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %sum
}
48
; 128-bit i64 splat, compiled with attribute group #1.
; AVX has no integer splat, so the 64-bit value is faked with vmovddup.
; The same instruction is expected with AVX2 as well, because it is one byte
; smaller than vpbroadcastq.
define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
; CHECK-LABEL: splat_v2i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; CHECK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %sum = add <2 x i64> %x, <i64 1, i64 1>
  ret <2 x i64> %sum
}
60
; 256-bit i64 splat, compiled with attribute group #0.
; AVX has no 256-bit integer ops, so the add is split into two 128-bit halves
; and the 64-bit splat is faked with vmovddup. With AVX2 a single vpbroadcastq
; into a ymm register feeds one 256-bit vpaddq.
define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
; AVX-LABEL: splat_v4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sum = add <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %sum
}
81
; 128-bit i32 splat, compiled with attribute group #1.
; AVX has no integer splat, so the 32-bit value is faked with vbroadcastss.
; With AVX2 the integer broadcast vpbroadcastd is expected instead.
define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
; AVX-LABEL: splat_v4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %sum = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %sum
}
98
; 256-bit i32 splat, compiled with attribute group #0.
; AVX has no integer splat, so the 32-bit value is faked with vbroadcastss;
; the expected AVX code also performs the add as two 128-bit halves.
; With AVX2 a single vpbroadcastd into a ymm register feeds one 256-bit vpaddd.
define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
; AVX-LABEL: splat_v8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sum = add <8 x i32> %x, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %sum
}
118
; 128-bit i16 splat, compiled with attribute group #1.
; AVX has no integer splat and no broadcast trick exists for 16-bit elements,
; so the full constant is used as a memory operand of vpaddw (pshuflw or
; similar might be an alternative?). With AVX2, vpbroadcastw is expected.
define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
; AVX-LABEL: splat_v8i16:
; AVX:       # BB#0:
; AVX-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw {{.*}}(%rip), %xmm1
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %sum = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %sum
}
134
; 256-bit i16 splat, compiled with attribute group #0.
; AVX has no integer splat and no broadcast trick exists for 16-bit elements,
; so the expected AVX code loads the whole 128-bit constant with vmovdqa and
; adds it to each half separately (pshuflw or similar might be an
; alternative?). With AVX2, vpbroadcastw into a ymm register is expected.
define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
; AVX-LABEL: splat_v16i16:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
; AVX-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastw {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sum = add <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %sum
}
154
; 128-bit i8 splat, compiled with attribute group #1.
; AVX has no integer splat and no broadcast trick exists for 8-bit elements,
; so the full constant is used as a memory operand of vpaddb (pshufb or
; similar might be an alternative?). With AVX2, vpbroadcastb is expected.
define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
; AVX-LABEL: splat_v16i8:
; AVX:       # BB#0:
; AVX-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastb {{.*}}(%rip), %xmm1
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %sum = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %sum
}
170
; 256-bit i8 splat, compiled with attribute group #0.
; AVX has no integer splat and no broadcast trick exists for 8-bit elements,
; so the expected AVX code loads the whole 128-bit constant with vmovdqa and
; adds it to each half separately (pshufb or similar might be an
; alternative?). With AVX2, vpbroadcastb into a ymm register is expected.
define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
; AVX-LABEL: splat_v32i8:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v32i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastb {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %sum = add <32 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <32 x i8> %sum
}
190
; PR23259: Verify that ISel doesn't crash with a 'fatal error in backend'
; due to a missing AVX pattern to select a v2i64 X86ISD::BROADCAST of a
; loadi64 with multiple uses.
;
; The primary pass criterion is that llc completes on this function. The label
; check inside the function additionally confirms, for both RUN lines, that the
; function was really emitted rather than silently dropped.

@A = common global <3 x i64> zeroinitializer, align 32

define <8 x i64> @pr23259() #1 {
; CHECK-LABEL: pr23259:
entry:
  ; Load @A through a <4 x i64> view of the <3 x i64> global.
  %0 = load <4 x i64>, <4 x i64>* bitcast (<3 x i64>* @A to <4 x i64>*), align 32
  ; Keep only element 2 of the loaded value; the other lanes are undef.
  %1 = shufflevector <4 x i64> %0, <4 x i64> undef, <3 x i32> <i32 undef, i32 undef, i32 2>
  ; Lane 0 takes element 2 of %1 (mask index 5); lanes 1-7 all repeat the
  ; constant 1 (mask index 0), which is the multiply-used 64-bit splat value.
  %shuffle = shufflevector <3 x i64> <i64 1, i64 undef, i64 undef>, <3 x i64> %1, <8 x i32> <i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}
204
; Attribute groups referenced by the test functions in this file: each function
; is compiled under either #0 (optsize) or #1 (minsize).
attributes #0 = { optsize }
attributes #1 = { minsize }
207