1; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s -check-prefix=CHECK --check-prefix=AVX
2; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s -check-prefix=CHECK --check-prefix=AVX2
3
4; Check constant loads of every 128-bit and 256-bit vector type
5; for size optimization using splat ops available with AVX and AVX2.
6
7; There is no AVX broadcast from double to 128-bit vector because movddup has been around since SSE3 (grrr).
define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
  %add = fadd <2 x double> %x, <double 1.0, double 1.0>
  ret <2 x double> %add
; Adds a <2 x double> splat of 1.0 to %x. Only the shared CHECK prefix is
; used here, so the AVX and AVX2 runs are both expected to emit vmovddup
; for the constant, then vaddpd, with retq directly after the add.
; The label pattern ends in ':' so it anchors on the function's own label
; rather than on an earlier line that merely mentions the symbol name.
; CHECK-LABEL: splat_v2f64:
; CHECK: vmovddup
; CHECK: vaddpd
; CHECK-NEXT: retq
}
16
define <4 x double> @splat_v4f64(<4 x double> %x) #0 {
  %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %add
; Adds a <4 x double> splat of 1.0 to %x. Both runs are expected to emit
; vbroadcastsd for the constant, immediately followed by vaddpd and retq.
; The label pattern ends in ':' so it anchors on the function's own label
; rather than on an earlier line that merely mentions the symbol name.
; CHECK-LABEL: splat_v4f64:
; CHECK: vbroadcastsd
; CHECK-NEXT: vaddpd
; CHECK-NEXT: retq
}
25
define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  ret <4 x float> %add
; Adds a <4 x float> splat of 1.0 to %x. Both runs are expected to emit
; vbroadcastss for the constant, immediately followed by vaddps and retq.
; The label pattern ends in ':' so it anchors on the function's own label
; rather than on an earlier line that merely mentions the symbol name.
; CHECK-LABEL: splat_v4f32:
; CHECK: vbroadcastss
; CHECK-NEXT: vaddps
; CHECK-NEXT: retq
}
34
define <8 x float> @splat_v8f32(<8 x float> %x) #0 {
  %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %add
; Adds an <8 x float> splat of 1.0 to %x. Both runs are expected to emit
; vbroadcastss for the constant, immediately followed by vaddps and retq.
; The label pattern ends in ':' so it anchors on the function's own label
; rather than on an earlier line that merely mentions the symbol name.
; CHECK-LABEL: splat_v8f32:
; CHECK: vbroadcastss
; CHECK-NEXT: vaddps
; CHECK-NEXT: retq
}
43
44; AVX can't do integer splats, so fake it: use vmovddup to splat 64-bit value.
45; We also generate vmovddup for AVX2 because it's one byte smaller than vpbroadcastq.
define <2 x i64> @splat_v2i64(<2 x i64> %x) #0 {
  %add = add <2 x i64> %x, <i64 1, i64 1>
  ret <2 x i64> %add
; Adds a <2 x i64> splat of 1 to %x. Only the shared CHECK prefix is used,
; so the AVX and AVX2 runs are both expected to emit vmovddup for the
; constant, then vpaddq, with retq directly after the add.
; The label pattern ends in ':' so it anchors on the function's own label
; rather than on an earlier line that merely mentions the symbol name.
; CHECK-LABEL: splat_v2i64:
; CHECK: vmovddup
; CHECK: vpaddq
; CHECK-NEXT: retq
}
54
55; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
56; and then we fake it: use vmovddup to splat 64-bit value.
define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
  %add = add <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %add
; Adds a <4 x i64> splat of 1 to %x.
; The AVX run expects vmovddup for the constant and two vpaddq (one per
; 128-bit half); the AVX2 run expects vpbroadcastq and a single vpaddq.
; The final retq check is deliberately not a NEXT check because the two
; runs differ in what precedes it.
; The label pattern ends in ':' so it anchors on the function's own label
; rather than on an earlier line that merely mentions the symbol name.
; CHECK-LABEL: splat_v4i64:
; AVX: vmovddup
; AVX: vpaddq
; AVX: vpaddq
; AVX2: vpbroadcastq
; AVX2: vpaddq
; CHECK: retq
}
68
69; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
define <4 x i32> @splat_v4i32(<4 x i32> %x) #0 {
  %add = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %add
; Adds a <4 x i32> splat of 1 to %x.
; The AVX run expects vbroadcastss for the constant; the AVX2 run expects
; vpbroadcastd. In both runs vpaddd and retq must follow immediately.
; The label pattern ends in ':' so it anchors on the function's own label
; rather than on an earlier line that merely mentions the symbol name.
; CHECK-LABEL: splat_v4i32:
; AVX: vbroadcastss
; AVX2: vpbroadcastd
; CHECK-NEXT: vpaddd
; CHECK-NEXT: retq
}
79
; AVX can't do 256-bit integer ops, so the add is split into two 128-bit vectors;
; it can't do integer splats either, so fake it: use vbroadcastss to splat 32-bit value.
define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
  %add = add <8 x i32> %x, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %add
; Adds an <8 x i32> splat of 1 to %x.
; The AVX run expects vbroadcastss for the constant followed by two vpaddd;
; the AVX2 run expects vpbroadcastd and a single vpaddd.
; The final retq check is deliberately not a NEXT check because the two
; runs differ in what precedes it.
; The label pattern ends in ':' so it anchors on the function's own label
; rather than on an earlier line that merely mentions the symbol name.
; CHECK-LABEL: splat_v8i32:
; AVX: vbroadcastss
; AVX: vpaddd
; AVX: vpaddd
; AVX2: vpbroadcastd
; AVX2: vpaddd
; CHECK: retq
}
92
93; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
define <8 x i16> @splat_v8i16(<8 x i16> %x) #0 {
  %add = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %add
; Adds an <8 x i16> splat of 1 to %x.
; In the AVX run no line containing "broadcast" may appear between the
; function label and vpaddw; the AVX2 run expects vpbroadcastw before it.
; In both runs retq must directly follow vpaddw.
; The label pattern ends in ':' so it anchors on the function's own label
; rather than on an earlier line that merely mentions the symbol name.
; CHECK-LABEL: splat_v8i16:
; AVX-NOT: broadcast
; AVX2: vpbroadcastw
; CHECK: vpaddw
; CHECK-NEXT: retq
}
103
; AVX can't do 256-bit integer ops, so the add is split into two 128-bit vectors.
; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
  %add = add <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %add
; Adds a <16 x i16> splat of 1 to %x.
; In the AVX run no line containing "broadcast" may appear between the
; function label and the first vpaddw, and two vpaddw are expected; the
; AVX2 run expects vpbroadcastw followed by a single vpaddw.
; The final retq check is deliberately not a NEXT check because the two
; runs differ in what precedes it.
; The label pattern ends in ':' so it anchors on the function's own label
; rather than on an earlier line that merely mentions the symbol name.
; CHECK-LABEL: splat_v16i16:
; AVX-NOT: broadcast
; AVX: vpaddw
; AVX: vpaddw
; AVX2: vpbroadcastw
; AVX2: vpaddw
; CHECK: retq
}
116
117; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
define <16 x i8> @splat_v16i8(<16 x i8> %x) #0 {
  %add = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %add
; Adds a <16 x i8> splat of 1 to %x.
; In the AVX run no line containing "broadcast" may appear between the
; function label and vpaddb; the AVX2 run expects vpbroadcastb before it.
; In both runs retq must directly follow vpaddb.
; The label pattern ends in ':' so it anchors on the function's own label
; rather than on an earlier line that merely mentions the symbol name.
; CHECK-LABEL: splat_v16i8:
; AVX-NOT: broadcast
; AVX2: vpbroadcastb
; CHECK: vpaddb
; CHECK-NEXT: retq
}
127
; AVX can't do 256-bit integer ops, so the add is split into two 128-bit vectors.
; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
  %add = add <32 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <32 x i8> %add
; Adds a <32 x i8> splat of 1 to %x.
; In the AVX run no line containing "broadcast" may appear between the
; function label and the first vpaddb, and two vpaddb are expected; the
; AVX2 run expects vpbroadcastb followed by a single vpaddb.
; The final retq check is deliberately not a NEXT check because the two
; runs differ in what precedes it.
; The label pattern ends in ':' so it anchors on the function's own label
; rather than on an earlier line that merely mentions the symbol name.
; CHECK-LABEL: splat_v32i8:
; AVX-NOT: broadcast
; AVX: vpaddb
; AVX: vpaddb
; AVX2: vpbroadcastb
; AVX2: vpaddb
; CHECK: retq
}
140
; Attribute group #0 is attached to every function in this file and marks
; it optsize, i.e. the functions are compiled with size optimization on.
attributes #0 = { optsize }
142