1; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl| FileCheck %s
2
3; 256-bit
4
; Register-register add of two <32 x i8> vectors.
; The expectations inside the body require a vpaddb on ymm registers
; followed by the function's return.
define <32 x i8> @vpaddb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpaddb256_test
; CHECK: vpaddb %ymm{{.*}}
; CHECK: ret
  %sum = add <32 x i8> %i, %j
  ret <32 x i8> %sum
}
12
; Add of a <32 x i8> register value and a vector loaded from %j.
; The load is declared with only 4-byte alignment; the expectations require
; it to appear as the (%rdi) memory operand of the vpaddb itself.
define <32 x i8> @vpaddb256_fold_test(<32 x i8> %i, <32 x i8>* %j) nounwind {
; CHECK-LABEL: vpaddb256_fold_test
; CHECK: vpaddb (%rdi), %ymm{{.*}}
; CHECK: ret
  %mem.val = load <32 x i8>, <32 x i8>* %j, align 4
  %sum = add <32 x i8> %i, %mem.val
  ret <32 x i8> %sum
}
21
; Register-register add of two <16 x i16> vectors.
; The expectations inside the body require a vpaddw on ymm registers.
define <16 x i16> @vpaddw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpaddw256_test
; CHECK: vpaddw %ymm{{.*}}
; CHECK: ret
  %sum = add <16 x i16> %i, %j
  ret <16 x i16> %sum
}
29
; Add of a <16 x i16> register value and a vector loaded from %j.
; The load is declared with only 4-byte alignment; the expectations require
; it to appear as the (%rdi) memory operand of the vpaddw itself.
define <16 x i16> @vpaddw256_fold_test(<16 x i16> %i, <16 x i16>* %j) nounwind {
; CHECK-LABEL: vpaddw256_fold_test
; CHECK: vpaddw (%rdi), %ymm{{.*}}
; CHECK: ret
  %mem.val = load <16 x i16>, <16 x i16>* %j, align 4
  %sum = add <16 x i16> %i, %mem.val
  ret <16 x i16> %sum
}
38
; Merge-masked <16 x i16> add.
; A lane is enabled when the matching element of %mask1 is non-zero; enabled
; lanes receive %i + %j, the remaining lanes keep %i. The expectations
; require a mask register k1..k7 to show up on the vpaddw line.
define <16 x i16> @vpaddw256_mask_test(<16 x i16> %i, <16 x i16> %j, <16 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw256_mask_test
; CHECK: vpaddw %ymm{{.*%k[1-7].*}}
; CHECK: ret
  %lane.on = icmp ne <16 x i16> %mask1, zeroinitializer
  %sum = add <16 x i16> %i, %j
  %merged = select <16 x i1> %lane.on, <16 x i16> %sum, <16 x i16> %i
  ret <16 x i16> %merged
}
48
; Zero-masked <16 x i16> add.
; A lane is enabled when the matching element of %mask1 is non-zero; enabled
; lanes receive %i + %j, the remaining lanes become zero. The expectations
; require both a {%k1..%k7} mask and the {z} marker on the vpaddw line.
define <16 x i16> @vpaddw256_maskz_test(<16 x i16> %i, <16 x i16> %j, <16 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw256_maskz_test
; CHECK: vpaddw %ymm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
  %lane.on = icmp ne <16 x i16> %mask1, zeroinitializer
  %sum = add <16 x i16> %i, %j
  %zeroed = select <16 x i1> %lane.on, <16 x i16> %sum, <16 x i16> zeroinitializer
  ret <16 x i16> %zeroed
}
58
; Merge-masked <16 x i16> add whose second operand is loaded from %j.ptr.
; A lane is enabled when the matching element of %mask1 is non-zero; enabled
; lanes receive %i + loaded value, the remaining lanes keep %i. The
; expectations require the load to appear as the (%rdi) memory operand of a
; vpaddw that also names a mask register k1..k7.
;
; The function is only 'nounwind': it reads memory through %j.ptr, so it
; must not carry 'readnone'.
define <16 x i16> @vpaddw256_mask_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw256_mask_fold_test
; CHECK: vpaddw (%rdi), %ymm{{.*%k[1-7]}}
; CHECK: ret
  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
  %j = load <16 x i16>, <16 x i16>* %j.ptr
  %x = add <16 x i16> %i, %j
  %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %i
  ret <16 x i16> %r
}
69
; Zero-masked <16 x i16> add whose second operand is loaded from %j.ptr.
; A lane is enabled when the matching element of %mask1 is non-zero; enabled
; lanes receive %i + loaded value, the remaining lanes become zero. The
; expectations require the load to appear as the (%rdi) memory operand of a
; vpaddw carrying both a {%k1..%k7} mask and the {z} marker.
;
; The function is only 'nounwind': it reads memory through %j.ptr, so it
; must not carry 'readnone'.
define <16 x i16> @vpaddw256_maskz_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw256_maskz_fold_test
; CHECK: vpaddw (%rdi), %ymm{{.*{%k[1-7]} {z}}}
; CHECK: ret
  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
  %j = load <16 x i16>, <16 x i16>* %j.ptr
  %x = add <16 x i16> %i, %j
  %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
  ret <16 x i16> %r
}
80
; Register-register subtract (%i - %j) of two <32 x i8> vectors.
; The expectations inside the body require a vpsubb on ymm registers.
define <32 x i8> @vpsubb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpsubb256_test
; CHECK: vpsubb %ymm{{.*}}
; CHECK: ret
  %diff = sub <32 x i8> %i, %j
  ret <32 x i8> %diff
}
88
; Register-register subtract (%i - %j) of two <16 x i16> vectors.
; The expectations inside the body require a vpsubw on ymm registers.
define <16 x i16> @vpsubw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpsubw256_test
; CHECK: vpsubw %ymm{{.*}}
; CHECK: ret
  %diff = sub <16 x i16> %i, %j
  ret <16 x i16> %diff
}
96
; Register-register multiply of two <16 x i16> vectors.
; The expectations inside the body require a vpmullw on ymm registers.
define <16 x i16> @vpmullw256_test(<16 x i16> %i, <16 x i16> %j) {
; CHECK-LABEL: vpmullw256_test
; CHECK: vpmullw %ymm{{.*}}
; CHECK: ret
  %prod = mul <16 x i16> %i, %j
  ret <16 x i16> %prod
}
104
105; 128-bit
106
; Register-register add of two <16 x i8> vectors.
; The expectations inside the body require a vpaddb on xmm registers.
define <16 x i8> @vpaddb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpaddb128_test
; CHECK: vpaddb %xmm{{.*}}
; CHECK: ret
  %sum = add <16 x i8> %i, %j
  ret <16 x i8> %sum
}
114
; Add of a <16 x i8> register value and a vector loaded from %j.
; The load is declared with only 4-byte alignment; the expectations require
; it to appear as the (%rdi) memory operand of the vpaddb itself.
define <16 x i8> @vpaddb128_fold_test(<16 x i8> %i, <16 x i8>* %j) nounwind {
; CHECK-LABEL: vpaddb128_fold_test
; CHECK: vpaddb (%rdi), %xmm{{.*}}
; CHECK: ret
  %mem.val = load <16 x i8>, <16 x i8>* %j, align 4
  %sum = add <16 x i8> %i, %mem.val
  ret <16 x i8> %sum
}
123
; Register-register add of two <8 x i16> vectors.
; The expectations inside the body require a vpaddw on xmm registers.
define <8 x i16> @vpaddw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpaddw128_test
; CHECK: vpaddw %xmm{{.*}}
; CHECK: ret
  %sum = add <8 x i16> %i, %j
  ret <8 x i16> %sum
}
131
; Add of a <8 x i16> register value and a vector loaded from %j.
; The load is declared with only 4-byte alignment; the expectations require
; it to appear as the (%rdi) memory operand of the vpaddw itself.
define <8 x i16> @vpaddw128_fold_test(<8 x i16> %i, <8 x i16>* %j) nounwind {
; CHECK-LABEL: vpaddw128_fold_test
; CHECK: vpaddw (%rdi), %xmm{{.*}}
; CHECK: ret
  %mem.val = load <8 x i16>, <8 x i16>* %j, align 4
  %sum = add <8 x i16> %i, %mem.val
  ret <8 x i16> %sum
}
140
; Merge-masked <8 x i16> add.
; A lane is enabled when the matching element of %mask1 is non-zero; enabled
; lanes receive %i + %j, the remaining lanes keep %i. The expectations
; require a mask register k1..k7 to show up on the vpaddw line.
define <8 x i16> @vpaddw128_mask_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw128_mask_test
; CHECK: vpaddw %xmm{{.*%k[1-7].*}}
; CHECK: ret
  %lane.on = icmp ne <8 x i16> %mask1, zeroinitializer
  %sum = add <8 x i16> %i, %j
  %merged = select <8 x i1> %lane.on, <8 x i16> %sum, <8 x i16> %i
  ret <8 x i16> %merged
}
150
; Zero-masked <8 x i16> add.
; A lane is enabled when the matching element of %mask1 is non-zero; enabled
; lanes receive %i + %j, the remaining lanes become zero. The expectations
; require both a {%k1..%k7} mask and the {z} marker on the vpaddw line.
define <8 x i16> @vpaddw128_maskz_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddw128_maskz_test
; CHECK: vpaddw %xmm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
  %lane.on = icmp ne <8 x i16> %mask1, zeroinitializer
  %sum = add <8 x i16> %i, %j
  %zeroed = select <8 x i1> %lane.on, <8 x i16> %sum, <8 x i16> zeroinitializer
  ret <8 x i16> %zeroed
}
160
; Merge-masked <8 x i16> add whose second operand is loaded from %j.ptr.
; A lane is enabled when the matching element of %mask1 is non-zero; enabled
; lanes receive %i + loaded value, the remaining lanes keep %i. The
; expectations require the load to appear as the (%rdi) memory operand of a
; vpaddw that also names a mask register k1..k7.
;
; The function is only 'nounwind': it reads memory through %j.ptr, so it
; must not carry 'readnone'.
define <8 x i16> @vpaddw128_mask_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw128_mask_fold_test
; CHECK: vpaddw (%rdi), %xmm{{.*%k[1-7]}}
; CHECK: ret
  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
  %j = load <8 x i16>, <8 x i16>* %j.ptr
  %x = add <8 x i16> %i, %j
  %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %i
  ret <8 x i16> %r
}
171
; Zero-masked <8 x i16> add whose second operand is loaded from %j.ptr.
; A lane is enabled when the matching element of %mask1 is non-zero; enabled
; lanes receive %i + loaded value, the remaining lanes become zero. The
; expectations require the load to appear as the (%rdi) memory operand of a
; vpaddw carrying both a {%k1..%k7} mask and the {z} marker.
;
; The function is only 'nounwind': it reads memory through %j.ptr, so it
; must not carry 'readnone'.
define <8 x i16> @vpaddw128_maskz_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind {
; CHECK-LABEL: vpaddw128_maskz_fold_test
; CHECK: vpaddw (%rdi), %xmm{{.*{%k[1-7]} {z}}}
; CHECK: ret
  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
  %j = load <8 x i16>, <8 x i16>* %j.ptr
  %x = add <8 x i16> %i, %j
  %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
  ret <8 x i16> %r
}
182
; Register-register subtract (%i - %j) of two <16 x i8> vectors.
; The expectations inside the body require a vpsubb on xmm registers.
define <16 x i8> @vpsubb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
; CHECK-LABEL: vpsubb128_test
; CHECK: vpsubb %xmm{{.*}}
; CHECK: ret
  %diff = sub <16 x i8> %i, %j
  ret <16 x i8> %diff
}
190
; Register-register subtract (%i - %j) of two <8 x i16> vectors.
; The expectations inside the body require a vpsubw on xmm registers.
define <8 x i16> @vpsubw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
; CHECK-LABEL: vpsubw128_test
; CHECK: vpsubw %xmm{{.*}}
; CHECK: ret
  %diff = sub <8 x i16> %i, %j
  ret <8 x i16> %diff
}
198
; Register-register multiply of two <8 x i16> vectors.
; The expectations inside the body require a vpmullw on xmm registers.
define <8 x i16> @vpmullw128_test(<8 x i16> %i, <8 x i16> %j) {
; CHECK-LABEL: vpmullw128_test
; CHECK: vpmullw %xmm{{.*}}
; CHECK: ret
  %prod = mul <8 x i16> %i, %j
  ret <8 x i16> %prod
}
206
207