1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
4
5
; Bitwise AND of two <16 x i32> vectors. The expected output, shared by both
; llc invocations, is a broadcast-constant vpaddd followed by the
; integer-form vpandd on full zmm registers.
6define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
7; ALL-LABEL: vpandd:
8; ALL:       ## BB#0: ## %entry
9; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
10; ALL-NEXT:    vpandd %zmm1, %zmm0, %zmm0
11; ALL-NEXT:    retq
12entry:
13  ; Force the execution domain with an add.
  ; NOTE(review): presumably the integer add keeps the following logic op from
  ; being emitted in its floating-point form - confirm against the backend.
14  %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
15                            i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
16  %x = and <16 x i32> %a2, %b
17  ret <16 x i32> %x
18}
19
; AND of a <16 x i32> vector with the bitwise complement of another. The
; expected output, shared by both llc invocations, is a broadcast-constant
; vpaddd followed by a single vpandnd (no separate NOT instruction).
20define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
21; ALL-LABEL: vpandnd:
22; ALL:       ## BB#0: ## %entry
23; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
24; ALL-NEXT:    vpandnd %zmm0, %zmm1, %zmm0
25; ALL-NEXT:    retq
26entry:
27  ; Force the execution domain with an add.
28  %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
29                            i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ; xor with an all-ones vector yields the bitwise complement of %b.
30  %b2 = xor <16 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1,
31                            i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
32  %x = and <16 x i32> %a2, %b2
33  ret <16 x i32> %x
34}
35
; Bitwise OR of two <16 x i32> vectors. The expected output, shared by both
; llc invocations, is a broadcast-constant vpaddd followed by the
; integer-form vpord on full zmm registers.
36define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
37; ALL-LABEL: vpord:
38; ALL:       ## BB#0: ## %entry
39; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
40; ALL-NEXT:    vpord %zmm1, %zmm0, %zmm0
41; ALL-NEXT:    retq
42entry:
43  ; Force the execution domain with an add.
  ; NOTE(review): presumably the integer add keeps the following logic op from
  ; being emitted in its floating-point form - confirm against the backend.
44  %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
45                            i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
46  %x = or <16 x i32> %a2, %b
47  ret <16 x i32> %x
48}
49
; Bitwise XOR of two <16 x i32> vectors. The expected output, shared by both
; llc invocations, is a broadcast-constant vpaddd followed by the
; integer-form vpxord on full zmm registers.
50define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
51; ALL-LABEL: vpxord:
52; ALL:       ## BB#0: ## %entry
53; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
54; ALL-NEXT:    vpxord %zmm1, %zmm0, %zmm0
55; ALL-NEXT:    retq
56entry:
57  ; Force the execution domain with an add.
  ; NOTE(review): presumably the integer add keeps the following logic op from
  ; being emitted in its floating-point form - confirm against the backend.
58  %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
59                            i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
60  %x = xor <16 x i32> %a2, %b
61  ret <16 x i32> %x
62}
63
; Bitwise AND of two <8 x i64> vectors. The expected output, shared by both
; llc invocations, is a broadcast-constant vpaddq followed by the
; quadword-form vpandq on full zmm registers.
64define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
65; ALL-LABEL: vpandq:
66; ALL:       ## BB#0: ## %entry
67; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
68; ALL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
69; ALL-NEXT:    retq
70entry:
71  ; Force the execution domain with an add.
  ; NOTE(review): presumably the integer add keeps the following logic op from
  ; being emitted in its floating-point form - confirm against the backend.
72  %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
73  %x = and <8 x i64> %a2, %b
74  ret <8 x i64> %x
75}
76
; AND of an <8 x i64> vector with the bitwise complement of another. The
; expected output, shared by both llc invocations, is a broadcast-constant
; vpaddq followed by a single vpandnq (no separate NOT instruction).
77define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
78; ALL-LABEL: vpandnq:
79; ALL:       ## BB#0: ## %entry
80; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
81; ALL-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
82; ALL-NEXT:    retq
83entry:
84  ; Force the execution domain with an add.
85  %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ; xor with an all-ones vector yields the bitwise complement of %b.
86  %b2 = xor <8 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
87  %x = and <8 x i64> %a2, %b2
88  ret <8 x i64> %x
89}
90
; Bitwise OR of two <8 x i64> vectors. The expected output, shared by both
; llc invocations, is a broadcast-constant vpaddq followed by the
; quadword-form vporq on full zmm registers.
91define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
92; ALL-LABEL: vporq:
93; ALL:       ## BB#0: ## %entry
94; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
95; ALL-NEXT:    vporq %zmm1, %zmm0, %zmm0
96; ALL-NEXT:    retq
97entry:
98  ; Force the execution domain with an add.
  ; NOTE(review): presumably the integer add keeps the following logic op from
  ; being emitted in its floating-point form - confirm against the backend.
99  %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
100  %x = or <8 x i64> %a2, %b
101  ret <8 x i64> %x
102}
103
; Bitwise XOR of two <8 x i64> vectors. The expected output, shared by both
; llc invocations, is a broadcast-constant vpaddq followed by the
; quadword-form vpxorq on full zmm registers.
104define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
105; ALL-LABEL: vpxorq:
106; ALL:       ## BB#0: ## %entry
107; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
108; ALL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
109; ALL-NEXT:    retq
110entry:
111  ; Force the execution domain with an add.
  ; NOTE(review): presumably the integer add keeps the following logic op from
  ; being emitted in its floating-point form - confirm against the backend.
112  %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
113  %x = xor <8 x i64> %a2, %b
114  ret <8 x i64> %x
115}
116
117
; OR of an <8 x i64> vector with a constant whose eight lanes are all 2. The
; expected output, shared by both llc invocations, is a single vporq whose
; memory operand is a rip-relative scalar broadcast to eight lanes, with no
; separate load of the constant.
118define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
119; ALL-LABEL: orq_broadcast:
120; ALL:       ## BB#0:
121; ALL-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
122; ALL-NEXT:    retq
123  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
124  ret <8 x i64> %b
125}
126
; AND of a <16 x i32> register argument with a <16 x i32> value loaded through
; the pointer %x. The expected output, shared by both llc invocations, folds
; the load into the memory operand of vpandd instead of emitting a separate
; vector load.
127define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
128; ALL-LABEL: andd512fold:
129; ALL:       ## BB#0: ## %entry
130; ALL-NEXT:    vpandd (%rdi), %zmm0, %zmm0
131; ALL-NEXT:    retq
132entry:
  ; The load is only 4-byte aligned, i.e. less than the 64-byte vector size.
133  %a = load <16 x i32>, <16 x i32>* %x, align 4
134  %b = and <16 x i32> %y, %a
135  ret <16 x i32> %b
136}
137
; AND of an <8 x i64> vector with a scalar i64 loaded from %ap and replicated
; to every lane. The expected output, shared by both llc invocations, folds
; both the scalar load and the replication into the broadcast memory operand
; of a single vpandq.
138define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
139; ALL-LABEL: andqbrst:
140; ALL:       ## BB#0: ## %entry
141; ALL-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
142; ALL-NEXT:    retq
143entry:
144  %a = load i64, i64* %ap, align 8
  ; Place the scalar in lane 0, then use an all-zero shuffle mask so every
  ; result lane selects lane 0.
145  %b = insertelement <8 x i64> undef, i64 %a, i32 0
146  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
147  %d = and <8 x i64> %p1, %c
148  ret <8 x i64>%d
149}
150
; Bitwise AND of two <64 x i8> vectors, with per-CPU expectations: the knl run
; expects two 256-bit vandps on ymm register pairs, while the skx run expects
; a single 512-bit vpandq.
; NOTE(review): presumably the knl target lacks 512-bit byte-vector support so
; the operation is split into halves - confirm against the subtarget features.
151define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
152; KNL-LABEL: and_v64i8:
153; KNL:       ## BB#0:
154; KNL-NEXT:    vandps %ymm2, %ymm0, %ymm0
155; KNL-NEXT:    vandps %ymm3, %ymm1, %ymm1
156; KNL-NEXT:    retq
157;
158; SKX-LABEL: and_v64i8:
159; SKX:       ## BB#0:
160; SKX-NEXT:    vpandq %zmm1, %zmm0, %zmm0
161; SKX-NEXT:    retq
162  %res = and <64 x i8> %a, %b
163  ret <64 x i8> %res
164}
165
; AND of a <64 x i8> vector with the bitwise complement of another, with
; per-CPU expectations: the knl run expects two 256-bit vandnps on ymm
; register pairs, while the skx run expects a single 512-bit vpandnq.
; NOTE(review): presumably the knl target lacks 512-bit byte-vector support so
; the operation is split into halves - confirm against the subtarget features.
166define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) {
167; KNL-LABEL: andn_v64i8:
168; KNL:       ## BB#0:
169; KNL-NEXT:    vandnps %ymm0, %ymm2, %ymm0
170; KNL-NEXT:    vandnps %ymm1, %ymm3, %ymm1
171; KNL-NEXT:    retq
172;
173; SKX-LABEL: andn_v64i8:
174; SKX:       ## BB#0:
175; SKX-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
176; SKX-NEXT:    retq
  ; xor with an all-ones vector yields the bitwise complement of %b.
177  %b2 = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
178                           i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
179                           i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
180                           i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
181  %res = and <64 x i8> %a, %b2
182  ret <64 x i8> %res
183}
184
; Bitwise OR of two <64 x i8> vectors, with per-CPU expectations: the knl run
; expects two 256-bit vorps on ymm register pairs, while the skx run expects a
; single 512-bit vporq.
; NOTE(review): presumably the knl target lacks 512-bit byte-vector support so
; the operation is split into halves - confirm against the subtarget features.
185define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
186; KNL-LABEL: or_v64i8:
187; KNL:       ## BB#0:
188; KNL-NEXT:    vorps %ymm2, %ymm0, %ymm0
189; KNL-NEXT:    vorps %ymm3, %ymm1, %ymm1
190; KNL-NEXT:    retq
191;
192; SKX-LABEL: or_v64i8:
193; SKX:       ## BB#0:
194; SKX-NEXT:    vporq %zmm1, %zmm0, %zmm0
195; SKX-NEXT:    retq
196  %res = or <64 x i8> %a, %b
197  ret <64 x i8> %res
198}
199
; Bitwise XOR of two <64 x i8> vectors, with per-CPU expectations: the knl run
; expects two 256-bit vxorps on ymm register pairs, while the skx run expects
; a single 512-bit vpxorq.
; NOTE(review): presumably the knl target lacks 512-bit byte-vector support so
; the operation is split into halves - confirm against the subtarget features.
200define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
201; KNL-LABEL: xor_v64i8:
202; KNL:       ## BB#0:
203; KNL-NEXT:    vxorps %ymm2, %ymm0, %ymm0
204; KNL-NEXT:    vxorps %ymm3, %ymm1, %ymm1
205; KNL-NEXT:    retq
206;
207; SKX-LABEL: xor_v64i8:
208; SKX:       ## BB#0:
209; SKX-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
210; SKX-NEXT:    retq
211  %res = xor <64 x i8> %a, %b
212  ret <64 x i8> %res
213}
214
; Bitwise AND of two <32 x i16> vectors, with per-CPU expectations: the knl
; run expects two 256-bit vandps on ymm register pairs, while the skx run
; expects a single 512-bit vpandq.
; NOTE(review): presumably the knl target lacks 512-bit word-vector support so
; the operation is split into halves - confirm against the subtarget features.
215define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
216; KNL-LABEL: and_v32i16:
217; KNL:       ## BB#0:
218; KNL-NEXT:    vandps %ymm2, %ymm0, %ymm0
219; KNL-NEXT:    vandps %ymm3, %ymm1, %ymm1
220; KNL-NEXT:    retq
221;
222; SKX-LABEL: and_v32i16:
223; SKX:       ## BB#0:
224; SKX-NEXT:    vpandq %zmm1, %zmm0, %zmm0
225; SKX-NEXT:    retq
226  %res = and <32 x i16> %a, %b
227  ret <32 x i16> %res
228}
229
; AND of a <32 x i16> vector with the bitwise complement of another, with
; per-CPU expectations: the knl run expects two 256-bit vandnps on ymm
; register pairs, while the skx run expects a single 512-bit vpandnq.
; NOTE(review): presumably the knl target lacks 512-bit word-vector support so
; the operation is split into halves - confirm against the subtarget features.
230define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) {
231; KNL-LABEL: andn_v32i16:
232; KNL:       ## BB#0:
233; KNL-NEXT:    vandnps %ymm0, %ymm2, %ymm0
234; KNL-NEXT:    vandnps %ymm1, %ymm3, %ymm1
235; KNL-NEXT:    retq
236;
237; SKX-LABEL: andn_v32i16:
238; SKX:       ## BB#0:
239; SKX-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
240; SKX-NEXT:    retq
  ; xor with an all-ones vector yields the bitwise complement of %b.
241  %b2 = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
242                            i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
243  %res = and <32 x i16> %a, %b2
244  ret <32 x i16> %res
245}
246
; Bitwise OR of two <32 x i16> vectors, with per-CPU expectations: the knl run
; expects two 256-bit vorps on ymm register pairs, while the skx run expects a
; single 512-bit vporq.
; NOTE(review): presumably the knl target lacks 512-bit word-vector support so
; the operation is split into halves - confirm against the subtarget features.
247define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
248; KNL-LABEL: or_v32i16:
249; KNL:       ## BB#0:
250; KNL-NEXT:    vorps %ymm2, %ymm0, %ymm0
251; KNL-NEXT:    vorps %ymm3, %ymm1, %ymm1
252; KNL-NEXT:    retq
253;
254; SKX-LABEL: or_v32i16:
255; SKX:       ## BB#0:
256; SKX-NEXT:    vporq %zmm1, %zmm0, %zmm0
257; SKX-NEXT:    retq
258  %res = or <32 x i16> %a, %b
259  ret <32 x i16> %res
260}
261
; Bitwise XOR of two <32 x i16> vectors, with per-CPU expectations: the knl
; run expects two 256-bit vxorps on ymm register pairs, while the skx run
; expects a single 512-bit vpxorq.
; NOTE(review): presumably the knl target lacks 512-bit word-vector support so
; the operation is split into halves - confirm against the subtarget features.
262define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
263; KNL-LABEL: xor_v32i16:
264; KNL:       ## BB#0:
265; KNL-NEXT:    vxorps %ymm2, %ymm0, %ymm0
266; KNL-NEXT:    vxorps %ymm3, %ymm1, %ymm1
267; KNL-NEXT:    retq
268;
269; SKX-LABEL: xor_v32i16:
270; SKX:       ## BB#0:
271; SKX-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
272; SKX-NEXT:    retq
273  %res = xor <32 x i16> %a, %b
274  ret <32 x i16> %res
275}
276