; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s

; https://bugs.llvm.org/show_bug.cgi?id=37104

; All the advanced stuff (negative tests, commutativity) is handled in the
; scalar version of the test only.

; ============================================================================ ;
; 8-bit vector width
; ============================================================================ ;

; "Out" form of the masked merge on <1 x i8>: r = (x & mask) | (y & ~mask),
; where ~mask is spelled as xor with all-ones.  Expected lowering: one BSL that
; selects on the mask register (v2), then a move of the result into v0.
define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
; CHECK-LABEL: out_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <1 x i8> %x, %mask
  %notmask = xor <1 x i8> %mask, <i8 -1>
  %my = and <1 x i8> %y, %notmask
  %r = or <1 x i8> %mx, %my
  ret <1 x i8> %r
}

; ============================================================================ ;
; 16-bit vector width
; ============================================================================ ;

; "Out" form of the masked merge on <2 x i8>: r = (x & mask) | (y & ~mask).
; Expected lowering here is the unfolded and/eor/and/orr sequence, not a BSL:
; the mask is inverted by EOR with a MOVI constant that has 0xff in the low
; byte of each 32-bit half.
define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
; CHECK-LABEL: out_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d3, #0x0000ff000000ff
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %mx = and <2 x i8> %x, %mask
  %notmask = xor <2 x i8> %mask, <i8 -1, i8 -1>
  %my = and <2 x i8> %y, %notmask
  %r = or <2 x i8> %mx, %my
  ret <2 x i8> %r
}

; "Out" form of the masked merge on <1 x i16>: r = (x & mask) | (y & ~mask),
; where ~mask is spelled as xor with all-ones.  Expected lowering: one BSL that
; selects on the mask register (v2), then a move of the result into v0.
define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
; CHECK-LABEL: out_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <1 x i16> %x, %mask
  %notmask = xor <1 x i16> %mask, <i16 -1>
  %my = and <1 x i16> %y, %notmask
  %r = or <1 x i16> %mx, %my
  ret <1 x i16> %r
}

; ============================================================================ ;
; 32-bit vector width
; ============================================================================ ;

; "Out" form of the masked merge on <4 x i8>: r = (x & mask) | (y & ~mask).
; Expected lowering here is the unfolded and/eor/and/orr sequence, not a BSL:
; the mask is inverted by EOR with a MOVI constant that has 0xff in the low
; byte of each 16-bit lane.
define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-LABEL: out_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d3, #0xff00ff00ff00ff
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %mx = and <4 x i8> %x, %mask
  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <4 x i8> %y, %notmask
  %r = or <4 x i8> %mx, %my
  ret <4 x i8> %r
}

; "Out" form of the masked merge on <4 x i8>, r = (x & mask) | (y & ~mask),
; with lane 2 of the all-ones xor constant left undef.  The assertions expect
; the unfolded and/eor/and/orr sequence, with the EOR constant still carrying
; 0xff in the low byte of every 16-bit lane.
define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-LABEL: out_v4i8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d3, #0xff00ff00ff00ff
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %mx = and <4 x i8> %x, %mask
  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1>
  %my = and <4 x i8> %y, %notmask
  %r = or <4 x i8> %mx, %my
  ret <4 x i8> %r
}

; "Out" form of the masked merge on <2 x i16>: r = (x & mask) | (y & ~mask).
; Expected lowering here is the unfolded and/eor/and/orr sequence, not a BSL:
; the mask is inverted by EOR with a MOVI constant that has 0xffff in the low
; 16 bits of each 32-bit half.
define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
; CHECK-LABEL: out_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d3, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %mx = and <2 x i16> %x, %mask
  %notmask = xor <2 x i16> %mask, <i16 -1, i16 -1>
  %my = and <2 x i16> %y, %notmask
  %r = or <2 x i16> %mx, %my
  ret <2 x i16> %r
}

; "Out" form of the masked merge on <1 x i32>: r = (x & mask) | (y & ~mask),
; where ~mask is spelled as xor with all-ones.  Expected lowering: one BSL that
; selects on the mask register (v2), then a move of the result into v0.
define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
; CHECK-LABEL: out_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <1 x i32> %x, %mask
  %notmask = xor <1 x i32> %mask, <i32 -1>
  %my = and <1 x i32> %y, %notmask
  %r = or <1 x i32> %mx, %my
  ret <1 x i32> %r
}

; ============================================================================ ;
; 64-bit vector width
; ============================================================================ ;

; "Out" form of the masked merge on <8 x i8>: r = (x & mask) | (y & ~mask),
; where ~mask is spelled as xor with all-ones.  Expected lowering: one BSL that
; selects on the mask register (v2), then a move of the result into v0.
define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
; CHECK-LABEL: out_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <8 x i8> %x, %mask
  %notmask = xor <8 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <8 x i8> %y, %notmask
  %r = or <8 x i8> %mx, %my
  ret <8 x i8> %r
}

; "Out" form of the masked merge on <4 x i16>: r = (x & mask) | (y & ~mask),
; where ~mask is spelled as xor with all-ones.  Expected lowering: one BSL that
; selects on the mask register (v2), then a move of the result into v0.
define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-LABEL: out_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <4 x i16> %x, %mask
  %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1>
  %my = and <4 x i16> %y, %notmask
  %r = or <4 x i16> %mx, %my
  ret <4 x i16> %r
}

; "Out" form of the masked merge on <4 x i16>, r = (x & mask) | (y & ~mask),
; with lane 2 of the all-ones xor constant left undef.  The assertions still
; expect one BSL selecting on the mask register (v2) and a move into v0.
define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-LABEL: out_v4i16_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <4 x i16> %x, %mask
  %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 undef, i16 -1>
  %my = and <4 x i16> %y, %notmask
  %r = or <4 x i16> %mx, %my
  ret <4 x i16> %r
}

; "Out" form of the masked merge on <2 x i32>: r = (x & mask) | (y & ~mask),
; where ~mask is spelled as xor with all-ones.  Expected lowering: one BSL that
; selects on the mask register (v2), then a move of the result into v0.
define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
; CHECK-LABEL: out_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <2 x i32> %x, %mask
  %notmask = xor <2 x i32> %mask, <i32 -1, i32 -1>
  %my = and <2 x i32> %y, %notmask
  %r = or <2 x i32> %mx, %my
  ret <2 x i32> %r
}

; "Out" form of the masked merge on <1 x i64>: r = (x & mask) | (y & ~mask),
; where ~mask is spelled as xor with all-ones.  Expected lowering: one BSL that
; selects on the mask register (v2), then a move of the result into v0.
define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
; CHECK-LABEL: out_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <1 x i64> %x, %mask
  %notmask = xor <1 x i64> %mask, <i64 -1>
  %my = and <1 x i64> %y, %notmask
  %r = or <1 x i64> %mx, %my
  ret <1 x i64> %r
}

; ============================================================================ ;
; 128-bit vector width
; ============================================================================ ;

; "Out" form of the masked merge on <16 x i8>: r = (x & mask) | (y & ~mask),
; where ~mask is spelled as xor with all-ones.  Expected lowering: one 128-bit
; BSL that selects on the mask register (v2), then a move of the result into v0.
define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
; CHECK-LABEL: out_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.16b, v0.16b, v1.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <16 x i8> %x, %mask
  %notmask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <16 x i8> %y, %notmask
  %r = or <16 x i8> %mx, %my
  ret <16 x i8> %r
}

; "Out" form of the masked merge on <8 x i16>: r = (x & mask) | (y & ~mask),
; where ~mask is spelled as xor with all-ones.  Expected lowering: one 128-bit
; BSL that selects on the mask register (v2), then a move of the result into v0.
define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
; CHECK-LABEL: out_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.16b, v0.16b, v1.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <8 x i16> %x, %mask
  %notmask = xor <8 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %my = and <8 x i16> %y, %notmask
  %r = or <8 x i16> %mx, %my
  ret <8 x i16> %r
}

; "Out" form of the masked merge on <4 x i32>: r = (x & mask) | (y & ~mask),
; where ~mask is spelled as xor with all-ones.  Expected lowering: one 128-bit
; BSL that selects on the mask register (v2), then a move of the result into v0.
define <4 x i32> @out_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
; CHECK-LABEL: out_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.16b, v0.16b, v1.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <4 x i32> %x, %mask
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %y, %notmask
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "Out" form of the masked merge on <4 x i32>, r = (x & mask) | (y & ~mask),
; with lane 2 of the all-ones xor constant left undef.  The assertions still
; expect one 128-bit BSL selecting on the mask register (v2) and a move into v0.
define <4 x i32> @out_v4i32_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
; CHECK-LABEL: out_v4i32_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.16b, v0.16b, v1.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <4 x i32> %x, %mask
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1>
  %my = and <4 x i32> %y, %notmask
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "Out" form of the masked merge on <2 x i64>: r = (x & mask) | (y & ~mask),
; where ~mask is spelled as xor with all-ones.  Expected lowering: one 128-bit
; BSL that selects on the mask register (v2), then a move of the result into v0.
define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
; CHECK-LABEL: out_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.16b, v0.16b, v1.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <2 x i64> %x, %mask
  %notmask = xor <2 x i64> %mask, <i64 -1, i64 -1>
  %my = and <2 x i64> %y, %notmask
  %r = or <2 x i64> %mx, %my
  ret <2 x i64> %r
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Should be the same as the previous ("out") functions, but with the merge
; written in the "in" form: ((x ^ y) & mask) ^ y.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; ============================================================================ ;
; 8-bit vector width
; ============================================================================ ;

; "In" form of the masked merge on <1 x i8>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one BSL that selects on the mask register (v2), then a
; move of the result into v0.
define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
; CHECK-LABEL: in_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <1 x i8> %x, %y
  %n1 = and <1 x i8> %n0, %mask
  %r = xor <1 x i8> %n1, %y
  ret <1 x i8> %r
}

; ============================================================================ ;
; 16-bit vector width
; ============================================================================ ;

; "In" form of the masked merge on <2 x i8>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one BSL that selects on the mask register (v2), then a
; move of the result into v0.
define <2 x i8> @in_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
; CHECK-LABEL: in_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <2 x i8> %x, %y
  %n1 = and <2 x i8> %n0, %mask
  %r = xor <2 x i8> %n1, %y
  ret <2 x i8> %r
}

; "In" form of the masked merge on <1 x i16>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one BSL that selects on the mask register (v2), then a
; move of the result into v0.
define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
; CHECK-LABEL: in_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <1 x i16> %x, %y
  %n1 = and <1 x i16> %n0, %mask
  %r = xor <1 x i16> %n1, %y
  ret <1 x i16> %r
}

; ============================================================================ ;
; 32-bit vector width
; ============================================================================ ;

; "In" form of the masked merge on <4 x i8>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one BSL that selects on the mask register (v2), then a
; move of the result into v0.
define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-LABEL: in_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <4 x i8> %x, %y
  %n1 = and <4 x i8> %n0, %mask
  %r = xor <4 x i8> %n1, %y
  ret <4 x i8> %r
}

; "In" form of the masked merge on <2 x i16>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one BSL that selects on the mask register (v2), then a
; move of the result into v0.
define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
; CHECK-LABEL: in_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <2 x i16> %x, %y
  %n1 = and <2 x i16> %n0, %mask
  %r = xor <2 x i16> %n1, %y
  ret <2 x i16> %r
}

; "In" form of the masked merge on <1 x i32>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one BSL that selects on the mask register (v2), then a
; move of the result into v0.
define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
; CHECK-LABEL: in_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <1 x i32> %x, %y
  %n1 = and <1 x i32> %n0, %mask
  %r = xor <1 x i32> %n1, %y
  ret <1 x i32> %r
}

; ============================================================================ ;
; 64-bit vector width
; ============================================================================ ;

; "In" form of the masked merge on <8 x i8>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one BSL that selects on the mask register (v2), then a
; move of the result into v0.
define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
; CHECK-LABEL: in_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <8 x i8> %x, %y
  %n1 = and <8 x i8> %n0, %mask
  %r = xor <8 x i8> %n1, %y
  ret <8 x i8> %r
}

; "In" form of the masked merge on <4 x i16>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one BSL that selects on the mask register (v2), then a
; move of the result into v0.
define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-LABEL: in_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <4 x i16> %x, %y
  %n1 = and <4 x i16> %n0, %mask
  %r = xor <4 x i16> %n1, %y
  ret <4 x i16> %r
}

; "In" form of the masked merge on <2 x i32>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one BSL that selects on the mask register (v2), then a
; move of the result into v0.
define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
; CHECK-LABEL: in_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <2 x i32> %x, %y
  %n1 = and <2 x i32> %n0, %mask
  %r = xor <2 x i32> %n1, %y
  ret <2 x i32> %r
}

; "In" form of the masked merge on <1 x i64>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one BSL that selects on the mask register (v2), then a
; move of the result into v0.
define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
; CHECK-LABEL: in_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <1 x i64> %x, %y
  %n1 = and <1 x i64> %n0, %mask
  %r = xor <1 x i64> %n1, %y
  ret <1 x i64> %r
}

; ============================================================================ ;
; 128-bit vector width
; ============================================================================ ;

; "In" form of the masked merge on <16 x i8>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one 128-bit BSL that selects on the mask register (v2),
; then a move of the result into v0.
define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
; CHECK-LABEL: in_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.16b, v0.16b, v1.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <16 x i8> %x, %y
  %n1 = and <16 x i8> %n0, %mask
  %r = xor <16 x i8> %n1, %y
  ret <16 x i8> %r
}

; "In" form of the masked merge on <8 x i16>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one 128-bit BSL that selects on the mask register (v2),
; then a move of the result into v0.
define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
; CHECK-LABEL: in_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.16b, v0.16b, v1.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <8 x i16> %x, %y
  %n1 = and <8 x i16> %n0, %mask
  %r = xor <8 x i16> %n1, %y
  ret <8 x i16> %r
}

; "In" form of the masked merge on <4 x i32>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one 128-bit BSL that selects on the mask register (v2),
; then a move of the result into v0.
define <4 x i32> @in_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
; CHECK-LABEL: in_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.16b, v0.16b, v1.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <4 x i32> %x, %y
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; "In" form of the masked merge on <2 x i64>: r = ((x ^ y) & mask) ^ y.
; Expected lowering: one 128-bit BSL that selects on the mask register (v2),
; then a move of the result into v0.
define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
; CHECK-LABEL: in_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bsl v2.16b, v0.16b, v1.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <2 x i64> %x, %y
  %n1 = and <2 x i64> %n0, %mask
  %r = xor <2 x i64> %n1, %y
  ret <2 x i64> %r
}
