1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=AVX2
3; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=AVX512VL
4; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=AVX2
5; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=AVX512VL
6
7; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx2-builtins.c
8
; Unsigned saturating add on <32 x i8>, spelled out as generic IR
; (add + unsigned compare + select) rather than as a target intrinsic.
; Every RUN configuration is expected to fold the three instructions into a
; single vpaddusb on ymm registers; the run with the AVX-512 features expects
; the same four encoding bytes as the plain AVX2 run.
9define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
10; AVX2-LABEL: test_x86_avx2_paddus_b:
11; AVX2:       ## %bb.0:
12; AVX2-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdc,0xc1]
13; AVX2-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
14;
15; AVX512VL-LABEL: test_x86_avx2_paddus_b:
16; AVX512VL:       ## %bb.0:
17; AVX512VL-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
18; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
; %1 is the wrapping per-lane sum. A lane wrapped around exactly when %a0 is
; unsigned-greater than that sum, which is what %2 records.
19  %1 = add <32 x i8> %a0, %a1
20  %2 = icmp ugt <32 x i8> %a0, %1
; Lanes that wrapped are replaced by -1 (all bits set); the rest keep the sum.
21  %3 = select <32 x i1> %2, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <32 x i8> %1
22  ret <32 x i8> %3
23}
24
; Unsigned saturating add on <16 x i16>, spelled out as generic IR
; (add + unsigned compare + select) rather than as a target intrinsic.
; Every RUN configuration is expected to fold the three instructions into a
; single vpaddusw on ymm registers; the run with the AVX-512 features expects
; the same four encoding bytes as the plain AVX2 run.
25define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
26; AVX2-LABEL: test_x86_avx2_paddus_w:
27; AVX2:       ## %bb.0:
28; AVX2-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdd,0xc1]
29; AVX2-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
30;
31; AVX512VL-LABEL: test_x86_avx2_paddus_w:
32; AVX512VL:       ## %bb.0:
33; AVX512VL-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]
34; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
; %1 is the wrapping per-lane sum. A lane wrapped around exactly when %a0 is
; unsigned-greater than that sum, which is what %2 records.
35  %1 = add <16 x i16> %a0, %a1
36  %2 = icmp ugt <16 x i16> %a0, %1
; Lanes that wrapped are replaced by -1 (all bits set); the rest keep the sum.
37  %3 = select <16 x i1> %2, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <16 x i16> %1
38  ret <16 x i16> %3
39}
40
; Unsigned saturating subtract on <32 x i8>, spelled out as generic IR
; (unsigned compare + select + sub) rather than as a target intrinsic.
; Every RUN configuration is expected to fold the three instructions into a
; single vpsubusb on ymm registers; the run with the AVX-512 features expects
; the same four encoding bytes as the plain AVX2 run.
41define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
42; AVX2-LABEL: test_x86_avx2_psubus_b:
43; AVX2:       ## %bb.0:
44; AVX2-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd8,0xc1]
45; AVX2-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
46;
47; AVX512VL-LABEL: test_x86_avx2_psubus_b:
48; AVX512VL:       ## %bb.0:
49; AVX512VL-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
50; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
; %sel is the per-lane unsigned maximum of %a0 and %a1.
51  %cmp = icmp ugt <32 x i8> %a0, %a1
52  %sel = select <32 x i1> %cmp, <32 x i8> %a0, <32 x i8> %a1
; Subtracting %a1 from that maximum yields %a0 - %a1 where %a0 is larger and
; 0 otherwise, so the result never wraps below zero.
53  %sub = sub <32 x i8> %sel, %a1
54  ret <32 x i8> %sub
55}
56
; Unsigned saturating subtract on <16 x i16>, spelled out as generic IR
; (unsigned compare + select + sub) rather than as a target intrinsic.
; Every RUN configuration is expected to fold the three instructions into a
; single vpsubusw on ymm registers; the run with the AVX-512 features expects
; the same four encoding bytes as the plain AVX2 run.
57define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
58; AVX2-LABEL: test_x86_avx2_psubus_w:
59; AVX2:       ## %bb.0:
60; AVX2-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd9,0xc1]
61; AVX2-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
62;
63; AVX512VL-LABEL: test_x86_avx2_psubus_w:
64; AVX512VL:       ## %bb.0:
65; AVX512VL-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
66; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
; %sel is the per-lane unsigned maximum of %a0 and %a1.
67  %cmp = icmp ugt <16 x i16> %a0, %a1
68  %sel = select <16 x i1> %cmp, <16 x i16> %a0, <16 x i16> %a1
; Subtracting %a1 from that maximum yields %a0 - %a1 where %a0 is larger and
; 0 otherwise, so the result never wraps below zero.
69  %sub = sub <16 x i16> %sel, %a1
70  ret <16 x i16> %sub
71}
72
; Unsigned saturating add on a 512-bit <32 x i16> vector, spelled out as
; generic IR (add + unsigned compare + select).
; With only AVX2 enabled the expected output is two 256-bit vpaddusw
; instructions, one per half (ymm0 with ymm2, ymm1 with ymm3). With the
; AVX-512 feature set the expected output is a single vpaddusw on zmm
; registers, with a six-byte encoding that starts with 0x62.
73define <32 x i16> @test_x86_avx2_paddus_w_512(<32 x i16> %a, <32 x i16> %b) {
74; AVX2-LABEL: test_x86_avx2_paddus_w_512:
75; AVX2:       ## %bb.0:
76; AVX2-NEXT:    vpaddusw %ymm2, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdd,0xc2]
77; AVX2-NEXT:    vpaddusw %ymm3, %ymm1, %ymm1 ## encoding: [0xc5,0xf5,0xdd,0xcb]
78; AVX2-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
79;
80; AVX512VL-LABEL: test_x86_avx2_paddus_w_512:
81; AVX512VL:       ## %bb.0:
82; AVX512VL-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xdd,0xc1]
83; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
; %1 is the wrapping per-lane sum. A lane wrapped around exactly when %a is
; unsigned-greater than that sum, which is what %2 records.
84  %1 = add <32 x i16> %a, %b
85  %2 = icmp ugt <32 x i16> %a, %1
; Lanes that wrapped are replaced by -1 (all bits set); the rest keep the sum.
86  %3 = select <32 x i1> %2, <32 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <32 x i16> %1
87  ret <32 x i16> %3
88}
89
; Unsigned saturating subtract on a 512-bit <32 x i16> vector, spelled out as
; generic IR (unsigned compare + select + sub).
; With only AVX2 enabled the expected output is two 256-bit vpsubusw
; instructions, one per half (ymm0 with ymm2, ymm1 with ymm3). With the
; AVX-512 feature set the expected output is a single vpsubusw on zmm
; registers, with a six-byte encoding that starts with 0x62.
90define <32 x i16> @test_x86_avx2_psubus_w_512(<32 x i16> %a, <32 x i16> %b) {
91; AVX2-LABEL: test_x86_avx2_psubus_w_512:
92; AVX2:       ## %bb.0:
93; AVX2-NEXT:    vpsubusw %ymm2, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd9,0xc2]
94; AVX2-NEXT:    vpsubusw %ymm3, %ymm1, %ymm1 ## encoding: [0xc5,0xf5,0xd9,0xcb]
95; AVX2-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
96;
97; AVX512VL-LABEL: test_x86_avx2_psubus_w_512:
98; AVX512VL:       ## %bb.0:
99; AVX512VL-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd9,0xc1]
100; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
; %sel is the per-lane unsigned maximum of %a and %b.
101  %cmp = icmp ugt <32 x i16> %a, %b
102  %sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
; Subtracting %b from that maximum yields %a - %b where %a is larger and
; 0 otherwise, so the result never wraps below zero.
103  %sub = sub <32 x i16> %sel, %b
104  ret <32 x i16> %sub
105}
106