; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl | FileCheck %s

declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readonly

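; VPLZCNT tests: count the leading zero bits of each packed dword/qword element.
; Each test runs the masked intrinsic with the caller-supplied mask and again
; with an all-ones mask (i8 -1); the 128-bit dword test additionally checks
; zero-masking via a zeroinitializer passthru. The results are summed so a
; single return value covers every form.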
declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vplzcntd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vplzcntd %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res3 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vplzcntd %ymm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vplzcntq %xmm0, %xmm0
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vplzcntq %ymm0, %ymm0
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

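; VPCONFLICT tests: for each element, report which earlier elements of the
; source vector hold the same value. Coverage mirrors the VPLZCNT tests:
; caller mask, all-ones mask (i8 -1), and zero-masking for the 128-bit dword
; variant.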
declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vpconflictd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpconflictd %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vpconflictd %ymm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vpconflictq %xmm0, %xmm0
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vpconflictq %ymm0, %ymm0
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

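; VPBROADCASTMW2D tests: broadcast a 16-bit mask register, zero-extended to
; 32 bits, into every dword element of the destination.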
define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
; CHECK-LABEL: test_x86_vbroadcastmw_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    vpbroadcastmw2d %k0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)

define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
; CHECK-LABEL: test_x86_vbroadcastmw_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    vpbroadcastmw2d %k0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)

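; VPBROADCASTMB2Q tests: broadcast an 8-bit mask register, zero-extended to
; 64 bits, into every qword element of the destination.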
define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
; CHECK-LABEL: test_x86_broadcastmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    vpbroadcastmb2q %k0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)

define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
; CHECK-LABEL: test_x86_broadcastmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    vpbroadcastmb2q %k0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)