1; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s 2 3declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone 4; CHECK-LABEL: test_kortestz 5; CHECK: kortestw 6; CHECK: sete 7define i32 @test_kortestz(i16 %a0, i16 %a1) { 8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1) 9 ret i32 %res 10} 11 12declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone 13; CHECK-LABEL: test_kortestc 14; CHECK: kortestw 15; CHECK: sbbl 16define i32 @test_kortestc(i16 %a0, i16 %a1) { 17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1) 18 ret i32 %res 19} 20 21declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone 22; CHECK-LABEL: test_kand 23; CHECK: kandw 24; CHECK: kandw 25define i16 @test_kand(i16 %a0, i16 %a1) { 26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8) 27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1) 28 ret i16 %t2 29} 30 31declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone 32; CHECK-LABEL: test_knot 33; CHECK: knotw 34define i16 @test_knot(i16 %a0) { 35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0) 36 ret i16 %res 37} 38 39declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone 40 41; CHECK-LABEL: unpckbw_test 42; CHECK: kunpckbw 43; CHECK:ret 44define i16 @unpckbw_test(i16 %a0, i16 %a1) { 45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1) 46 ret i16 %res 47} 48 49define <16 x float> @test_rcp_ps_512(<16 x float> %a0) { 50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0] 51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] 52 ret <16 x float> %res 53} 54declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone 55 56define <8 x double> @test_rcp_pd_512(<8 x double> %a0) { 57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0] 58 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1] 59 ret <8 x double> %res 60} 61declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone 62 63declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32) 64 65define <8 x double> @test7(<8 x double> %a) { 66; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b] 67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4) 68 ret <8 x double>%res 69} 70 71declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32) 72 73define <16 x float> @test8(<16 x float> %a) { 74; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b] 75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4) 76 ret <16 x float>%res 77} 78 79define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) { 80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0] 81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] 82 ret <16 x float> %res 83} 84declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone 85 86define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) { 87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0] 88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] 89 ret <4 x float> %res 90} 91declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 92 93define <4 x float> @test_rcp14_ss(<4 x float> %a0) { 94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0] 95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] 96 ret <4 x float> %res 97} 98declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 99 100define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { 101 ; CHECK: vsqrtpd 102 %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1] 103 ret <8 x double> %res 104} 105declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone 106 107define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) { 108 ; CHECK: vsqrtps 109 %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1] 110 ret <16 x float> %res 111} 112declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone 113 114define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) { 115 ; CHECK: vsqrtss {{.*}}encoding: [0x62 116 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 117 ret <4 x float> %res 118} 119declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone 120 121define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) { 122 ; CHECK: vsqrtsd {{.*}}encoding: [0x62 123 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 124 ret <2 x double> %res 125} 126declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone 127 128define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) { 129 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62 130 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1] 131 ret i64 %res 132} 133declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone 134 135define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { 136 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62 137 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] 138 ret <2 x double> %res 139} 140declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone 141 142define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) { 143 ; CHECK: vcvtusi2sdq {{.*}}encoding: [0x62 144 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] 145 ret <2 x double> %res 146} 147declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone 148 149define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) { 150 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62 151 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1] 152 ret i64 %res 153} 154declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone 155 156 157define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { 158 ; CHECK: vcvtss2si {{.*}}encoding: [0x62 159 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1] 160 ret i64 %res 161} 162declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone 163 164 165define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { 166 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62 167 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] 168 ret <4 x float> %res 169} 170declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone 171 172 173define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) { 174 ; CHECK: vcvttss2si {{.*}}encoding: [0x62 175 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1] 176 ret i64 %res 177} 178declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone 179 180define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) { 181 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62 182 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1] 183 ret i64 %res 184} 185declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone 186 187define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) { 188 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0] 189 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) 190 ret <16 x float> %res 191} 192declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly 193 194 195define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) { 196 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02] 197 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1) 198 ret <16 x i16> %res 199} 200 201declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly 202 203define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) { 204 ; CHECK: vbroadcastss 205 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1] 206 ret <16 x float> %res 207} 208declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly 209 210define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) { 211 ; CHECK: vbroadcastsd 212 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1] 213 ret <8 x double> %res 214} 215declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly 216 217define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) { 218 ; CHECK: vbroadcastss 219 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1] 220 ret <16 x float> %res 221} 222declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly 223 224define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) { 225 ; CHECK: vbroadcastsd 226 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1] 227 ret <8 x double> %res 228} 229declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly 230 231define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) { 232 ; CHECK: vpbroadcastd 233 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1] 234 ret <16 x i32> %res 235} 236declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly 237 238define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) { 239 ; CHECK: vpbroadcastd 240 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1] 241 ret <16 x i32> %res 242} 243declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly 244 245define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) { 246 ; CHECK: vpbroadcastq 247 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1] 248 ret <8 x i64> %res 249} 250declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly 251 252define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) { 253 ; CHECK: vpbroadcastq 254 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1] 255 ret <8 x i64> %res 256} 257declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly 258 259define <16 x i32> @test_conflict_d(<16 x i32> %a) { 260 ; CHECK: movw $-1, %ax 261 ; CHECK: vpxor 262 ; CHECK: vpconflictd 263 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) 264 ret <16 x i32> %res 265} 266 267declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly 268 269define <8 x i64> @test_conflict_q(<8 x i64> %a) { 270 ; CHECK: movb $-1, %al 271 ; CHECK: vpxor 272 ; CHECK: vpconflictq 273 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) 274 ret <8 x i64> %res 275} 276 277declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly 278 279define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { 280 ; CHECK: vpconflictd 281 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask) 282 ret <16 x i32> %res 283} 284 285define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 286 ; CHECK: vpconflictq 287 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 288 ret <8 x i64> %res 289} 290 291define <16 x i32> @test_lzcnt_d(<16 x i32> %a) { 292 ; CHECK: movw $-1, %ax 293 ; CHECK: vpxor 294 ; CHECK: vplzcntd 295 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) 296 ret <16 x i32> %res 297} 298 299declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly 300 301define <8 x i64> @test_lzcnt_q(<8 x i64> %a) { 302 ; CHECK: movb $-1, %al 303 ; CHECK: vpxor 304 ; CHECK: vplzcntq 305 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) 306 ret <8 x i64> %res 307} 308 309declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly 310 311 312define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 313 ; CHECK: vplzcntd 314 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 315 ret <16 x i32> %res 316} 317 318define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 319 ; CHECK: vplzcntq 320 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 321 ret <8 x i64> %res 322} 323 324define <16 x i32> @test_ctlz_d(<16 x i32> %a) { 325 ; CHECK-LABEL: test_ctlz_d 326 ; CHECK: vplzcntd 327 %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false) 328 ret <16 x i32> %res 329} 330 331declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly 332 333define <8 x i64> @test_ctlz_q(<8 x i64> %a) { 334 ; CHECK-LABEL: test_ctlz_q 335 ; CHECK: vplzcntq 336 %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false) 337 ret <8 x i64> %res 338} 339 340declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly 341 342define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) { 343 ; CHECK: vblendmps %zmm1, %zmm0 344 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1] 345 ret <16 x float> %res 346} 347 348declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly 349 350define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) { 351 ; CHECK: vblendmpd %zmm1, %zmm0 352 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1] 353 ret <8 x double> %res 354} 355 356define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) { 357 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop 358 ; CHECK: vblendmpd (% 359 %b = load <8 x double>, <8 x double>* %ptr 360 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1] 361 ret <8 x double> %res 362} 363declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly 364 365define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) { 366 ; CHECK: vpblendmd 367 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1] 368 ret <16 x i32> %res 369} 370declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly 371 372define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) { 373 ; CHECK: vpblendmq 374 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1] 375 ret <8 x i64> %res 376} 377declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly 378 379 define <8 x i32> @test_cvtpd2udq(<8 x double> %a) { 380 ;CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0] 381 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32>zeroinitializer, i8 -1, i32 2) 382 ret <8 x i32>%res 383 } 384 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32) 385 386 define <16 x i32> @test_cvtps2udq(<16 x float> %a) { 387 ;CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0] 388 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32>zeroinitializer, i16 -1, i32 1) 389 ret <16 x i32>%res 390 } 391 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32) 392 393 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) { 394 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02] 395 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i8 2, i16 -1, i32 8) 396 ret i16 %res 397 } 398 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i8, i16, i32) 399 400 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) { 401 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04] 402 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i8 4, i8 -1, i32 4) 403 ret i8 %res 404 } 405 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i8, i8, i32) 406 407 ; cvt intrinsics 408 define <16 x float> @test_cvtdq2ps(<16 x i32> %a) { 409 ;CHECK: vcvtdq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x5b,0xc0] 410 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1) 411 ret <16 x float>%res 412 } 413 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32) 414 415 define <16 x float> @test_cvtudq2ps(<16 x i32> %a) { 416 ;CHECK: vcvtudq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7f,0x38,0x7a,0xc0] 417 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1) 418 ret <16 x float>%res 419 } 420 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32) 421 422 define <8 x double> @test_cvtdq2pd(<8 x i32> %a) { 423 ;CHECK: vcvtdq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0] 424 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1) 425 ret <8 x double>%res 426 } 427 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8) 428 429 define <8 x double> @test_cvtudq2pd(<8 x i32> %a) { 430 ;CHECK: vcvtudq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0] 431 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1) 432 ret <8 x double>%res 433 } 434 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8) 435 436 ; fp min - max 437define <16 x float> @test_vmaxps(<16 x float> %a0, <16 x float> %a1) { 438 ; CHECK: vmaxps 439 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, 440 <16 x float>zeroinitializer, i16 -1, i32 4) 441 ret <16 x float> %res 442} 443declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, 444 <16 x float>, i16, i32) 445 446define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) { 447 ; CHECK: vmaxpd 448 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1, 449 <8 x double>zeroinitializer, i8 -1, i32 4) 450 ret <8 x double> %res 451} 452declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>, 453 <8 x double>, i8, i32) 454 455define <16 x float> @test_vminps(<16 x float> %a0, <16 x float> %a1) { 456 ; CHECK: vminps 457 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, 458 <16 x float>zeroinitializer, i16 -1, i32 4) 459 ret <16 x float> %res 460} 461declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, 462 <16 x float>, i16, i32) 463 464define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) { 465 ; CHECK: vminpd 466 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1, 467 <8 x double>zeroinitializer, i8 -1, i32 4) 468 ret <8 x double> %res 469} 470declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>, 471 <8 x double>, i8, i32) 472 473 define <8 x float> @test_cvtpd2ps(<8 x double> %a) { 474 ;CHECK: vcvtpd2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0xfd,0x38,0x5a,0xc0] 475 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %a, <8 x float>zeroinitializer, i8 -1, i32 1) 476 ret <8 x float>%res 477 } 478 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32) 479 480 define <16 x i32> @test_pabsd(<16 x i32> %a) { 481 ;CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0] 482 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32>zeroinitializer, i16 -1) 483 ret < 16 x i32> %res 484 } 485 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) 486 487 define <8 x i64> @test_pabsq(<8 x i64> %a) { 488 ;CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0] 489 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64>zeroinitializer, i8 -1) 490 ret <8 x i64> %res 491 } 492 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) 493 494define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) { 495 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1] 496 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1, 497 <8 x i64>zeroinitializer, i8 -1) 498 ret <8 x i64> %res 499} 500declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 501 502define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) { 503 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1] 504 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1, 505 <16 x i32>zeroinitializer, i16 -1) 506 ret <16 x i32> %res 507} 508declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 509 510define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) { 511 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1] 512 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1, 513 <16 x i32>zeroinitializer, i16 -1) 514 ret <16 x i32> %res 515} 516declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 517 518define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) { 519 ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1] 520 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1) 521 ret i8 %res 522} 523declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8) 524 525define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) { 526 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1] 527 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1) 528 ret i16 %res 529} 530declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16) 531 532define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) { 533; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07] 534 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask) 535 ret void 536} 537 538declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 ) 539 540define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) { 541; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07] 542 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask) 543 ret void 544} 545 546declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8) 547 548define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) { 549; CHECK-LABEL: test_mask_store_aligned_ps: 550; CHECK: ## BB#0: 551; CHECK-NEXT: kmovw %esi, %k1 552; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1} 553; CHECK-NEXT: retq 554 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask) 555 ret void 556} 557 558declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 ) 559 560define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) { 561; CHECK-LABEL: test_mask_store_aligned_pd: 562; CHECK: ## BB#0: 563; CHECK-NEXT: kmovw %esi, %k1 564; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1} 565; CHECK-NEXT: retq 566 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask) 567 ret void 568} 569 570declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8) 571 572define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) { 573; CHECK-LABEL: test_maskz_load_aligned_ps: 574; CHECK: ## BB#0: 575; CHECK-NEXT: kmovw %esi, %k1 576; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} 577; CHECK-NEXT: retq 578 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask) 579 ret <16 x float> %res 580} 581 582declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16) 583 584define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) { 585; CHECK-LABEL: test_maskz_load_aligned_pd: 586; CHECK: ## BB#0: 587; CHECK-NEXT: kmovw %esi, %k1 588; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} 589; CHECK-NEXT: retq 590 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask) 591 ret <8 x double> %res 592} 593 594declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8) 595 596define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) { 597; CHECK-LABEL: test_load_aligned_ps: 598; CHECK: ## BB#0: 599; CHECK-NEXT: vmovaps (%rdi), %zmm0 600; CHECK-NEXT: retq 601 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1) 602 ret <16 x float> %res 603} 604 605define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) { 606; CHECK-LABEL: test_load_aligned_pd: 607; CHECK: ## BB#0: 608; CHECK-NEXT: vmovapd (%rdi), %zmm0 609; CHECK-NEXT: retq 610 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1) 611 ret <8 x double> %res 612} 613 614define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) { 615; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1] 616 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 -1) 617 ret <16 x float> %res 618} 619 620define <16 x float> @test_vpermt2ps_mask(<16 x float>%x, <16 x float>%y, <16 x i32>%perm, i16 %mask) { 621; CHECK-LABEL: test_vpermt2ps_mask: 622; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1] 623 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 %mask) 624 ret <16 x float> %res 625} 626 627declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16) 628 629define <8 x i64> @test_vmovntdqa(i8 *%x) { 630; CHECK-LABEL: test_vmovntdqa: 631; CHECK: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07] 632 %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %x) 633 ret <8 x i64> %res 634} 635 636declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*) 637 638define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) { 639; CHECK-LABEL: test_valign_q: 640; CHECK: valignq $2, %zmm1, %zmm0, %zmm0 641 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1) 642 ret <8 x i64> %res 643} 644 645define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) { 646; CHECK-LABEL: test_mask_valign_q: 647; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1} 648 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask) 649 ret <8 x i64> %res 650} 651 652declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8) 653 654define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 655; CHECK-LABEL: test_maskz_valign_d: 656; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05] 657 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask) 658 ret <16 x i32> %res 659} 660 661declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16) 662 663define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) { 664 ; CHECK-LABEL: test_mask_store_ss 665 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07] 666 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask) 667 ret void 668} 669 670declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 ) 671 672define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) { 673; CHECK-LABEL: test_pcmpeq_d 674; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ## 675 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) 676 ret i16 %res 677} 678 679define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 680; CHECK-LABEL: test_mask_pcmpeq_d 681; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## 682 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 683 ret i16 %res 684} 685 686declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16) 687 688define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) { 689; CHECK-LABEL: test_pcmpeq_q 690; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ## 691 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) 692 ret i8 %res 693} 694 695define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 696; CHECK-LABEL: test_mask_pcmpeq_q 697; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## 698 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 699 ret i8 %res 700} 701 702declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8) 703 704define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) { 705; CHECK-LABEL: test_pcmpgt_d 706; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ## 707 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) 708 ret i16 %res 709} 710 711define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 712; CHECK-LABEL: test_mask_pcmpgt_d 713; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ## 714 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) 715 ret i16 %res 716} 717 718declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16) 719 720define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) { 721; CHECK-LABEL: test_pcmpgt_q 722; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ## 723 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) 724 ret i8 %res 725} 726 727define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 728; CHECK-LABEL: test_mask_pcmpgt_q 729; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ## 730 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) 731 ret i8 %res 732} 733 734declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8) 735 736define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) { 737; CHECK_LABEL: test_cmp_d_512 738; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ## 739 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 0, i16 -1) 740 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 741; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ## 742 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 1, i16 -1) 743 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 744; CHECK: vpcmpled %zmm1, %zmm0, %k0 ## 745 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 2, i16 -1) 746 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 747; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ## 748 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 3, i16 -1) 749 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 750; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ## 751 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 4, i16 -1) 752 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 753; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ## 754 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 5, i16 -1) 755 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 756; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ## 757 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 6, i16 -1) 758 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 759; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ## 760 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 7, i16 -1) 761 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 762 ret <8 x i16> %vec7 763} 764 765define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 766; CHECK_LABEL: test_mask_cmp_d_512 767; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## 768 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 0, i16 %mask) 769 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 770; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ## 771 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 1, i16 %mask) 772 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 773; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ## 774 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 2, i16 %mask) 775 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 776; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ## 777 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 3, i16 %mask) 778 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 779; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ## 780 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 4, i16 %mask) 781 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 782; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ## 783 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 5, i16 %mask) 784 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 785; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ## 786 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 6, i16 %mask) 787 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 788; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ## 789 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 7, i16 %mask) 790 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 791 ret <8 x i16> %vec7 792} 793 794declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i8, i16) nounwind readnone 795 796define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) { 797; CHECK_LABEL: test_ucmp_d_512 798; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ## 799 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 0, i16 -1) 800 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 801; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ## 802 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 1, i16 -1) 803 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 804; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ## 805 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 2, i16 -1) 806 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 807; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ## 808 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 3, i16 -1) 809 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 810; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ## 811 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 4, i16 -1) 812 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 813; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ## 814 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 5, i16 -1) 815 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 816; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ## 817 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 6, i16 -1) 818 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 819; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ## 820 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 7, i16 -1) 821 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 822 ret <8 x i16> %vec7 823} 824 825define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 826; CHECK_LABEL: test_mask_ucmp_d_512 827; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ## 828 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 0, i16 %mask) 829 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 830; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ## 831 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 1, i16 %mask) 832 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 833; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ## 834 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 2, i16 %mask) 835 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 836; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ## 837 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 3, i16 %mask) 838 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 839; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ## 840 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 4, i16 %mask) 841 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 842; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ## 843 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 5, i16 %mask) 844 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 845; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ## 846 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 6, i16 %mask) 847 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 848; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ## 849 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 7, i16 %mask) 850 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 851 ret <8 x i16> %vec7 852} 853 854declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i8, i16) nounwind readnone 855 856define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { 857; CHECK_LABEL: test_cmp_q_512 858; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ## 859 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 0, i8 -1) 860 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 861; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ## 862 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 1, i8 -1) 863 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 864; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ## 865 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 2, i8 -1) 866 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 867; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ## 868 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 3, i8 -1) 869 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 870; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ## 871 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 4, i8 -1) 872 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 873; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ## 874 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 5, i8 -1) 875 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 876; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ## 877 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 6, i8 -1) 878 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 879; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ## 880 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 7, i8 -1) 881 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 882 ret <8 x i8> %vec7 883} 884 885define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 886; CHECK_LABEL: test_mask_cmp_q_512 887; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## 888 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 0, i8 %mask) 889 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 890; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ## 891 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 1, i8 %mask) 892 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 893; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ## 894 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 2, i8 %mask) 895 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 896; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ## 897 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 3, i8 %mask) 898 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 899; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ## 900 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 4, i8 %mask) 901 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 902; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ## 903 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 5, i8 %mask) 904 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 905; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ## 906 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 6, i8 %mask) 907 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 908; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ## 909 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 7, i8 %mask) 910 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 911 ret <8 x i8> %vec7 912} 913 914declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i8, i8) nounwind readnone 915 916define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { 917; CHECK_LABEL: test_ucmp_q_512 918; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ## 919 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 0, i8 -1) 920 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 921; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ## 922 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 1, i8 -1) 923 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 924; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ## 925 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 2, i8 -1) 926 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 927; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ## 928 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 3, i8 -1) 929 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 930; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ## 931 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 4, i8 -1) 932 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 933; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ## 934 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 5, i8 -1) 935 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 936; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ## 937 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 6, i8 -1) 938 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 939; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ## 940 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 7, i8 -1) 941 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 942 ret <8 x i8> %vec7 943} 944 945define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 946; CHECK_LABEL: test_mask_ucmp_q_512 947; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ## 948 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 0, i8 %mask) 949 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 950; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ## 951 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 1, i8 %mask) 952 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 953; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ## 954 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 2, i8 %mask) 955 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 956; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ## 957 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 3, i8 %mask) 958 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 959; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ## 960 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 4, i8 %mask) 961 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 962; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ## 963 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 5, i8 %mask) 964 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 965; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ## 966 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 6, i8 %mask) 967 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 968; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ## 969 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 7, i8 %mask) 970 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 971 ret <8 x i8> %vec7 972} 973 974declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i8, i8) nounwind readnone 975 976define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) { 977; CHECK-LABEL: test_mask_vextractf32x4: 978; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1} 979 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask) 980 ret <4 x float> %res 981} 982 983declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8) 984 985define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) { 986; CHECK-LABEL: test_mask_vextracti64x4: 987; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1} 988 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask) 989 ret <4 x i64> %res 990} 991 992declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8) 993 994define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) { 995; CHECK-LABEL: test_maskz_vextracti32x4: 996; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} 997 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask) 998 ret <4 x i32> %res 999} 1000 1001declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8) 1002 1003define <4 x double> @test_vextractf64x4(<8 x double> %a) { 1004; CHECK-LABEL: test_vextractf64x4: 1005; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ## 1006 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1) 1007 ret <4 x double> %res 1008} 1009 1010declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8) 1011 1012define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) { 1013 ; CHECK-LABEL: test_x86_avx512_pslli_d 1014 ; CHECK: vpslld 1015 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) 1016 ret <16 x i32> %res 1017} 1018 1019define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1020 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d 1021 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1} 1022 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) 1023 ret <16 x i32> %res 1024} 1025 1026define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) { 1027 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d 1028 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z} 1029 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) 1030 ret <16 x i32> %res 1031} 1032 1033declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone 1034 1035define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) { 1036 ; CHECK-LABEL: test_x86_avx512_pslli_q 1037 ; CHECK: vpsllq 1038 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) 1039 ret <8 x i64> %res 1040} 1041 1042define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1043 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q 1044 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1} 1045 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) 1046 ret <8 x i64> %res 1047} 1048 1049define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) { 1050 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q 1051 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z} 1052 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) 1053 ret <8 x i64> %res 1054} 1055 1056declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone 1057 1058define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) { 1059 ; CHECK-LABEL: test_x86_avx512_psrli_d 1060 ; CHECK: vpsrld 1061 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) 1062 ret <16 x i32> %res 1063} 1064 1065define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1066 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d 1067 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1} 1068 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) 1069 ret <16 x i32> %res 1070} 1071 1072define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) { 1073 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d 1074 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z} 1075 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) 1076 ret <16 x i32> %res 1077} 1078 1079declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone 1080 1081define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) { 1082 ; CHECK-LABEL: test_x86_avx512_psrli_q 1083 ; CHECK: vpsrlq 1084 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) 1085 ret <8 x i64> %res 1086} 1087 1088define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1089 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q 1090 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1} 1091 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) 1092 ret <8 x i64> %res 1093} 1094 1095define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) { 1096 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q 1097 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z} 1098 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) 1099 ret <8 x i64> %res 1100} 1101 1102declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone 1103 1104define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) { 1105 ; CHECK-LABEL: test_x86_avx512_psrai_d 1106 ; CHECK: vpsrad 1107 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) 1108 ret <16 x i32> %res 1109} 1110 1111define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1112 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d 1113 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1} 1114 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) 1115 ret <16 x i32> %res 1116} 1117 1118define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) { 1119 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d 1120 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z} 1121 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) 1122 ret <16 x i32> %res 1123} 1124 1125declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone 1126 1127define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) { 1128 ; CHECK-LABEL: test_x86_avx512_psrai_q 1129 ; CHECK: vpsraq 1130 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) 1131 ret <8 x i64> %res 1132} 1133 1134define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1135 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q 1136 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1} 1137 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) 1138 ret <8 x i64> %res 1139} 1140 1141define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) { 1142 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q 1143 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z} 1144 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) 1145 ret <8 x i64> %res 1146} 1147 1148declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone 1149 1150define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) { 1151 ; CHECK-LABEL: test_x86_avx512_psll_d 1152 ; CHECK: vpslld 1153 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1154 ret <16 x i32> %res 1155} 1156 1157define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1158 ; CHECK-LABEL: test_x86_avx512_mask_psll_d 1159 ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1} 1160 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) 1161 ret <16 x i32> %res 1162} 1163 1164define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 1165 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d 1166 ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} 1167 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1168 ret <16 x i32> %res 1169} 1170 1171declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone 1172 1173define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) { 1174 ; CHECK-LABEL: test_x86_avx512_psll_q 1175 ; CHECK: vpsllq 1176 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1177 ret <8 x i64> %res 1178} 1179 1180define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1181 ; CHECK-LABEL: test_x86_avx512_mask_psll_q 1182 ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1} 1183 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) 1184 ret <8 x i64> %res 1185} 1186 1187define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 1188 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q 1189 ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} 1190 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1191 ret <8 x i64> %res 1192} 1193 1194declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone 1195 1196define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) { 1197 ; CHECK-LABEL: test_x86_avx512_psrl_d 1198 ; CHECK: vpsrld 1199 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1200 ret <16 x i32> %res 1201} 1202 1203define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1204 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d 1205 ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1} 1206 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) 1207 ret <16 x i32> %res 1208} 1209 1210define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 1211 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d 1212 ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} 1213 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1214 ret <16 x i32> %res 1215} 1216 1217declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone 1218 1219define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) { 1220 ; CHECK-LABEL: test_x86_avx512_psrl_q 1221 ; CHECK: vpsrlq 1222 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1223 ret <8 x i64> %res 1224} 1225 1226define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1227 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q 1228 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1} 1229 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) 1230 ret <8 x i64> %res 1231} 1232 1233define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 1234 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q 1235 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} 1236 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1237 ret <8 x i64> %res 1238} 1239 1240declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone 1241 1242define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) { 1243 ; CHECK-LABEL: test_x86_avx512_psra_d 1244 ; CHECK: vpsrad 1245 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1246 ret <16 x i32> %res 1247} 1248 1249define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1250 ; CHECK-LABEL: test_x86_avx512_mask_psra_d 1251 ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1} 1252 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) 1253 ret <16 x i32> %res 1254} 1255 1256define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) { 1257 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d 1258 ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} 1259 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1260 ret <16 x i32> %res 1261} 1262 1263declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone 1264 1265define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) { 1266 ; CHECK-LABEL: test_x86_avx512_psra_q 1267 ; CHECK: vpsraq 1268 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1269 ret <8 x i64> %res 1270} 1271 1272define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1273 ; CHECK-LABEL: test_x86_avx512_mask_psra_q 1274 ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1} 1275 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) 1276 ret <8 x i64> %res 1277} 1278 1279define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) { 1280 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q 1281 ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} 1282 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1283 ret <8 x i64> %res 1284} 1285 1286declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone 1287 1288define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) { 1289 ; CHECK-LABEL: test_x86_avx512_psllv_d 1290 ; CHECK: vpsllvd 1291 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1292 ret <16 x i32> %res 1293} 1294 1295define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1296 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d 1297 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1} 1298 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) 1299 ret <16 x i32> %res 1300} 1301 1302define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1303 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d 1304 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} 1305 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1306 ret <16 x i32> %res 1307} 1308 1309declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone 1310 1311define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) { 1312 ; CHECK-LABEL: test_x86_avx512_psllv_q 1313 ; CHECK: vpsllvq 1314 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1315 ret <8 x i64> %res 1316} 1317 1318define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1319 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q 1320 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1} 1321 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) 1322 ret <8 x i64> %res 1323} 1324 1325define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1326 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q 1327 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} 1328 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1329 ret <8 x i64> %res 1330} 1331 1332declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone 1333 1334 1335define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) { 1336 ; CHECK-LABEL: test_x86_avx512_psrav_d 1337 ; CHECK: vpsravd 1338 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1339 ret <16 x i32> %res 1340} 1341 1342define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1343 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d 1344 ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1} 1345 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) 1346 ret <16 x i32> %res 1347} 1348 1349define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1350 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d 1351 ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} 1352 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1353 ret <16 x i32> %res 1354} 1355 1356declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone 1357 1358define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) { 1359 ; CHECK-LABEL: test_x86_avx512_psrav_q 1360 ; CHECK: vpsravq 1361 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1362 ret <8 x i64> %res 1363} 1364 1365define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1366 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q 1367 ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1} 1368 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) 1369 ret <8 x i64> %res 1370} 1371 1372define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1373 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q 1374 ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} 1375 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1376 ret <8 x i64> %res 1377} 1378 1379declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone 1380 1381define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) { 1382 ; CHECK-LABEL: test_x86_avx512_psrlv_d 1383 ; CHECK: vpsrlvd 1384 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) 1385 ret <16 x i32> %res 1386} 1387 1388define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { 1389 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d 1390 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} 1391 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) 1392 ret <16 x i32> %res 1393} 1394 1395define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) { 1396 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d 1397 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} 1398 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) 1399 ret <16 x i32> %res 1400} 1401 1402declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone 1403 1404define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) { 1405 ; CHECK-LABEL: test_x86_avx512_psrlv_q 1406 ; CHECK: vpsrlvq 1407 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) 1408 ret <8 x i64> %res 1409} 1410 1411define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { 1412 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q 1413 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} 1414 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) 1415 ret <8 x i64> %res 1416} 1417 1418define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { 1419 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q 1420 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} 1421 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) 1422 ret <8 x i64> %res 1423} 1424 1425declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone 1426 1427define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) { 1428 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop 1429 ; CHECK: vpsrlvq (% 1430 %b = load <8 x i64>, <8 x i64>* %ptr 1431 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 1432 ret <8 x i64> %res 1433} 1434 1435declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 1436declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 1437declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) 1438 1439define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) { 1440 ; CHECK-LABEL: test_vsubps_rn 1441 ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1] 1442 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, 1443 <16 x float> zeroinitializer, i16 -1, i32 0) 1444 ret <16 x float> %res 1445} 1446 1447define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) { 1448 ; CHECK-LABEL: test_vsubps_rd 1449 ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1] 1450 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, 1451 <16 x float> zeroinitializer, i16 -1, i32 1) 1452 ret <16 x float> %res 1453} 1454 1455define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) { 1456 ; CHECK-LABEL: test_vsubps_ru 1457 ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1] 1458 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, 1459 <16 x float> zeroinitializer, i16 -1, i32 2) 1460 ret <16 x float> %res 1461} 1462 1463define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) { 1464 ; CHECK-LABEL: test_vsubps_rz 1465 ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1] 1466 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, 1467 <16 x float> zeroinitializer, i16 -1, i32 3) 1468 ret <16 x float> %res 1469} 1470 1471define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) { 1472 ; CHECK-LABEL: test_vmulps_rn 1473 ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1] 1474 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1475 <16 x float> zeroinitializer, i16 -1, i32 0) 1476 ret <16 x float> %res 1477} 1478 1479define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) { 1480 ; CHECK-LABEL: test_vmulps_rd 1481 ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1] 1482 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1483 <16 x float> zeroinitializer, i16 -1, i32 1) 1484 ret <16 x float> %res 1485} 1486 1487define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) { 1488 ; CHECK-LABEL: test_vmulps_ru 1489 ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1] 1490 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1491 <16 x float> zeroinitializer, i16 -1, i32 2) 1492 ret <16 x float> %res 1493} 1494 1495define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) { 1496 ; CHECK-LABEL: test_vmulps_rz 1497 ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1] 1498 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1499 <16 x float> zeroinitializer, i16 -1, i32 3) 1500 ret <16 x float> %res 1501} 1502 1503;; mask float 1504define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1505 ; CHECK-LABEL: test_vmulps_mask_rn 1506 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1] 1507 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1508 <16 x float> zeroinitializer, i16 %mask, i32 0) 1509 ret <16 x float> %res 1510} 1511 1512define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1513 ; CHECK-LABEL: test_vmulps_mask_rd 1514 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1] 1515 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1516 <16 x float> zeroinitializer, i16 %mask, i32 1) 1517 ret <16 x float> %res 1518} 1519 1520define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1521 ; CHECK-LABEL: test_vmulps_mask_ru 1522 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1] 1523 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1524 <16 x float> zeroinitializer, i16 %mask, i32 2) 1525 ret <16 x float> %res 1526} 1527 1528define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) { 1529 ; CHECK-LABEL: test_vmulps_mask_rz 1530 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1] 1531 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1532 <16 x float> zeroinitializer, i16 %mask, i32 3) 1533 ret <16 x float> %res 1534} 1535 1536;; With Passthru value 1537define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) { 1538 ; CHECK-LABEL: test_vmulps_mask_passthru_rn 1539 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1] 1540 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1541 <16 x float> %passthru, i16 %mask, i32 0) 1542 ret <16 x float> %res 1543} 1544 1545define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) { 1546 ; CHECK-LABEL: test_vmulps_mask_passthru_rd 1547 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1] 1548 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1549 <16 x float> %passthru, i16 %mask, i32 1) 1550 ret <16 x float> %res 1551} 1552 1553define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) { 1554 ; CHECK-LABEL: test_vmulps_mask_passthru_ru 1555 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1] 1556 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1557 <16 x float> %passthru, i16 %mask, i32 2) 1558 ret <16 x float> %res 1559} 1560 1561define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) { 1562 ; CHECK-LABEL: test_vmulps_mask_passthru_rz 1563 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1] 1564 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1, 1565 <16 x float> %passthru, i16 %mask, i32 3) 1566 ret <16 x float> %res 1567} 1568 1569;; mask double 1570define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) { 1571 ; CHECK-LABEL: test_vmulpd_mask_rn 1572 ; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1] 1573 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1, 1574 <8 x double> zeroinitializer, i8 %mask, i32 0) 1575 ret <8 x double> %res 1576} 1577 1578define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) { 1579 ; CHECK-LABEL: test_vmulpd_mask_rd 1580 ; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1] 1581 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1, 1582 <8 x double> zeroinitializer, i8 %mask, i32 1) 1583 ret <8 x double> %res 1584} 1585 1586define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) { 1587 ; CHECK-LABEL: test_vmulpd_mask_ru 1588 ; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1] 1589 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1, 1590 <8 x double> zeroinitializer, i8 %mask, i32 2) 1591 ret <8 x double> %res 1592} 1593 1594define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) { 1595 ; CHECK-LABEL: test_vmulpd_mask_rz 1596 ; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1] 1597 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1, 1598 <8 x double> zeroinitializer, i8 %mask, i32 3) 1599 ret <8 x double> %res 1600} 1601 1602define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) { 1603 ;CHECK-LABEL: test_xor_epi32 1604 ;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1] 1605 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) 1606 ret < 16 x i32> %res 1607} 1608 1609define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 1610 ;CHECK-LABEL: test_mask_xor_epi32 1611 ;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1] 1612 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1613 ret < 16 x i32> %res 1614} 1615 1616declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 1617 1618define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) { 1619 ;CHECK-LABEL: test_or_epi32 1620 ;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1] 1621 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) 1622 ret < 16 x i32> %res 1623} 1624 1625define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 1626 ;CHECK-LABEL: test_mask_or_epi32 1627 ;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1] 1628 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1629 ret < 16 x i32> %res 1630} 1631 1632declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 1633 1634define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) { 1635 ;CHECK-LABEL: test_and_epi32 1636 ;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1] 1637 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) 1638 ret < 16 x i32> %res 1639} 1640 1641define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 1642 ;CHECK-LABEL: test_mask_and_epi32 1643 ;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1] 1644 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1645 ret < 16 x i32> %res 1646} 1647 1648declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 1649 1650define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) { 1651 ;CHECK-LABEL: test_xor_epi64 1652 ;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1] 1653 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) 1654 ret < 8 x i64> %res 1655} 1656 1657define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 1658 ;CHECK-LABEL: test_mask_xor_epi64 1659 ;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1] 1660 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1661 ret < 8 x i64> %res 1662} 1663 1664declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 1665 1666define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) { 1667 ;CHECK-LABEL: test_or_epi64 1668 ;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1] 1669 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) 1670 ret < 8 x i64> %res 1671} 1672 1673define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 1674 ;CHECK-LABEL: test_mask_or_epi64 1675 ;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1] 1676 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1677 ret < 8 x i64> %res 1678} 1679 1680declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 1681 1682define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) { 1683 ;CHECK-LABEL: test_and_epi64 1684 ;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1] 1685 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) 1686 ret < 8 x i64> %res 1687} 1688 1689define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 1690 ;CHECK-LABEL: test_mask_and_epi64 1691 ;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1] 1692 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1693 ret < 8 x i64> %res 1694} 1695 1696declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 1697 1698 1699define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 1700 ;CHECK-LABEL: test_mask_add_epi32_rr 1701 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1] 1702 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 1703 ret < 16 x i32> %res 1704} 1705 1706define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 1707 ;CHECK-LABEL: test_mask_add_epi32_rrk 1708 ;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1] 1709 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1710 ret < 16 x i32> %res 1711} 1712 1713define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 1714 ;CHECK-LABEL: test_mask_add_epi32_rrkz 1715 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1] 1716 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 1717 ret < 16 x i32> %res 1718} 1719 1720define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 1721 ;CHECK-LABEL: test_mask_add_epi32_rm 1722 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07] 1723 %b = load <16 x i32>, <16 x i32>* %ptr_b 1724 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 1725 ret < 16 x i32> %res 1726} 1727 1728define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) { 1729 ;CHECK-LABEL: test_mask_add_epi32_rmk 1730 ;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f] 1731 %b = load <16 x i32>, <16 x i32>* %ptr_b 1732 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1733 ret < 16 x i32> %res 1734} 1735 1736define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) { 1737 ;CHECK-LABEL: test_mask_add_epi32_rmkz 1738 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07] 1739 %b = load <16 x i32>, <16 x i32>* %ptr_b 1740 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 1741 ret < 16 x i32> %res 1742} 1743 1744define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) { 1745 ;CHECK-LABEL: test_mask_add_epi32_rmb 1746 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07] 1747 %q = load i32, i32* %ptr_b 1748 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1749 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1750 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 1751 ret < 16 x i32> %res 1752} 1753 1754define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) { 1755 ;CHECK-LABEL: test_mask_add_epi32_rmbk 1756 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f] 1757 %q = load i32, i32* %ptr_b 1758 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1759 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1760 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1761 ret < 16 x i32> %res 1762} 1763 1764define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) { 1765 ;CHECK-LABEL: test_mask_add_epi32_rmbkz 1766 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07] 1767 %q = load i32, i32* %ptr_b 1768 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1769 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1770 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 1771 ret < 16 x i32> %res 1772} 1773 1774declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 1775 1776define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 1777 ;CHECK-LABEL: test_mask_sub_epi32_rr 1778 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1] 1779 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 1780 ret < 16 x i32> %res 1781} 1782 1783define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) { 1784 ;CHECK-LABEL: test_mask_sub_epi32_rrk 1785 ;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1] 1786 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1787 ret < 16 x i32> %res 1788} 1789 1790define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) { 1791 ;CHECK-LABEL: test_mask_sub_epi32_rrkz 1792 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1] 1793 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 1794 ret < 16 x i32> %res 1795} 1796 1797define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 1798 ;CHECK-LABEL: test_mask_sub_epi32_rm 1799 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07] 1800 %b = load <16 x i32>, <16 x i32>* %ptr_b 1801 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 1802 ret < 16 x i32> %res 1803} 1804 1805define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) { 1806 ;CHECK-LABEL: test_mask_sub_epi32_rmk 1807 ;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f] 1808 %b = load <16 x i32>, <16 x i32>* %ptr_b 1809 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1810 ret < 16 x i32> %res 1811} 1812 1813define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) { 1814 ;CHECK-LABEL: test_mask_sub_epi32_rmkz 1815 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07] 1816 %b = load <16 x i32>, <16 x i32>* %ptr_b 1817 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 1818 ret < 16 x i32> %res 1819} 1820 1821define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) { 1822 ;CHECK-LABEL: test_mask_sub_epi32_rmb 1823 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07] 1824 %q = load i32, i32* %ptr_b 1825 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1826 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1827 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) 1828 ret < 16 x i32> %res 1829} 1830 1831define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) { 1832 ;CHECK-LABEL: test_mask_sub_epi32_rmbk 1833 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f] 1834 %q = load i32, i32* %ptr_b 1835 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1836 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1837 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) 1838 ret < 16 x i32> %res 1839} 1840 1841define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) { 1842 ;CHECK-LABEL: test_mask_sub_epi32_rmbkz 1843 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07] 1844 %q = load i32, i32* %ptr_b 1845 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1846 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1847 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) 1848 ret < 16 x i32> %res 1849} 1850 1851declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) 1852 1853define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) { 1854 ;CHECK-LABEL: test_mask_add_epi64_rr 1855 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1] 1856 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 1857 ret < 8 x i64> %res 1858} 1859 1860define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 1861 ;CHECK-LABEL: test_mask_add_epi64_rrk 1862 ;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1] 1863 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1864 ret < 8 x i64> %res 1865} 1866 1867define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 1868 ;CHECK-LABEL: test_mask_add_epi64_rrkz 1869 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1] 1870 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 1871 ret < 8 x i64> %res 1872} 1873 1874define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) { 1875 ;CHECK-LABEL: test_mask_add_epi64_rm 1876 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07] 1877 %b = load <8 x i64>, <8 x i64>* %ptr_b 1878 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 1879 ret < 8 x i64> %res 1880} 1881 1882define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 1883 ;CHECK-LABEL: test_mask_add_epi64_rmk 1884 ;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f] 1885 %b = load <8 x i64>, <8 x i64>* %ptr_b 1886 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1887 ret < 8 x i64> %res 1888} 1889 1890define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) { 1891 ;CHECK-LABEL: test_mask_add_epi64_rmkz 1892 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07] 1893 %b = load <8 x i64>, <8 x i64>* %ptr_b 1894 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 1895 ret < 8 x i64> %res 1896} 1897 1898define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) { 1899 ;CHECK-LABEL: test_mask_add_epi64_rmb 1900 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07] 1901 %q = load i64, i64* %ptr_b 1902 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 1903 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 1904 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 1905 ret < 8 x i64> %res 1906} 1907 1908define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 1909 ;CHECK-LABEL: test_mask_add_epi64_rmbk 1910 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f] 1911 %q = load i64, i64* %ptr_b 1912 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 1913 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 1914 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1915 ret < 8 x i64> %res 1916} 1917 1918define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) { 1919 ;CHECK-LABEL: test_mask_add_epi64_rmbkz 1920 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07] 1921 %q = load i64, i64* %ptr_b 1922 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 1923 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 1924 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 1925 ret < 8 x i64> %res 1926} 1927 1928declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 1929 1930define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) { 1931 ;CHECK-LABEL: test_mask_sub_epi64_rr 1932 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1] 1933 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 1934 ret < 8 x i64> %res 1935} 1936 1937define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) { 1938 ;CHECK-LABEL: test_mask_sub_epi64_rrk 1939 ;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1] 1940 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1941 ret < 8 x i64> %res 1942} 1943 1944define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) { 1945 ;CHECK-LABEL: test_mask_sub_epi64_rrkz 1946 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1] 1947 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 1948 ret < 8 x i64> %res 1949} 1950 1951define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) { 1952 ;CHECK-LABEL: test_mask_sub_epi64_rm 1953 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07] 1954 %b = load <8 x i64>, <8 x i64>* %ptr_b 1955 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 1956 ret < 8 x i64> %res 1957} 1958 1959define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 1960 ;CHECK-LABEL: test_mask_sub_epi64_rmk 1961 ;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f] 1962 %b = load <8 x i64>, <8 x i64>* %ptr_b 1963 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1964 ret < 8 x i64> %res 1965} 1966 1967define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) { 1968 ;CHECK-LABEL: test_mask_sub_epi64_rmkz 1969 ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07] 1970 %b = load <8 x i64>, <8 x i64>* %ptr_b 1971 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 1972 ret < 8 x i64> %res 1973} 1974 1975define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) { 1976 ;CHECK-LABEL: test_mask_sub_epi64_rmb 1977 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07] 1978 %q = load i64, i64* %ptr_b 1979 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 1980 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 1981 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) 1982 ret < 8 x i64> %res 1983} 1984 1985define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 1986 ;CHECK-LABEL: test_mask_sub_epi64_rmbk 1987 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f] 1988 %q = load i64, i64* %ptr_b 1989 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 1990 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 1991 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) 1992 ret < 8 x i64> %res 1993} 1994 1995define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) { 1996 ;CHECK-LABEL: test_mask_sub_epi64_rmbkz 1997 ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07] 1998 %q = load i64, i64* %ptr_b 1999 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2000 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2001 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) 2002 ret < 8 x i64> %res 2003} 2004 2005declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) 2006 2007define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) { 2008 ;CHECK-LABEL: test_mask_mul_epi32_rr 2009 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1] 2010 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2011 ret < 8 x i64> %res 2012} 2013 2014define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) { 2015 ;CHECK-LABEL: test_mask_mul_epi32_rrk 2016 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1] 2017 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2018 ret < 8 x i64> %res 2019} 2020 2021define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) { 2022 ;CHECK-LABEL: test_mask_mul_epi32_rrkz 2023 ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1] 2024 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 2025 ret < 8 x i64> %res 2026} 2027 2028define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 2029 ;CHECK-LABEL: test_mask_mul_epi32_rm 2030 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07] 2031 %b = load <16 x i32>, <16 x i32>* %ptr_b 2032 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2033 ret < 8 x i64> %res 2034} 2035 2036define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2037 ;CHECK-LABEL: test_mask_mul_epi32_rmk 2038 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f] 2039 %b = load <16 x i32>, <16 x i32>* %ptr_b 2040 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2041 ret < 8 x i64> %res 2042} 2043 2044define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) { 2045 ;CHECK-LABEL: test_mask_mul_epi32_rmkz 2046 ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07] 2047 %b = load <16 x i32>, <16 x i32>* %ptr_b 2048 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 2049 ret < 8 x i64> %res 2050} 2051 2052define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) { 2053 ;CHECK-LABEL: test_mask_mul_epi32_rmb 2054 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07] 2055 %q = load i64, i64* %ptr_b 2056 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2057 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2058 %b = bitcast <8 x i64> %b64 to <16 x i32> 2059 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2060 ret < 8 x i64> %res 2061} 2062 2063define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2064 ;CHECK-LABEL: test_mask_mul_epi32_rmbk 2065 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f] 2066 %q = load i64, i64* %ptr_b 2067 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2068 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2069 %b = bitcast <8 x i64> %b64 to <16 x i32> 2070 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2071 ret < 8 x i64> %res 2072} 2073 2074define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) { 2075 ;CHECK-LABEL: test_mask_mul_epi32_rmbkz 2076 ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07] 2077 %q = load i64, i64* %ptr_b 2078 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2079 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2080 %b = bitcast <8 x i64> %b64 to <16 x i32> 2081 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 2082 ret < 8 x i64> %res 2083} 2084 2085declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8) 2086 2087define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) { 2088 ;CHECK-LABEL: test_mask_mul_epu32_rr 2089 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1] 2090 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2091 ret < 8 x i64> %res 2092} 2093 2094define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) { 2095 ;CHECK-LABEL: test_mask_mul_epu32_rrk 2096 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1] 2097 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2098 ret < 8 x i64> %res 2099} 2100 2101define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) { 2102 ;CHECK-LABEL: test_mask_mul_epu32_rrkz 2103 ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1] 2104 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 2105 ret < 8 x i64> %res 2106} 2107 2108define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) { 2109 ;CHECK-LABEL: test_mask_mul_epu32_rm 2110 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07] 2111 %b = load <16 x i32>, <16 x i32>* %ptr_b 2112 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2113 ret < 8 x i64> %res 2114} 2115 2116define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2117 ;CHECK-LABEL: test_mask_mul_epu32_rmk 2118 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f] 2119 %b = load <16 x i32>, <16 x i32>* %ptr_b 2120 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2121 ret < 8 x i64> %res 2122} 2123 2124define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) { 2125 ;CHECK-LABEL: test_mask_mul_epu32_rmkz 2126 ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07] 2127 %b = load <16 x i32>, <16 x i32>* %ptr_b 2128 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 2129 ret < 8 x i64> %res 2130} 2131 2132define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) { 2133 ;CHECK-LABEL: test_mask_mul_epu32_rmb 2134 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07] 2135 %q = load i64, i64* %ptr_b 2136 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2137 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2138 %b = bitcast <8 x i64> %b64 to <16 x i32> 2139 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) 2140 ret < 8 x i64> %res 2141} 2142 2143define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) { 2144 ;CHECK-LABEL: test_mask_mul_epu32_rmbk 2145 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f] 2146 %q = load i64, i64* %ptr_b 2147 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2148 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2149 %b = bitcast <8 x i64> %b64 to <16 x i32> 2150 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) 2151 ret < 8 x i64> %res 2152} 2153 2154define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) { 2155 ;CHECK-LABEL: test_mask_mul_epu32_rmbkz 2156 ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07] 2157 %q = load i64, i64* %ptr_b 2158 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 2159 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer 2160 %b = bitcast <8 x i64> %b64 to <16 x i32> 2161 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) 2162 ret < 8 x i64> %res 2163} 2164 2165declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8) 2166