; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck %s

; Codegen test for vector rounding intrinsics with AVX-512VL enabled.
; Each function calls one rounding intrinsic on a 128-, 256- or 512-bit
; vector; the assertion comments inside it pin the expected instruction.
; As recorded in those assertions:
;   * floor uses immediate $9 and ceil uses immediate $10;
;   * unmasked 128/256-bit cases expect vroundpd/vroundps;
;   * 512-bit, masked and broadcast cases expect vrndscalepd/vrndscaleps.
; Variants per intrinsic: register operand, full-vector load, select with
; a passthru operand (_mask), select with zero (_maskz), and a splatted
; scalar load (_broadcast), plus the masked forms of the memory variants.

declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
declare <8 x double> @llvm.floor.v8f64(<8 x double> %p)
declare <16 x float> @llvm.floor.v16f32(<16 x float> %p)
declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
declare <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
declare <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
declare <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
declare <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)
declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)
declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)
declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)
declare <8 x double> @llvm.rint.v8f64(<8 x double> %p)
declare <16 x float> @llvm.rint.v16f32(<16 x float> %p)
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
declare <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
declare <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)

; ---- floor: register operand ----

define <2 x double> @floor_v2f64(<2 x double> %p) {
; CHECK-LABEL: floor_v2f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $9, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  ret <2 x double> %t
}

define <4 x float> @floor_v4f32(<4 x float> %p) {
; CHECK-LABEL: floor_v4f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $9, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  ret <4 x float> %t
}

define <4 x double> @floor_v4f64(<4 x double> %p) {
; CHECK-LABEL: floor_v4f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $9, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  ret <4 x double> %t
}

define <8 x float> @floor_v8f32(<8 x float> %p) {
; CHECK-LABEL: floor_v8f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $9, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  ret <8 x float> %t
}

define <8 x double> @floor_v8f64(<8 x double> %p) {
; CHECK-LABEL: floor_v8f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $9, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
  ret <8 x double> %t
}

define <16 x float> @floor_v16f32(<16 x float> %p) {
; CHECK-LABEL: floor_v16f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $9, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
  ret <16 x float> %t
}

; ---- floor: full-vector load expected to fold into the memory operand ----

define <2 x double> @floor_v2f64_load(<2 x double>* %ptr) {
; CHECK-LABEL: floor_v2f64_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $9, (%rdi), %xmm0
; CHECK-NEXT:    retq
  %p = load <2 x double>, <2 x double>* %ptr
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  ret <2 x double> %t
}

define <4 x float> @floor_v4f32_load(<4 x float>* %ptr) {
; CHECK-LABEL: floor_v4f32_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $9, (%rdi), %xmm0
; CHECK-NEXT:    retq
  %p = load <4 x float>, <4 x float>* %ptr
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  ret <4 x float> %t
}

define <4 x double> @floor_v4f64_load(<4 x double>* %ptr) {
; CHECK-LABEL: floor_v4f64_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $9, (%rdi), %ymm0
; CHECK-NEXT:    retq
  %p = load <4 x double>, <4 x double>* %ptr
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  ret <4 x double> %t
}

define <8 x float> @floor_v8f32_load(<8 x float>* %ptr) {
; CHECK-LABEL: floor_v8f32_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $9, (%rdi), %ymm0
; CHECK-NEXT:    retq
  %p = load <8 x float>, <8 x float>* %ptr
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  ret <8 x float> %t
}

define <8 x double> @floor_v8f64_load(<8 x double>* %ptr) {
; CHECK-LABEL: floor_v8f64_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $9, (%rdi), %zmm0
; CHECK-NEXT:    retq
  %p = load <8 x double>, <8 x double>* %ptr
  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
  ret <8 x double> %t
}

define <16 x float> @floor_v16f32_load(<16 x float>* %ptr) {
; CHECK-LABEL: floor_v16f32_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $9, (%rdi), %zmm0
; CHECK-NEXT:    retq
  %p = load <16 x float>, <16 x float>* %ptr
  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
  ret <16 x float> %t
}

; ---- floor: select between the rounded value and %passthru ----

define <2 x double> @floor_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: floor_v2f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
; CHECK-NEXT:    vrndscalepd $9, %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %c = icmp eq <2 x i64> %cmp, zeroinitializer
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
  ret <2 x double> %s
}

define <4 x float> @floor_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: floor_v4f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT:    vrndscaleps $9, %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
  ret <4 x float> %s
}

define <4 x double> @floor_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: floor_v4f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
; CHECK-NEXT:    vrndscalepd $9, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
  ret <4 x double> %s
}

define <8 x float> @floor_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: floor_v8f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT:    vrndscaleps $9, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
  ret <8 x float> %s
}

define <8 x double> @floor_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: floor_v8f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vrndscalepd $9, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
  ret <8 x double> %s
}

define <16 x float> @floor_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: floor_v16f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vrndscaleps $9, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
  ret <16 x float> %s
}

; ---- floor: select between the rounded value and zero ----

define <2 x double> @floor_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
; CHECK-LABEL: floor_v2f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscalepd $9, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <2 x i64> %cmp, zeroinitializer
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
  ret <2 x double> %s
}

define <4 x float> @floor_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
; CHECK-LABEL: floor_v4f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscaleps $9, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
  ret <4 x float> %s
}

define <4 x double> @floor_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
; CHECK-LABEL: floor_v4f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscalepd $9, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
  ret <4 x double> %s
}

define <8 x float> @floor_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
; CHECK-LABEL: floor_v8f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscaleps $9, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
  ret <8 x float> %s
}

define <8 x double> @floor_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
; CHECK-LABEL: floor_v8f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscalepd $9, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
  ret <8 x double> %s
}

define <16 x float> @floor_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
; CHECK-LABEL: floor_v16f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscaleps $9, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
  ret <16 x float> %s
}

; ---- floor: full-vector load combined with the passthru select ----

define <2 x double> @floor_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: floor_v2f64_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscalepd $9, (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <2 x i64> %cmp, zeroinitializer
  %p = load <2 x double>, <2 x double>* %ptr
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
  ret <2 x double> %s
}

define <4 x float> @floor_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: floor_v4f32_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscaleps $9, (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %p = load <4 x float>, <4 x float>* %ptr
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
  ret <4 x float> %s
}

define <4 x double> @floor_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: floor_v4f64_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscalepd $9, (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %p = load <4 x double>, <4 x double>* %ptr
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
  ret <4 x double> %s
}

define <8 x float> @floor_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: floor_v8f32_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscaleps $9, (%rdi), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %p = load <8 x float>, <8 x float>* %ptr
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
  ret <8 x float> %s
}

define <8 x double> @floor_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: floor_v8f64_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscalepd $9, (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %p = load <8 x double>, <8 x double>* %ptr
  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
  ret <8 x double> %s
}

define <16 x float> @floor_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: floor_v16f32_mask_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscaleps $9, (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %p = load <16 x float>, <16 x float>* %ptr
  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
  ret <16 x float> %s
}

; ---- floor: full-vector load combined with the zeroing select ----

define <2 x double> @floor_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
; CHECK-LABEL: floor_v2f64_maskz_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT:    vrndscalepd $9, (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <2 x i64> %cmp, zeroinitializer
  %p = load <2 x double>, <2 x double>* %ptr
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
  ret <2 x double> %s
}

define <4 x float> @floor_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
; CHECK-LABEL: floor_v4f32_maskz_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vrndscaleps $9, (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %p = load <4 x float>, <4 x float>* %ptr
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
  ret <4 x float> %s
}

define <4 x double> @floor_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
; CHECK-LABEL: floor_v4f64_maskz_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT:    vrndscalepd $9, (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %p = load <4 x double>, <4 x double>* %ptr
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
  ret <4 x double> %s
}

define <8 x float> @floor_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
; CHECK-LABEL: floor_v8f32_maskz_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vrndscaleps $9, (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %p = load <8 x float>, <8 x float>* %ptr
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
  ret <8 x float> %s
}

define <8 x double> @floor_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
; CHECK-LABEL: floor_v8f64_maskz_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vrndscalepd $9, (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %p = load <8 x double>, <8 x double>* %ptr
  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
  ret <8 x double> %s
}

define <16 x float> @floor_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
; CHECK-LABEL: floor_v16f32_maskz_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vrndscaleps $9, (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %p = load <16 x float>, <16 x float>* %ptr
  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
  ret <16 x float> %s
}

; ---- floor: scalar load splatted to every lane (insertelement + shufflevector) ----

define <2 x double> @floor_v2f64_broadcast(double* %ptr) {
; CHECK-LABEL: floor_v2f64_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to2}, %xmm0
; CHECK-NEXT:    retq
  %ps = load double, double* %ptr
  %pins = insertelement <2 x double> undef, double %ps, i32 0
  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  ret <2 x double> %t
}

define <4 x float> @floor_v4f32_broadcast(float* %ptr) {
; CHECK-LABEL: floor_v4f32_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to4}, %xmm0
; CHECK-NEXT:    retq
  %ps = load float, float* %ptr
  %pins = insertelement <4 x float> undef, float %ps, i32 0
  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  ret <4 x float> %t
}

define <4 x double> @floor_v4f64_broadcast(double* %ptr) {
; CHECK-LABEL: floor_v4f64_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to4}, %ymm0
; CHECK-NEXT:    retq
  %ps = load double, double* %ptr
  %pins = insertelement <4 x double> undef, double %ps, i32 0
  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  ret <4 x double> %t
}

define <8 x float> @floor_v8f32_broadcast(float* %ptr) {
; CHECK-LABEL: floor_v8f32_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to8}, %ymm0
; CHECK-NEXT:    retq
  %ps = load float, float* %ptr
  %pins = insertelement <8 x float> undef, float %ps, i32 0
  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  ret <8 x float> %t
}

define <8 x double> @floor_v8f64_broadcast(double* %ptr) {
; CHECK-LABEL: floor_v8f64_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to8}, %zmm0
; CHECK-NEXT:    retq
  %ps = load double, double* %ptr
  %pins = insertelement <8 x double> undef, double %ps, i32 0
  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
  ret <8 x double> %t
}

define <16 x float> @floor_v16f32_broadcast(float* %ptr) {
; CHECK-LABEL: floor_v16f32_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to16}, %zmm0
; CHECK-NEXT:    retq
  %ps = load float, float* %ptr
  %pins = insertelement <16 x float> undef, float %ps, i32 0
  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
  ret <16 x float> %t
}

; ---- floor: splatted scalar load combined with the passthru select ----

define <2 x double> @floor_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: floor_v2f64_mask_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to2}, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <2 x i64> %cmp, zeroinitializer
  %ps = load double, double* %ptr
  %pins = insertelement <2 x double> undef, double %ps, i32 0
  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
  ret <2 x double> %s
}

define <4 x float> @floor_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: floor_v4f32_mask_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to4}, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %ps = load float, float* %ptr
  %pins = insertelement <4 x float> undef, float %ps, i32 0
  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
  ret <4 x float> %s
}

define <4 x double> @floor_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: floor_v4f64_mask_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to4}, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %ps = load double, double* %ptr
  %pins = insertelement <4 x double> undef, double %ps, i32 0
  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
  ret <4 x double> %s
}

define <8 x float> @floor_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: floor_v8f32_mask_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to8}, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %ps = load float, float* %ptr
  %pins = insertelement <8 x float> undef, float %ps, i32 0
  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
  ret <8 x float> %s
}

define <8 x double> @floor_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: floor_v8f64_mask_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to8}, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %ps = load double, double* %ptr
  %pins = insertelement <8 x double> undef, double %ps, i32 0
  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
  ret <8 x double> %s
}

define <16 x float> @floor_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: floor_v16f32_mask_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to16}, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %ps = load float, float* %ptr
  %pins = insertelement <16 x float> undef, float %ps, i32 0
  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
  ret <16 x float> %s
}

; ---- floor: splatted scalar load combined with the zeroing select ----

define <2 x double> @floor_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
; CHECK-LABEL: floor_v2f64_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to2}, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <2 x i64> %cmp, zeroinitializer
  %ps = load double, double* %ptr
  %pins = insertelement <2 x double> undef, double %ps, i32 0
  %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
  ret <2 x double> %s
}

define <4 x float> @floor_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
; CHECK-LABEL: floor_v4f32_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to4}, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %ps = load float, float* %ptr
  %pins = insertelement <4 x float> undef, float %ps, i32 0
  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
  ret <4 x float> %s
}

define <4 x double> @floor_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
; CHECK-LABEL: floor_v4f64_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to4}, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %ps = load double, double* %ptr
  %pins = insertelement <4 x double> undef, double %ps, i32 0
  %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
  ret <4 x double> %s
}

define <8 x float> @floor_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
; CHECK-LABEL: floor_v8f32_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to8}, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %ps = load float, float* %ptr
  %pins = insertelement <8 x float> undef, float %ps, i32 0
  %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
  ret <8 x float> %s
}

define <8 x double> @floor_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
; CHECK-LABEL: floor_v8f64_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vrndscalepd $9, (%rdi){1to8}, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %ps = load double, double* %ptr
  %pins = insertelement <8 x double> undef, double %ps, i32 0
  %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer
  %t = call <8 x double> @llvm.floor.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
  ret <8 x double> %s
}

define <16 x float> @floor_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
; CHECK-LABEL: floor_v16f32_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vrndscaleps $9, (%rdi){1to16}, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %ps = load float, float* %ptr
  %pins = insertelement <16 x float> undef, float %ps, i32 0
  %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer
  %t = call <16 x float> @llvm.floor.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer
  ret <16 x float> %s
}

; ---- ceil: register operand ----

define <2 x double> @ceil_v2f64(<2 x double> %p) {
; CHECK-LABEL: ceil_v2f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $10, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
  ret <2 x double> %t
}

define <4 x float> @ceil_v4f32(<4 x float> %p) {
; CHECK-LABEL: ceil_v4f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $10, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  ret <4 x float> %t
}

define <4 x double> @ceil_v4f64(<4 x double> %p) {
; CHECK-LABEL: ceil_v4f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $10, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
  ret <4 x double> %t
}

define <8 x float> @ceil_v8f32(<8 x float> %p) {
; CHECK-LABEL: ceil_v8f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $10, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
  ret <8 x float> %t
}

define <8 x double> @ceil_v8f64(<8 x double> %p) {
; CHECK-LABEL: ceil_v8f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $10, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
  ret <8 x double> %t
}

define <16 x float> @ceil_v16f32(<16 x float> %p) {
; CHECK-LABEL: ceil_v16f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $10, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
  ret <16 x float> %t
}

; ---- ceil: full-vector load expected to fold into the memory operand ----

define <2 x double> @ceil_v2f64_load(<2 x double>* %ptr) {
; CHECK-LABEL: ceil_v2f64_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $10, (%rdi), %xmm0
; CHECK-NEXT:    retq
  %p = load <2 x double>, <2 x double>* %ptr
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
  ret <2 x double> %t
}

define <4 x float> @ceil_v4f32_load(<4 x float>* %ptr) {
; CHECK-LABEL: ceil_v4f32_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $10, (%rdi), %xmm0
; CHECK-NEXT:    retq
  %p = load <4 x float>, <4 x float>* %ptr
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  ret <4 x float> %t
}

define <4 x double> @ceil_v4f64_load(<4 x double>* %ptr) {
; CHECK-LABEL: ceil_v4f64_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $10, (%rdi), %ymm0
; CHECK-NEXT:    retq
  %p = load <4 x double>, <4 x double>* %ptr
  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
  ret <4 x double> %t
}

define <8 x float> @ceil_v8f32_load(<8 x float>* %ptr) {
; CHECK-LABEL: ceil_v8f32_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $10, (%rdi), %ymm0
; CHECK-NEXT:    retq
  %p = load <8 x float>, <8 x float>* %ptr
  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
  ret <8 x float> %t
}

define <8 x double> @ceil_v8f64_load(<8 x double>* %ptr) {
; CHECK-LABEL: ceil_v8f64_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $10, (%rdi), %zmm0
; CHECK-NEXT:    retq
  %p = load <8 x double>, <8 x double>* %ptr
  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
  ret <8 x double> %t
}

define <16 x float> @ceil_v16f32_load(<16 x float>* %ptr) {
; CHECK-LABEL: ceil_v16f32_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $10, (%rdi), %zmm0
; CHECK-NEXT:    retq
  %p = load <16 x float>, <16 x float>* %ptr
  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
  ret <16 x float> %t
}

; ---- ceil: select between the rounded value and %passthru ----

define <2 x double> @ceil_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: ceil_v2f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
; CHECK-NEXT:    vrndscalepd $10, %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %c = icmp eq <2 x i64> %cmp, zeroinitializer
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru
  ret <2 x double> %s
}

define <4 x float> @ceil_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: ceil_v4f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT:    vrndscaleps $10, %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
  ret <4 x float> %s
}

define <4 x double> @ceil_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: ceil_v4f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
; CHECK-NEXT:    vrndscalepd $10, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru
  ret <4 x double> %s
}

define <8 x float> @ceil_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: ceil_v8f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT:    vrndscaleps $10, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru
  ret <8 x float> %s
}

define <8 x double> @ceil_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: ceil_v8f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vrndscalepd $10, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i64> %cmp, zeroinitializer
  %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru
  ret <8 x double> %s
}

define <16 x float> @ceil_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: ceil_v16f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vrndscaleps $10, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %c = icmp eq <16 x i32> %cmp, zeroinitializer
  %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
  %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru
  ret <16 x float> %s
}

; ---- ceil: select between the rounded value and zero ----

define <2 x double> @ceil_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
; CHECK-LABEL: ceil_v2f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscalepd $10, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <2 x i64> %cmp, zeroinitializer
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
  %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer
  ret <2 x double> %s
}

define <4 x float> @ceil_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
; CHECK-LABEL: ceil_v4f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscaleps $10, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i32> %cmp, zeroinitializer
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer
  ret <4 x float> %s
}

define <4 x double> @ceil_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
; CHECK-LABEL: ceil_v4f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscalepd $10, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <4 x i64> %cmp, zeroinitializer
  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
  %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer
  ret <4 x double> %s
}

define <8 x float> @ceil_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
; CHECK-LABEL: ceil_v8f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscaleps $10, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %c = icmp eq <8 x i32> %cmp, zeroinitializer
  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
  %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer
  ret <8 x float> %s
}

define <8 x double> @ceil_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
; CHECK-LABEL: ceil_v8f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: 
vrndscalepd $10, %zmm0, %zmm0 {%k1} {z} 952; CHECK-NEXT: retq 953 %c = icmp eq <8 x i64> %cmp, zeroinitializer 954 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p) 955 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 956 ret <8 x double> %s 957} 958 959define <16 x float> @ceil_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) { 960; CHECK-LABEL: ceil_v16f32_maskz: 961; CHECK: ## %bb.0: 962; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 963; CHECK-NEXT: vrndscaleps $10, %zmm0, %zmm0 {%k1} {z} 964; CHECK-NEXT: retq 965 %c = icmp eq <16 x i32> %cmp, zeroinitializer 966 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p) 967 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 968 ret <16 x float> %s 969} 970 971define <2 x double> @ceil_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) { 972; CHECK-LABEL: ceil_v2f64_mask_load: 973; CHECK: ## %bb.0: 974; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 975; CHECK-NEXT: vrndscalepd $10, (%rdi), %xmm0 {%k1} 976; CHECK-NEXT: retq 977 %c = icmp eq <2 x i64> %cmp, zeroinitializer 978 %p = load <2 x double>, <2 x double>* %ptr 979 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) 980 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 981 ret <2 x double> %s 982} 983 984define <4 x float> @ceil_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) { 985; CHECK-LABEL: ceil_v4f32_mask_load: 986; CHECK: ## %bb.0: 987; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 988; CHECK-NEXT: vrndscaleps $10, (%rdi), %xmm0 {%k1} 989; CHECK-NEXT: retq 990 %c = icmp eq <4 x i32> %cmp, zeroinitializer 991 %p = load <4 x float>, <4 x float>* %ptr 992 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) 993 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 994 ret <4 x float> %s 995} 996 997define <4 x double> @ceil_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) { 998; CHECK-LABEL: ceil_v4f64_mask_load: 
999; CHECK: ## %bb.0: 1000; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1001; CHECK-NEXT: vrndscalepd $10, (%rdi), %ymm0 {%k1} 1002; CHECK-NEXT: retq 1003 %c = icmp eq <4 x i64> %cmp, zeroinitializer 1004 %p = load <4 x double>, <4 x double>* %ptr 1005 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) 1006 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 1007 ret <4 x double> %s 1008} 1009 1010define <8 x float> @ceil_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) { 1011; CHECK-LABEL: ceil_v8f32_mask_load: 1012; CHECK: ## %bb.0: 1013; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 1014; CHECK-NEXT: vrndscaleps $10, (%rdi), %ymm0 {%k1} 1015; CHECK-NEXT: retq 1016 %c = icmp eq <8 x i32> %cmp, zeroinitializer 1017 %p = load <8 x float>, <8 x float>* %ptr 1018 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) 1019 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 1020 ret <8 x float> %s 1021} 1022 1023define <8 x double> @ceil_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) { 1024; CHECK-LABEL: ceil_v8f64_mask_load: 1025; CHECK: ## %bb.0: 1026; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1027; CHECK-NEXT: vrndscalepd $10, (%rdi), %zmm0 {%k1} 1028; CHECK-NEXT: retq 1029 %c = icmp eq <8 x i64> %cmp, zeroinitializer 1030 %p = load <8 x double>, <8 x double>* %ptr 1031 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p) 1032 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 1033 ret <8 x double> %s 1034} 1035 1036define <16 x float> @ceil_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) { 1037; CHECK-LABEL: ceil_v16f32_mask_load: 1038; CHECK: ## %bb.0: 1039; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 1040; CHECK-NEXT: vrndscaleps $10, (%rdi), %zmm0 {%k1} 1041; CHECK-NEXT: retq 1042 %c = icmp eq <16 x i32> %cmp, zeroinitializer 1043 %p = load <16 x float>, <16 x float>* %ptr 1044 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p) 1045 %s = 
select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru 1046 ret <16 x float> %s 1047} 1048 1049define <2 x double> @ceil_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) { 1050; CHECK-LABEL: ceil_v2f64_maskz_load: 1051; CHECK: ## %bb.0: 1052; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 1053; CHECK-NEXT: vrndscalepd $10, (%rdi), %xmm0 {%k1} {z} 1054; CHECK-NEXT: retq 1055 %c = icmp eq <2 x i64> %cmp, zeroinitializer 1056 %p = load <2 x double>, <2 x double>* %ptr 1057 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) 1058 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 1059 ret <2 x double> %s 1060} 1061 1062define <4 x float> @ceil_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) { 1063; CHECK-LABEL: ceil_v4f32_maskz_load: 1064; CHECK: ## %bb.0: 1065; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 1066; CHECK-NEXT: vrndscaleps $10, (%rdi), %xmm0 {%k1} {z} 1067; CHECK-NEXT: retq 1068 %c = icmp eq <4 x i32> %cmp, zeroinitializer 1069 %p = load <4 x float>, <4 x float>* %ptr 1070 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) 1071 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer 1072 ret <4 x float> %s 1073} 1074 1075define <4 x double> @ceil_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) { 1076; CHECK-LABEL: ceil_v4f64_maskz_load: 1077; CHECK: ## %bb.0: 1078; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 1079; CHECK-NEXT: vrndscalepd $10, (%rdi), %ymm0 {%k1} {z} 1080; CHECK-NEXT: retq 1081 %c = icmp eq <4 x i64> %cmp, zeroinitializer 1082 %p = load <4 x double>, <4 x double>* %ptr 1083 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) 1084 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer 1085 ret <4 x double> %s 1086} 1087 1088define <8 x float> @ceil_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) { 1089; CHECK-LABEL: ceil_v8f32_maskz_load: 1090; CHECK: ## %bb.0: 1091; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 1092; CHECK-NEXT: vrndscaleps $10, (%rdi), %ymm0 {%k1} {z} 1093; 
CHECK-NEXT: retq 1094 %c = icmp eq <8 x i32> %cmp, zeroinitializer 1095 %p = load <8 x float>, <8 x float>* %ptr 1096 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) 1097 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer 1098 ret <8 x float> %s 1099} 1100 1101define <8 x double> @ceil_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) { 1102; CHECK-LABEL: ceil_v8f64_maskz_load: 1103; CHECK: ## %bb.0: 1104; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1105; CHECK-NEXT: vrndscalepd $10, (%rdi), %zmm0 {%k1} {z} 1106; CHECK-NEXT: retq 1107 %c = icmp eq <8 x i64> %cmp, zeroinitializer 1108 %p = load <8 x double>, <8 x double>* %ptr 1109 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p) 1110 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 1111 ret <8 x double> %s 1112} 1113 1114define <16 x float> @ceil_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) { 1115; CHECK-LABEL: ceil_v16f32_maskz_load: 1116; CHECK: ## %bb.0: 1117; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 1118; CHECK-NEXT: vrndscaleps $10, (%rdi), %zmm0 {%k1} {z} 1119; CHECK-NEXT: retq 1120 %c = icmp eq <16 x i32> %cmp, zeroinitializer 1121 %p = load <16 x float>, <16 x float>* %ptr 1122 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p) 1123 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 1124 ret <16 x float> %s 1125} 1126 1127define <2 x double> @ceil_v2f64_broadcast(double* %ptr) { 1128; CHECK-LABEL: ceil_v2f64_broadcast: 1129; CHECK: ## %bb.0: 1130; CHECK-NEXT: vrndscalepd $10, (%rdi){1to2}, %xmm0 1131; CHECK-NEXT: retq 1132 %ps = load double, double* %ptr 1133 %pins = insertelement <2 x double> undef, double %ps, i32 0 1134 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 1135 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) 1136 ret <2 x double> %t 1137} 1138 1139define <4 x float> @ceil_v4f32_broadcast(float* %ptr) { 1140; CHECK-LABEL: ceil_v4f32_broadcast: 1141; CHECK: ## 
%bb.0: 1142; CHECK-NEXT: vrndscaleps $10, (%rdi){1to4}, %xmm0 1143; CHECK-NEXT: retq 1144 %ps = load float, float* %ptr 1145 %pins = insertelement <4 x float> undef, float %ps, i32 0 1146 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 1147 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) 1148 ret <4 x float> %t 1149} 1150 1151define <4 x double> @ceil_v4f64_broadcast(double* %ptr){ 1152; CHECK-LABEL: ceil_v4f64_broadcast: 1153; CHECK: ## %bb.0: 1154; CHECK-NEXT: vrndscalepd $10, (%rdi){1to4}, %ymm0 1155; CHECK-NEXT: retq 1156 %ps = load double, double* %ptr 1157 %pins = insertelement <4 x double> undef, double %ps, i32 0 1158 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 1159 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) 1160 ret <4 x double> %t 1161} 1162 1163define <8 x float> @ceil_v8f32_broadcast(float* %ptr) { 1164; CHECK-LABEL: ceil_v8f32_broadcast: 1165; CHECK: ## %bb.0: 1166; CHECK-NEXT: vrndscaleps $10, (%rdi){1to8}, %ymm0 1167; CHECK-NEXT: retq 1168 %ps = load float, float* %ptr 1169 %pins = insertelement <8 x float> undef, float %ps, i32 0 1170 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 1171 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) 1172 ret <8 x float> %t 1173} 1174 1175define <8 x double> @ceil_v8f64_broadcast(double* %ptr){ 1176; CHECK-LABEL: ceil_v8f64_broadcast: 1177; CHECK: ## %bb.0: 1178; CHECK-NEXT: vrndscalepd $10, (%rdi){1to8}, %zmm0 1179; CHECK-NEXT: retq 1180 %ps = load double, double* %ptr 1181 %pins = insertelement <8 x double> undef, double %ps, i32 0 1182 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 1183 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p) 1184 ret <8 x double> %t 1185} 1186 1187define <16 x float> @ceil_v16f32_broadcast(float* %ptr) { 1188; CHECK-LABEL: ceil_v16f32_broadcast: 1189; CHECK: ## %bb.0: 1190; CHECK-NEXT: vrndscaleps $10, 
(%rdi){1to16}, %zmm0 1191; CHECK-NEXT: retq 1192 %ps = load float, float* %ptr 1193 %pins = insertelement <16 x float> undef, float %ps, i32 0 1194 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 1195 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p) 1196 ret <16 x float> %t 1197} 1198 1199define <2 x double> @ceil_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) { 1200; CHECK-LABEL: ceil_v2f64_mask_broadcast: 1201; CHECK: ## %bb.0: 1202; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 1203; CHECK-NEXT: vrndscalepd $10, (%rdi){1to2}, %xmm0 {%k1} 1204; CHECK-NEXT: retq 1205 %c = icmp eq <2 x i64> %cmp, zeroinitializer 1206 %ps = load double, double* %ptr 1207 %pins = insertelement <2 x double> undef, double %ps, i32 0 1208 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 1209 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) 1210 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 1211 ret <2 x double> %s 1212} 1213 1214define <4 x float> @ceil_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) { 1215; CHECK-LABEL: ceil_v4f32_mask_broadcast: 1216; CHECK: ## %bb.0: 1217; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 1218; CHECK-NEXT: vrndscaleps $10, (%rdi){1to4}, %xmm0 {%k1} 1219; CHECK-NEXT: retq 1220 %c = icmp eq <4 x i32> %cmp, zeroinitializer 1221 %ps = load float, float* %ptr 1222 %pins = insertelement <4 x float> undef, float %ps, i32 0 1223 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 1224 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) 1225 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 1226 ret <4 x float> %s 1227} 1228 1229define <4 x double> @ceil_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) { 1230; CHECK-LABEL: ceil_v4f64_mask_broadcast: 1231; CHECK: ## %bb.0: 1232; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1233; CHECK-NEXT: 
vrndscalepd $10, (%rdi){1to4}, %ymm0 {%k1} 1234; CHECK-NEXT: retq 1235 %c = icmp eq <4 x i64> %cmp, zeroinitializer 1236 %ps = load double, double* %ptr 1237 %pins = insertelement <4 x double> undef, double %ps, i32 0 1238 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 1239 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) 1240 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 1241 ret <4 x double> %s 1242} 1243 1244define <8 x float> @ceil_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) { 1245; CHECK-LABEL: ceil_v8f32_mask_broadcast: 1246; CHECK: ## %bb.0: 1247; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 1248; CHECK-NEXT: vrndscaleps $10, (%rdi){1to8}, %ymm0 {%k1} 1249; CHECK-NEXT: retq 1250 %c = icmp eq <8 x i32> %cmp, zeroinitializer 1251 %ps = load float, float* %ptr 1252 %pins = insertelement <8 x float> undef, float %ps, i32 0 1253 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 1254 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) 1255 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 1256 ret <8 x float> %s 1257} 1258 1259define <8 x double> @ceil_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) { 1260; CHECK-LABEL: ceil_v8f64_mask_broadcast: 1261; CHECK: ## %bb.0: 1262; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1263; CHECK-NEXT: vrndscalepd $10, (%rdi){1to8}, %zmm0 {%k1} 1264; CHECK-NEXT: retq 1265 %c = icmp eq <8 x i64> %cmp, zeroinitializer 1266 %ps = load double, double* %ptr 1267 %pins = insertelement <8 x double> undef, double %ps, i32 0 1268 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 1269 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p) 1270 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 1271 ret <8 x double> %s 1272} 1273 1274define <16 x float> @ceil_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) 
{ 1275; CHECK-LABEL: ceil_v16f32_mask_broadcast: 1276; CHECK: ## %bb.0: 1277; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 1278; CHECK-NEXT: vrndscaleps $10, (%rdi){1to16}, %zmm0 {%k1} 1279; CHECK-NEXT: retq 1280 %c = icmp eq <16 x i32> %cmp, zeroinitializer 1281 %ps = load float, float* %ptr 1282 %pins = insertelement <16 x float> undef, float %ps, i32 0 1283 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 1284 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p) 1285 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru 1286 ret <16 x float> %s 1287} 1288 1289define <2 x double> @ceil_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) { 1290; CHECK-LABEL: ceil_v2f64_maskz_broadcast: 1291; CHECK: ## %bb.0: 1292; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 1293; CHECK-NEXT: vrndscalepd $10, (%rdi){1to2}, %xmm0 {%k1} {z} 1294; CHECK-NEXT: retq 1295 %c = icmp eq <2 x i64> %cmp, zeroinitializer 1296 %ps = load double, double* %ptr 1297 %pins = insertelement <2 x double> undef, double %ps, i32 0 1298 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 1299 %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) 1300 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 1301 ret <2 x double> %s 1302} 1303 1304define <4 x float> @ceil_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) { 1305; CHECK-LABEL: ceil_v4f32_maskz_broadcast: 1306; CHECK: ## %bb.0: 1307; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 1308; CHECK-NEXT: vrndscaleps $10, (%rdi){1to4}, %xmm0 {%k1} {z} 1309; CHECK-NEXT: retq 1310 %c = icmp eq <4 x i32> %cmp, zeroinitializer 1311 %ps = load float, float* %ptr 1312 %pins = insertelement <4 x float> undef, float %ps, i32 0 1313 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 1314 %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) 1315 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer 1316 ret <4 x float> %s 
1317} 1318 1319define <4 x double> @ceil_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) { 1320; CHECK-LABEL: ceil_v4f64_maskz_broadcast: 1321; CHECK: ## %bb.0: 1322; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 1323; CHECK-NEXT: vrndscalepd $10, (%rdi){1to4}, %ymm0 {%k1} {z} 1324; CHECK-NEXT: retq 1325 %c = icmp eq <4 x i64> %cmp, zeroinitializer 1326 %ps = load double, double* %ptr 1327 %pins = insertelement <4 x double> undef, double %ps, i32 0 1328 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 1329 %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) 1330 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer 1331 ret <4 x double> %s 1332} 1333 1334define <8 x float> @ceil_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) { 1335; CHECK-LABEL: ceil_v8f32_maskz_broadcast: 1336; CHECK: ## %bb.0: 1337; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 1338; CHECK-NEXT: vrndscaleps $10, (%rdi){1to8}, %ymm0 {%k1} {z} 1339; CHECK-NEXT: retq 1340 %c = icmp eq <8 x i32> %cmp, zeroinitializer 1341 %ps = load float, float* %ptr 1342 %pins = insertelement <8 x float> undef, float %ps, i32 0 1343 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 1344 %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) 1345 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer 1346 ret <8 x float> %s 1347} 1348 1349define <8 x double> @ceil_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) { 1350; CHECK-LABEL: ceil_v8f64_maskz_broadcast: 1351; CHECK: ## %bb.0: 1352; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1353; CHECK-NEXT: vrndscalepd $10, (%rdi){1to8}, %zmm0 {%k1} {z} 1354; CHECK-NEXT: retq 1355 %c = icmp eq <8 x i64> %cmp, zeroinitializer 1356 %ps = load double, double* %ptr 1357 %pins = insertelement <8 x double> undef, double %ps, i32 0 1358 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 1359 %t = call <8 x double> @llvm.ceil.v8f64(<8 x double> %p) 
1360 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 1361 ret <8 x double> %s 1362} 1363 1364define <16 x float> @ceil_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) { 1365; CHECK-LABEL: ceil_v16f32_maskz_broadcast: 1366; CHECK: ## %bb.0: 1367; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 1368; CHECK-NEXT: vrndscaleps $10, (%rdi){1to16}, %zmm0 {%k1} {z} 1369; CHECK-NEXT: retq 1370 %c = icmp eq <16 x i32> %cmp, zeroinitializer 1371 %ps = load float, float* %ptr 1372 %pins = insertelement <16 x float> undef, float %ps, i32 0 1373 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 1374 %t = call <16 x float> @llvm.ceil.v16f32(<16 x float> %p) 1375 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 1376 ret <16 x float> %s 1377} 1378 1379define <2 x double> @trunc_v2f64(<2 x double> %p) { 1380; CHECK-LABEL: trunc_v2f64: 1381; CHECK: ## %bb.0: 1382; CHECK-NEXT: vroundpd $11, %xmm0, %xmm0 1383; CHECK-NEXT: retq 1384 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) 1385 ret <2 x double> %t 1386} 1387 1388define <4 x float> @trunc_v4f32(<4 x float> %p) { 1389; CHECK-LABEL: trunc_v4f32: 1390; CHECK: ## %bb.0: 1391; CHECK-NEXT: vroundps $11, %xmm0, %xmm0 1392; CHECK-NEXT: retq 1393 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) 1394 ret <4 x float> %t 1395} 1396 1397define <4 x double> @trunc_v4f64(<4 x double> %p){ 1398; CHECK-LABEL: trunc_v4f64: 1399; CHECK: ## %bb.0: 1400; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0 1401; CHECK-NEXT: retq 1402 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) 1403 ret <4 x double> %t 1404} 1405 1406define <8 x float> @trunc_v8f32(<8 x float> %p) { 1407; CHECK-LABEL: trunc_v8f32: 1408; CHECK: ## %bb.0: 1409; CHECK-NEXT: vroundps $11, %ymm0, %ymm0 1410; CHECK-NEXT: retq 1411 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) 1412 ret <8 x float> %t 1413} 1414 1415define <8 x double> @trunc_v8f64(<8 x double> %p){ 1416; CHECK-LABEL: 
trunc_v8f64: 1417; CHECK: ## %bb.0: 1418; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0 1419; CHECK-NEXT: retq 1420 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p) 1421 ret <8 x double> %t 1422} 1423 1424define <16 x float> @trunc_v16f32(<16 x float> %p) { 1425; CHECK-LABEL: trunc_v16f32: 1426; CHECK: ## %bb.0: 1427; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0 1428; CHECK-NEXT: retq 1429 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p) 1430 ret <16 x float> %t 1431} 1432 1433define <2 x double> @trunc_v2f64_load(<2 x double>* %ptr) { 1434; CHECK-LABEL: trunc_v2f64_load: 1435; CHECK: ## %bb.0: 1436; CHECK-NEXT: vroundpd $11, (%rdi), %xmm0 1437; CHECK-NEXT: retq 1438 %p = load <2 x double>, <2 x double>* %ptr 1439 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) 1440 ret <2 x double> %t 1441} 1442 1443define <4 x float> @trunc_v4f32_load(<4 x float>* %ptr) { 1444; CHECK-LABEL: trunc_v4f32_load: 1445; CHECK: ## %bb.0: 1446; CHECK-NEXT: vroundps $11, (%rdi), %xmm0 1447; CHECK-NEXT: retq 1448 %p = load <4 x float>, <4 x float>* %ptr 1449 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) 1450 ret <4 x float> %t 1451} 1452 1453define <4 x double> @trunc_v4f64_load(<4 x double>* %ptr){ 1454; CHECK-LABEL: trunc_v4f64_load: 1455; CHECK: ## %bb.0: 1456; CHECK-NEXT: vroundpd $11, (%rdi), %ymm0 1457; CHECK-NEXT: retq 1458 %p = load <4 x double>, <4 x double>* %ptr 1459 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) 1460 ret <4 x double> %t 1461} 1462 1463define <8 x float> @trunc_v8f32_load(<8 x float>* %ptr) { 1464; CHECK-LABEL: trunc_v8f32_load: 1465; CHECK: ## %bb.0: 1466; CHECK-NEXT: vroundps $11, (%rdi), %ymm0 1467; CHECK-NEXT: retq 1468 %p = load <8 x float>, <8 x float>* %ptr 1469 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) 1470 ret <8 x float> %t 1471} 1472 1473define <8 x double> @trunc_v8f64_load(<8 x double>* %ptr){ 1474; CHECK-LABEL: trunc_v8f64_load: 1475; CHECK: ## %bb.0: 1476; CHECK-NEXT: vrndscalepd $11, 
(%rdi), %zmm0 1477; CHECK-NEXT: retq 1478 %p = load <8 x double>, <8 x double>* %ptr 1479 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p) 1480 ret <8 x double> %t 1481} 1482 1483define <16 x float> @trunc_v16f32_load(<16 x float>* %ptr) { 1484; CHECK-LABEL: trunc_v16f32_load: 1485; CHECK: ## %bb.0: 1486; CHECK-NEXT: vrndscaleps $11, (%rdi), %zmm0 1487; CHECK-NEXT: retq 1488 %p = load <16 x float>, <16 x float>* %ptr 1489 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p) 1490 ret <16 x float> %t 1491} 1492 1493define <2 x double> @trunc_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) { 1494; CHECK-LABEL: trunc_v2f64_mask: 1495; CHECK: ## %bb.0: 1496; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1 1497; CHECK-NEXT: vrndscalepd $11, %xmm0, %xmm1 {%k1} 1498; CHECK-NEXT: vmovapd %xmm1, %xmm0 1499; CHECK-NEXT: retq 1500 %c = icmp eq <2 x i64> %cmp, zeroinitializer 1501 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) 1502 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 1503 ret <2 x double> %s 1504} 1505 1506define <4 x float> @trunc_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) { 1507; CHECK-LABEL: trunc_v4f32_mask: 1508; CHECK: ## %bb.0: 1509; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1 1510; CHECK-NEXT: vrndscaleps $11, %xmm0, %xmm1 {%k1} 1511; CHECK-NEXT: vmovaps %xmm1, %xmm0 1512; CHECK-NEXT: retq 1513 %c = icmp eq <4 x i32> %cmp, zeroinitializer 1514 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) 1515 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 1516 ret <4 x float> %s 1517} 1518 1519define <4 x double> @trunc_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) { 1520; CHECK-LABEL: trunc_v4f64_mask: 1521; CHECK: ## %bb.0: 1522; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1 1523; CHECK-NEXT: vrndscalepd $11, %ymm0, %ymm1 {%k1} 1524; CHECK-NEXT: vmovapd %ymm1, %ymm0 1525; CHECK-NEXT: retq 1526 %c = icmp eq <4 x i64> %cmp, zeroinitializer 1527 %t = call <4 x 
double> @llvm.trunc.v4f64(<4 x double> %p) 1528 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 1529 ret <4 x double> %s 1530} 1531 1532define <8 x float> @trunc_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) { 1533; CHECK-LABEL: trunc_v8f32_mask: 1534; CHECK: ## %bb.0: 1535; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1 1536; CHECK-NEXT: vrndscaleps $11, %ymm0, %ymm1 {%k1} 1537; CHECK-NEXT: vmovaps %ymm1, %ymm0 1538; CHECK-NEXT: retq 1539 %c = icmp eq <8 x i32> %cmp, zeroinitializer 1540 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) 1541 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 1542 ret <8 x float> %s 1543} 1544 1545define <8 x double> @trunc_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) { 1546; CHECK-LABEL: trunc_v8f64_mask: 1547; CHECK: ## %bb.0: 1548; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1 1549; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm1 {%k1} 1550; CHECK-NEXT: vmovapd %zmm1, %zmm0 1551; CHECK-NEXT: retq 1552 %c = icmp eq <8 x i64> %cmp, zeroinitializer 1553 %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p) 1554 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 1555 ret <8 x double> %s 1556} 1557 1558define <16 x float> @trunc_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) { 1559; CHECK-LABEL: trunc_v16f32_mask: 1560; CHECK: ## %bb.0: 1561; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 1562; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm1 {%k1} 1563; CHECK-NEXT: vmovaps %zmm1, %zmm0 1564; CHECK-NEXT: retq 1565 %c = icmp eq <16 x i32> %cmp, zeroinitializer 1566 %t = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p) 1567 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru 1568 ret <16 x float> %s 1569} 1570 1571define <2 x double> @trunc_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) { 1572; CHECK-LABEL: trunc_v2f64_maskz: 1573; CHECK: ## %bb.0: 1574; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 1575; CHECK-NEXT: vrndscalepd $11, %xmm0, 
%xmm0 {%k1} {z} 1576; CHECK-NEXT: retq 1577 %c = icmp eq <2 x i64> %cmp, zeroinitializer 1578 %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) 1579 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 1580 ret <2 x double> %s 1581} 1582 1583define <4 x float> @trunc_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) { 1584; CHECK-LABEL: trunc_v4f32_maskz: 1585; CHECK: ## %bb.0: 1586; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 1587; CHECK-NEXT: vrndscaleps $11, %xmm0, %xmm0 {%k1} {z} 1588; CHECK-NEXT: retq 1589 %c = icmp eq <4 x i32> %cmp, zeroinitializer 1590 %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) 1591 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer 1592 ret <4 x float> %s 1593} 1594 1595define <4 x double> @trunc_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) { 1596; CHECK-LABEL: trunc_v4f64_maskz: 1597; CHECK: ## %bb.0: 1598; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 1599; CHECK-NEXT: vrndscalepd $11, %ymm0, %ymm0 {%k1} {z} 1600; CHECK-NEXT: retq 1601 %c = icmp eq <4 x i64> %cmp, zeroinitializer 1602 %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) 1603 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer 1604 ret <4 x double> %s 1605} 1606 1607define <8 x float> @trunc_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) { 1608; CHECK-LABEL: trunc_v8f32_maskz: 1609; CHECK: ## %bb.0: 1610; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 1611; CHECK-NEXT: vrndscaleps $11, %ymm0, %ymm0 {%k1} {z} 1612; CHECK-NEXT: retq 1613 %c = icmp eq <8 x i32> %cmp, zeroinitializer 1614 %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) 1615 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer 1616 ret <8 x float> %s 1617} 1618 1619define <8 x double> @trunc_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) { 1620; CHECK-LABEL: trunc_v8f64_maskz: 1621; CHECK: ## %bb.0: 1622; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1623; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0 {%k1} {z} 1624; CHECK-NEXT: retq 1625 %c = 
icmp eq <8 x i64> %cmp, zeroinitializer
  %t = call <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
  %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer
  ret <8 x double> %s
}

; Conventions used by the tests below (all visible in the IR and assertions):
;  * the lane mask is always (%cmp == 0), which the assertions expect to be
;    materialised into %k1 with vptestnmq/vptestnmd;
;  * "mask" variants select between the rounded value and %passthru,
;    "maskz" variants select between the rounded value and zeroinitializer;
;  * the assertions expect immediate $11 for llvm.trunc and $4 for llvm.rint.

; llvm.trunc, zero-masking, 512-bit register operand.
define <16 x float> @trunc_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
; CHECK-LABEL: trunc_v16f32_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %rnd = call <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
  %res = select <16 x i1> %mask, <16 x float> %rnd, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; llvm.trunc, merge-masking, full-vector load from %ptr. The assertions expect
; the load to appear as the (%rdi) memory operand of vrndscale.
define <2 x double> @trunc_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: trunc_v2f64_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscalepd $11, (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <2 x i64> %cmp, zeroinitializer
  %vec = load <2 x double>, <2 x double>* %ptr
  %rnd = call <2 x double> @llvm.trunc.v2f64(<2 x double> %vec)
  %res = select <2 x i1> %mask, <2 x double> %rnd, <2 x double> %passthru
  ret <2 x double> %res
}

define <4 x float> @trunc_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: trunc_v4f32_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscaleps $11, (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i32> %cmp, zeroinitializer
  %vec = load <4 x float>, <4 x float>* %ptr
  %rnd = call <4 x float> @llvm.trunc.v4f32(<4 x float> %vec)
  %res = select <4 x i1> %mask, <4 x float> %rnd, <4 x float> %passthru
  ret <4 x float> %res
}

define <4 x double> @trunc_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: trunc_v4f64_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscalepd $11, (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i64> %cmp, zeroinitializer
  %vec = load <4 x double>, <4 x double>* %ptr
  %rnd = call <4 x double> @llvm.trunc.v4f64(<4 x double> %vec)
  %res = select <4 x i1> %mask, <4 x double> %rnd, <4 x double> %passthru
  ret <4 x double> %res
}

define <8 x float> @trunc_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: trunc_v8f32_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscaleps $11, (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i32> %cmp, zeroinitializer
  %vec = load <8 x float>, <8 x float>* %ptr
  %rnd = call <8 x float> @llvm.trunc.v8f32(<8 x float> %vec)
  %res = select <8 x i1> %mask, <8 x float> %rnd, <8 x float> %passthru
  ret <8 x float> %res
}

define <8 x double> @trunc_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: trunc_v8f64_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i64> %cmp, zeroinitializer
  %vec = load <8 x double>, <8 x double>* %ptr
  %rnd = call <8 x double> @llvm.trunc.v8f64(<8 x double> %vec)
  %res = select <8 x i1> %mask, <8 x double> %rnd, <8 x double> %passthru
  ret <8 x double> %res
}

define <16 x float> @trunc_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: trunc_v16f32_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscaleps $11, (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %vec = load <16 x float>, <16 x float>* %ptr
  %rnd = call <16 x float> @llvm.trunc.v16f32(<16 x float> %vec)
  %res = select <16 x i1> %mask, <16 x float> %rnd, <16 x float> %passthru
  ret <16 x float> %res
}

; llvm.trunc, zero-masking, full-vector load from %ptr.
define <2 x double> @trunc_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
; CHECK-LABEL: trunc_v2f64_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT: vrndscalepd $11, (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <2 x i64> %cmp, zeroinitializer
  %vec = load <2 x double>, <2 x double>* %ptr
  %rnd = call <2 x double> @llvm.trunc.v2f64(<2 x double> %vec)
  %res = select <2 x i1> %mask, <2 x double> %rnd, <2 x double> zeroinitializer
  ret <2 x double> %res
}

define <4 x float> @trunc_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
; CHECK-LABEL: trunc_v4f32_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT: vrndscaleps $11, (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i32> %cmp, zeroinitializer
  %vec = load <4 x float>, <4 x float>* %ptr
  %rnd = call <4 x float> @llvm.trunc.v4f32(<4 x float> %vec)
  %res = select <4 x i1> %mask, <4 x float> %rnd, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x double> @trunc_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
; CHECK-LABEL: trunc_v4f64_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT: vrndscalepd $11, (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i64> %cmp, zeroinitializer
  %vec = load <4 x double>, <4 x double>* %ptr
  %rnd = call <4 x double> @llvm.trunc.v4f64(<4 x double> %vec)
  %res = select <4 x i1> %mask, <4 x double> %rnd, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <8 x float> @trunc_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
; CHECK-LABEL: trunc_v8f32_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT: vrndscaleps $11, (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i32> %cmp, zeroinitializer
  %vec = load <8 x float>, <8 x float>* %ptr
  %rnd = call <8 x float> @llvm.trunc.v8f32(<8 x float> %vec)
  %res = select <8 x i1> %mask, <8 x float> %rnd, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x double> @trunc_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
; CHECK-LABEL: trunc_v8f64_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT: vrndscalepd $11, (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i64> %cmp, zeroinitializer
  %vec = load <8 x double>, <8 x double>* %ptr
  %rnd = call <8 x double> @llvm.trunc.v8f64(<8 x double> %vec)
  %res = select <8 x i1> %mask, <8 x double> %rnd, <8 x double> zeroinitializer
  ret <8 x double> %res
}

define <16 x float> @trunc_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
; CHECK-LABEL: trunc_v16f32_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT: vrndscaleps $11, (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %vec = load <16 x float>, <16 x float>* %ptr
  %rnd = call <16 x float> @llvm.trunc.v16f32(<16 x float> %vec)
  %res = select <16 x i1> %mask, <16 x float> %rnd, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; llvm.trunc of a splat: a scalar is loaded, inserted into lane 0 and shuffled
; with an all-zero index vector. The assertions expect the splat to appear as a
; {1toN} memory operand of vrndscale at every vector width.
define <2 x double> @trunc_v2f64_broadcast(double* %ptr) {
; CHECK-LABEL: trunc_v2f64_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $11, (%rdi){1to2}, %xmm0
; CHECK-NEXT: retq
  %elt = load double, double* %ptr
  %ins = insertelement <2 x double> undef, double %elt, i32 0
  %vec = shufflevector <2 x double> %ins, <2 x double> undef, <2 x i32> zeroinitializer
  %rnd = call <2 x double> @llvm.trunc.v2f64(<2 x double> %vec)
  ret <2 x double> %rnd
}

define <4 x float> @trunc_v4f32_broadcast(float* %ptr) {
; CHECK-LABEL: trunc_v4f32_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $11, (%rdi){1to4}, %xmm0
; CHECK-NEXT: retq
  %elt = load float, float* %ptr
  %ins = insertelement <4 x float> undef, float %elt, i32 0
  %vec = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x float> @llvm.trunc.v4f32(<4 x float> %vec)
  ret <4 x float> %rnd
}

define <4 x double> @trunc_v4f64_broadcast(double* %ptr) {
; CHECK-LABEL: trunc_v4f64_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $11, (%rdi){1to4}, %ymm0
; CHECK-NEXT: retq
  %elt = load double, double* %ptr
  %ins = insertelement <4 x double> undef, double %elt, i32 0
  %vec = shufflevector <4 x double> %ins, <4 x double> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x double> @llvm.trunc.v4f64(<4 x double> %vec)
  ret <4 x double> %rnd
}

define <8 x float> @trunc_v8f32_broadcast(float* %ptr) {
; CHECK-LABEL: trunc_v8f32_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $11, (%rdi){1to8}, %ymm0
; CHECK-NEXT: retq
  %elt = load float, float* %ptr
  %ins = insertelement <8 x float> undef, float %elt, i32 0
  %vec = shufflevector <8 x float> %ins, <8 x float> undef, <8 x i32> zeroinitializer
  %rnd = call <8 x float> @llvm.trunc.v8f32(<8 x float> %vec)
  ret <8 x float> %rnd
}

define <8 x double> @trunc_v8f64_broadcast(double* %ptr) {
; CHECK-LABEL: trunc_v8f64_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $11, (%rdi){1to8}, %zmm0
; CHECK-NEXT: retq
  %elt = load double, double* %ptr
  %ins = insertelement <8 x double> undef, double %elt, i32 0
  %vec = shufflevector <8 x double> %ins, <8 x double> undef, <8 x i32> zeroinitializer
  %rnd = call <8 x double> @llvm.trunc.v8f64(<8 x double> %vec)
  ret <8 x double> %rnd
}

define <16 x float> @trunc_v16f32_broadcast(float* %ptr) {
; CHECK-LABEL: trunc_v16f32_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $11, (%rdi){1to16}, %zmm0
; CHECK-NEXT: retq
  %elt = load float, float* %ptr
  %ins = insertelement <16 x float> undef, float %elt, i32 0
  %vec = shufflevector <16 x float> %ins, <16 x float> undef, <16 x i32> zeroinitializer
  %rnd = call <16 x float> @llvm.trunc.v16f32(<16 x float> %vec)
  ret <16 x float> %rnd
}

; llvm.trunc of a splat, merge-masking with %passthru.
define <2 x double> @trunc_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: trunc_v2f64_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscalepd $11, (%rdi){1to2}, %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <2 x i64> %cmp, zeroinitializer
  %elt = load double, double* %ptr
  %ins = insertelement <2 x double> undef, double %elt, i32 0
  %vec = shufflevector <2 x double> %ins, <2 x double> undef, <2 x i32> zeroinitializer
  %rnd = call <2 x double> @llvm.trunc.v2f64(<2 x double> %vec)
  %res = select <2 x i1> %mask, <2 x double> %rnd, <2 x double> %passthru
  ret <2 x double> %res
}

define <4 x float> @trunc_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: trunc_v4f32_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscaleps $11, (%rdi){1to4}, %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i32> %cmp, zeroinitializer
  %elt = load float, float* %ptr
  %ins = insertelement <4 x float> undef, float %elt, i32 0
  %vec = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x float> @llvm.trunc.v4f32(<4 x float> %vec)
  %res = select <4 x i1> %mask, <4 x float> %rnd, <4 x float> %passthru
  ret <4 x float> %res
}

define <4 x double> @trunc_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: trunc_v4f64_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscalepd $11, (%rdi){1to4}, %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i64> %cmp, zeroinitializer
  %elt = load double, double* %ptr
  %ins = insertelement <4 x double> undef, double %elt, i32 0
  %vec = shufflevector <4 x double> %ins, <4 x double> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x double> @llvm.trunc.v4f64(<4 x double> %vec)
  %res = select <4 x i1> %mask, <4 x double> %rnd, <4 x double> %passthru
  ret <4 x double> %res
}

define <8 x float> @trunc_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: trunc_v8f32_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscaleps $11, (%rdi){1to8}, %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i32> %cmp, zeroinitializer
  %elt = load float, float* %ptr
  %ins = insertelement <8 x float> undef, float %elt, i32 0
  %vec = shufflevector <8 x float> %ins, <8 x float> undef, <8 x i32> zeroinitializer
  %rnd = call <8 x float> @llvm.trunc.v8f32(<8 x float> %vec)
  %res = select <8 x i1> %mask, <8 x float> %rnd, <8 x float> %passthru
  ret <8 x float> %res
}

define <8 x double> @trunc_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: trunc_v8f64_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscalepd $11, (%rdi){1to8}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i64> %cmp, zeroinitializer
  %elt = load double, double* %ptr
  %ins = insertelement <8 x double> undef, double %elt, i32 0
  %vec = shufflevector <8 x double> %ins, <8 x double> undef, <8 x i32> zeroinitializer
  %rnd = call <8 x double> @llvm.trunc.v8f64(<8 x double> %vec)
  %res = select <8 x i1> %mask, <8 x double> %rnd, <8 x double> %passthru
  ret <8 x double> %res
}

define <16 x float> @trunc_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: trunc_v16f32_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscaleps $11, (%rdi){1to16}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %elt = load float, float* %ptr
  %ins = insertelement <16 x float> undef, float %elt, i32 0
  %vec = shufflevector <16 x float> %ins, <16 x float> undef, <16 x i32> zeroinitializer
  %rnd = call <16 x float> @llvm.trunc.v16f32(<16 x float> %vec)
  %res = select <16 x i1> %mask, <16 x float> %rnd, <16 x float> %passthru
  ret <16 x float> %res
}

; llvm.trunc of a splat, zero-masking.
define <2 x double> @trunc_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
; CHECK-LABEL: trunc_v2f64_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT: vrndscalepd $11, (%rdi){1to2}, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <2 x i64> %cmp, zeroinitializer
  %elt = load double, double* %ptr
  %ins = insertelement <2 x double> undef, double %elt, i32 0
  %vec = shufflevector <2 x double> %ins, <2 x double> undef, <2 x i32> zeroinitializer
  %rnd = call <2 x double> @llvm.trunc.v2f64(<2 x double> %vec)
  %res = select <2 x i1> %mask, <2 x double> %rnd, <2 x double> zeroinitializer
  ret <2 x double> %res
}

define <4 x float> @trunc_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
; CHECK-LABEL: trunc_v4f32_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT: vrndscaleps $11, (%rdi){1to4}, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i32> %cmp, zeroinitializer
  %elt = load float, float* %ptr
  %ins = insertelement <4 x float> undef, float %elt, i32 0
  %vec = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x float> @llvm.trunc.v4f32(<4 x float> %vec)
  %res = select <4 x i1> %mask, <4 x float> %rnd, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x double> @trunc_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
; CHECK-LABEL: trunc_v4f64_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT: vrndscalepd $11, (%rdi){1to4}, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i64> %cmp, zeroinitializer
  %elt = load double, double* %ptr
  %ins = insertelement <4 x double> undef, double %elt, i32 0
  %vec = shufflevector <4 x double> %ins, <4 x double> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x double> @llvm.trunc.v4f64(<4 x double> %vec)
  %res = select <4 x i1> %mask, <4 x double> %rnd, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <8 x float> @trunc_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
; CHECK-LABEL: trunc_v8f32_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT: vrndscaleps $11, (%rdi){1to8}, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i32> %cmp, zeroinitializer
  %elt = load float, float* %ptr
  %ins = insertelement <8 x float> undef, float %elt, i32 0
  %vec = shufflevector <8 x float> %ins, <8 x float> undef, <8 x i32> zeroinitializer
  %rnd = call <8 x float> @llvm.trunc.v8f32(<8 x float> %vec)
  %res = select <8 x i1> %mask, <8 x float> %rnd, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x double> @trunc_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
; CHECK-LABEL: trunc_v8f64_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT: vrndscalepd $11, (%rdi){1to8}, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i64> %cmp, zeroinitializer
  %elt = load double, double* %ptr
  %ins = insertelement <8 x double> undef, double %elt, i32 0
  %vec = shufflevector <8 x double> %ins, <8 x double> undef, <8 x i32> zeroinitializer
  %rnd = call <8 x double> @llvm.trunc.v8f64(<8 x double> %vec)
  %res = select <8 x i1> %mask, <8 x double> %rnd, <8 x double> zeroinitializer
  ret <8 x double> %res
}

define <16 x float> @trunc_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
; CHECK-LABEL: trunc_v16f32_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT: vrndscaleps $11, (%rdi){1to16}, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %elt = load float, float* %ptr
  %ins = insertelement <16 x float> undef, float %elt, i32 0
  %vec = shufflevector <16 x float> %ins, <16 x float> undef, <16 x i32> zeroinitializer
  %rnd = call <16 x float> @llvm.trunc.v16f32(<16 x float> %vec)
  %res = select <16 x i1> %mask, <16 x float> %rnd, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; llvm.rint, unmasked, register operand. The assertions expect vroundpd/vroundps
; for the 128- and 256-bit types and vrndscalepd/vrndscaleps for the 512-bit types.
define <2 x double> @rint_v2f64(<2 x double> %p) {
; CHECK-LABEL: rint_v2f64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vroundpd $4, %xmm0, %xmm0
; CHECK-NEXT: retq
  %rnd = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
  ret <2 x double> %rnd
}

define <4 x float> @rint_v4f32(<4 x float> %p) {
; CHECK-LABEL: rint_v4f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vroundps $4, %xmm0, %xmm0
; CHECK-NEXT: retq
  %rnd = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
  ret <4 x float> %rnd
}

define <4 x double> @rint_v4f64(<4 x double> %p) {
; CHECK-LABEL: rint_v4f64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vroundpd $4, %ymm0, %ymm0
; CHECK-NEXT: retq
  %rnd = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
  ret <4 x double> %rnd
}

define <8 x float> @rint_v8f32(<8 x float> %p) {
; CHECK-LABEL: rint_v8f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vroundps $4, %ymm0, %ymm0
; CHECK-NEXT: retq
  %rnd = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
  ret <8 x float> %rnd
}

define <8 x double> @rint_v8f64(<8 x double> %p) {
; CHECK-LABEL: rint_v8f64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm0
; CHECK-NEXT: retq
  %rnd = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
  ret <8 x double> %rnd
}

define <16 x float> @rint_v16f32(<16 x float> %p) {
; CHECK-LABEL: rint_v16f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm0
; CHECK-NEXT: retq
  %rnd = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
  ret <16 x float> %rnd
}

; llvm.rint, unmasked, full-vector load from %ptr (expected as a (%rdi) operand).
define <2 x double> @rint_v2f64_load(<2 x double>* %ptr) {
; CHECK-LABEL: rint_v2f64_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vroundpd $4, (%rdi), %xmm0
; CHECK-NEXT: retq
  %vec = load <2 x double>, <2 x double>* %ptr
  %rnd = call <2 x double> @llvm.rint.v2f64(<2 x double> %vec)
  ret <2 x double> %rnd
}

define <4 x float> @rint_v4f32_load(<4 x float>* %ptr) {
; CHECK-LABEL: rint_v4f32_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vroundps $4, (%rdi), %xmm0
; CHECK-NEXT: retq
  %vec = load <4 x float>, <4 x float>* %ptr
  %rnd = call <4 x float> @llvm.rint.v4f32(<4 x float> %vec)
  ret <4 x float> %rnd
}

define <4 x double> @rint_v4f64_load(<4 x double>* %ptr) {
; CHECK-LABEL: rint_v4f64_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vroundpd $4, (%rdi), %ymm0
; CHECK-NEXT: retq
  %vec = load <4 x double>, <4 x double>* %ptr
  %rnd = call <4 x double> @llvm.rint.v4f64(<4 x double> %vec)
  ret <4 x double> %rnd
}

define <8 x float> @rint_v8f32_load(<8 x float>* %ptr) {
; CHECK-LABEL: rint_v8f32_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vroundps $4, (%rdi), %ymm0
; CHECK-NEXT: retq
  %vec = load <8 x float>, <8 x float>* %ptr
  %rnd = call <8 x float> @llvm.rint.v8f32(<8 x float> %vec)
  ret <8 x float> %rnd
}

define <8 x double> @rint_v8f64_load(<8 x double>* %ptr) {
; CHECK-LABEL: rint_v8f64_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $4, (%rdi), %zmm0
; CHECK-NEXT: retq
  %vec = load <8 x double>, <8 x double>* %ptr
  %rnd = call <8 x double> @llvm.rint.v8f64(<8 x double> %vec)
  ret <8 x double> %rnd
}

define <16 x float> @rint_v16f32_load(<16 x float>* %ptr) {
; CHECK-LABEL: rint_v16f32_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $4, (%rdi), %zmm0
; CHECK-NEXT: retq
  %vec = load <16 x float>, <16 x float>* %ptr
  %rnd = call <16 x float> @llvm.rint.v16f32(<16 x float> %vec)
  ret <16 x float> %rnd
}

; llvm.rint, merge-masking, register operand. The assertions expect the result
; to be written into the %passthru register and then copied to the return
; register with vmovapd/vmovaps.
define <2 x double> @rint_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: rint_v2f64_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm2, %xmm2, %k1
; CHECK-NEXT: vrndscalepd $4, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovapd %xmm1, %xmm0
; CHECK-NEXT: retq
  %mask = icmp eq <2 x i64> %cmp, zeroinitializer
  %rnd = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
  %res = select <2 x i1> %mask, <2 x double> %rnd, <2 x double> %passthru
  ret <2 x double> %res
}

define <4 x float> @rint_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: rint_v4f32_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT: vrndscaleps $4, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i32> %cmp, zeroinitializer
  %rnd = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
  %res = select <4 x i1> %mask, <4 x float> %rnd, <4 x float> %passthru
  ret <4 x float> %res
}

define <4 x double> @rint_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: rint_v4f64_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
; CHECK-NEXT: vrndscalepd $4, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i64> %cmp, zeroinitializer
  %rnd = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
  %res = select <4 x i1> %mask, <4 x double> %rnd, <4 x double> %passthru
  ret <4 x double> %res
}

define <8 x float> @rint_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: rint_v8f32_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT: vrndscaleps $4, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovaps %ymm1, %ymm0
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i32> %cmp, zeroinitializer
  %rnd = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
  %res = select <8 x i1> %mask, <8 x float> %rnd, <8 x float> %passthru
  ret <8 x float> %res
}

define <8 x double> @rint_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: rint_v8f64_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i64> %cmp, zeroinitializer
  %rnd = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
  %res = select <8 x i1> %mask, <8 x double> %rnd, <8 x double> %passthru
  ret <8 x double> %res
}

define <16 x float> @rint_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: rint_v16f32_mask:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %rnd = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
  %res = select <16 x i1> %mask, <16 x float> %rnd, <16 x float> %passthru
  ret <16 x float> %res
}

; llvm.rint, zero-masking, register operand.
define <2 x double> @rint_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
; CHECK-LABEL: rint_v2f64_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscalepd $4, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <2 x i64> %cmp, zeroinitializer
  %rnd = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
  %res = select <2 x i1> %mask, <2 x double> %rnd, <2 x double> zeroinitializer
  ret <2 x double> %res
}

define <4 x float> @rint_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
; CHECK-LABEL: rint_v4f32_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscaleps $4, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i32> %cmp, zeroinitializer
  %rnd = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
  %res = select <4 x i1> %mask, <4 x float> %rnd, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x double> @rint_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
; CHECK-LABEL: rint_v4f64_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscalepd $4, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i64> %cmp, zeroinitializer
  %rnd = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
  %res = select <4 x i1> %mask, <4 x double> %rnd, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <8 x float> @rint_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
; CHECK-LABEL: rint_v8f32_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscaleps $4, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i32> %cmp, zeroinitializer
  %rnd = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
  %res = select <8 x i1> %mask, <8 x float> %rnd, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x double> @rint_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) {
; CHECK-LABEL: rint_v8f64_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i64> %cmp, zeroinitializer
  %rnd = call <8 x double> @llvm.rint.v8f64(<8 x double> %p)
  %res = select <8 x i1> %mask, <8 x double> %rnd, <8 x double> zeroinitializer
  ret <8 x double> %res
}

define <16 x float> @rint_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) {
; CHECK-LABEL: rint_v16f32_maskz:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %rnd = call <16 x float> @llvm.rint.v16f32(<16 x float> %p)
  %res = select <16 x i1> %mask, <16 x float> %rnd, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; llvm.rint, merge-masking, full-vector load from %ptr.
define <2 x double> @rint_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: rint_v2f64_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <2 x i64> %cmp, zeroinitializer
  %vec = load <2 x double>, <2 x double>* %ptr
  %rnd = call <2 x double> @llvm.rint.v2f64(<2 x double> %vec)
  %res = select <2 x i1> %mask, <2 x double> %rnd, <2 x double> %passthru
  ret <2 x double> %res
}

define <4 x float> @rint_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: rint_v4f32_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi), %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i32> %cmp, zeroinitializer
  %vec = load <4 x float>, <4 x float>* %ptr
  %rnd = call <4 x float> @llvm.rint.v4f32(<4 x float> %vec)
  %res = select <4 x i1> %mask, <4 x float> %rnd, <4 x float> %passthru
  ret <4 x float> %res
}

define <4 x double> @rint_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: rint_v4f64_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i64> %cmp, zeroinitializer
  %vec = load <4 x double>, <4 x double>* %ptr
  %rnd = call <4 x double> @llvm.rint.v4f64(<4 x double> %vec)
  %res = select <4 x i1> %mask, <4 x double> %rnd, <4 x double> %passthru
  ret <4 x double> %res
}

define <8 x float> @rint_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: rint_v8f32_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi), %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i32> %cmp, zeroinitializer
  %vec = load <8 x float>, <8 x float>* %ptr
  %rnd = call <8 x float> @llvm.rint.v8f32(<8 x float> %vec)
  %res = select <8 x i1> %mask, <8 x float> %rnd, <8 x float> %passthru
  ret <8 x float> %res
}

define <8 x double> @rint_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: rint_v8f64_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i64> %cmp, zeroinitializer
  %vec = load <8 x double>, <8 x double>* %ptr
  %rnd = call <8 x double> @llvm.rint.v8f64(<8 x double> %vec)
  %res = select <8 x i1> %mask, <8 x double> %rnd, <8 x double> %passthru
  ret <8 x double> %res
}

define <16 x float> @rint_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: rint_v16f32_mask_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi), %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %vec = load <16 x float>, <16 x float>* %ptr
  %rnd = call <16 x float> @llvm.rint.v16f32(<16 x float> %vec)
  %res = select <16 x i1> %mask, <16 x float> %rnd, <16 x float> %passthru
  ret <16 x float> %res
}

; llvm.rint, zero-masking, full-vector load from %ptr.
define <2 x double> @rint_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) {
; CHECK-LABEL: rint_v2f64_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <2 x i64> %cmp, zeroinitializer
  %vec = load <2 x double>, <2 x double>* %ptr
  %rnd = call <2 x double> @llvm.rint.v2f64(<2 x double> %vec)
  %res = select <2 x i1> %mask, <2 x double> %rnd, <2 x double> zeroinitializer
  ret <2 x double> %res
}

define <4 x float> @rint_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) {
; CHECK-LABEL: rint_v4f32_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi), %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i32> %cmp, zeroinitializer
  %vec = load <4 x float>, <4 x float>* %ptr
  %rnd = call <4 x float> @llvm.rint.v4f32(<4 x float> %vec)
  %res = select <4 x i1> %mask, <4 x float> %rnd, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x double> @rint_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) {
; CHECK-LABEL: rint_v4f64_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i64> %cmp, zeroinitializer
  %vec = load <4 x double>, <4 x double>* %ptr
  %rnd = call <4 x double> @llvm.rint.v4f64(<4 x double> %vec)
  %res = select <4 x i1> %mask, <4 x double> %rnd, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <8 x float> @rint_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) {
; CHECK-LABEL: rint_v8f32_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi), %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i32> %cmp, zeroinitializer
  %vec = load <8 x float>, <8 x float>* %ptr
  %rnd = call <8 x float> @llvm.rint.v8f32(<8 x float> %vec)
  %res = select <8 x i1> %mask, <8 x float> %rnd, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x double> @rint_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) {
; CHECK-LABEL: rint_v8f64_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i64> %cmp, zeroinitializer
  %vec = load <8 x double>, <8 x double>* %ptr
  %rnd = call <8 x double> @llvm.rint.v8f64(<8 x double> %vec)
  %res = select <8 x i1> %mask, <8 x double> %rnd, <8 x double> zeroinitializer
  ret <8 x double> %res
}

define <16 x float> @rint_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) {
; CHECK-LABEL: rint_v16f32_maskz_load:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %vec = load <16 x float>, <16 x float>* %ptr
  %rnd = call <16 x float> @llvm.rint.v16f32(<16 x float> %vec)
  %res = select <16 x i1> %mask, <16 x float> %rnd, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; llvm.rint of a splat built from a scalar load; a {1toN} memory operand on
; vrndscale is expected at every vector width.
define <2 x double> @rint_v2f64_broadcast(double* %ptr) {
; CHECK-LABEL: rint_v2f64_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $4, (%rdi){1to2}, %xmm0
; CHECK-NEXT: retq
  %elt = load double, double* %ptr
  %ins = insertelement <2 x double> undef, double %elt, i32 0
  %vec = shufflevector <2 x double> %ins, <2 x double> undef, <2 x i32> zeroinitializer
  %rnd = call <2 x double> @llvm.rint.v2f64(<2 x double> %vec)
  ret <2 x double> %rnd
}

define <4 x float> @rint_v4f32_broadcast(float* %ptr) {
; CHECK-LABEL: rint_v4f32_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $4, (%rdi){1to4}, %xmm0
; CHECK-NEXT: retq
  %elt = load float, float* %ptr
  %ins = insertelement <4 x float> undef, float %elt, i32 0
  %vec = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x float> @llvm.rint.v4f32(<4 x float> %vec)
  ret <4 x float> %rnd
}

define <4 x double> @rint_v4f64_broadcast(double* %ptr) {
; CHECK-LABEL: rint_v4f64_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $4, (%rdi){1to4}, %ymm0
; CHECK-NEXT: retq
  %elt = load double, double* %ptr
  %ins = insertelement <4 x double> undef, double %elt, i32 0
  %vec = shufflevector <4 x double> %ins, <4 x double> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x double> @llvm.rint.v4f64(<4 x double> %vec)
  ret <4 x double> %rnd
}

define <8 x float> @rint_v8f32_broadcast(float* %ptr) {
; CHECK-LABEL: rint_v8f32_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $4, (%rdi){1to8}, %ymm0
; CHECK-NEXT: retq
  %elt = load float, float* %ptr
  %ins = insertelement <8 x float> undef, float %elt, i32 0
  %vec = shufflevector <8 x float> %ins, <8 x float> undef, <8 x i32> zeroinitializer
  %rnd = call <8 x float> @llvm.rint.v8f32(<8 x float> %vec)
  ret <8 x float> %rnd
}

define <8 x double> @rint_v8f64_broadcast(double* %ptr) {
; CHECK-LABEL: rint_v8f64_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscalepd $4, (%rdi){1to8}, %zmm0
; CHECK-NEXT: retq
  %elt = load double, double* %ptr
  %ins = insertelement <8 x double> undef, double %elt, i32 0
  %vec = shufflevector <8 x double> %ins, <8 x double> undef, <8 x i32> zeroinitializer
  %rnd = call <8 x double> @llvm.rint.v8f64(<8 x double> %vec)
  ret <8 x double> %rnd
}

define <16 x float> @rint_v16f32_broadcast(float* %ptr) {
; CHECK-LABEL: rint_v16f32_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vrndscaleps $4, (%rdi){1to16}, %zmm0
; CHECK-NEXT: retq
  %elt = load float, float* %ptr
  %ins = insertelement <16 x float> undef, float %elt, i32 0
  %vec = shufflevector <16 x float> %ins, <16 x float> undef, <16 x i32> zeroinitializer
  %rnd = call <16 x float> @llvm.rint.v16f32(<16 x float> %vec)
  ret <16 x float> %rnd
}

; llvm.rint of a splat, merge-masking with %passthru.
define <2 x double> @rint_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: rint_v2f64_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscalepd $4, (%rdi){1to2}, %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <2 x i64> %cmp, zeroinitializer
  %elt = load double, double* %ptr
  %ins = insertelement <2 x double> undef, double %elt, i32 0
  %vec = shufflevector <2 x double> %ins, <2 x double> undef, <2 x i32> zeroinitializer
  %rnd = call <2 x double> @llvm.rint.v2f64(<2 x double> %vec)
  %res = select <2 x i1> %mask, <2 x double> %rnd, <2 x double> %passthru
  ret <2 x double> %res
}

define <4 x float> @rint_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: rint_v4f32_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT: vrndscaleps $4, (%rdi){1to4}, %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <4 x i32> %cmp, zeroinitializer
  %elt = load float, float* %ptr
  %ins = insertelement <4 x float> undef, float %elt, i32 0
  %vec = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  %rnd = call <4 x float> @llvm.rint.v4f32(<4 x float> %vec)
  %res = select <4 x i1> %mask, <4 x float> %rnd, <4 x float> %passthru
  ret <4 x float> %res
}

define <4 x double> @rint_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: rint_v4f64_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: 
vrndscalepd $4, (%rdi){1to4}, %ymm0 {%k1} 2578; CHECK-NEXT: retq 2579 %c = icmp eq <4 x i64> %cmp, zeroinitializer 2580 %ps = load double, double* %ptr 2581 %pins = insertelement <4 x double> undef, double %ps, i32 0 2582 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 2583 %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p) 2584 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 2585 ret <4 x double> %s 2586} 2587 2588define <8 x float> @rint_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) { 2589; CHECK-LABEL: rint_v8f32_mask_broadcast: 2590; CHECK: ## %bb.0: 2591; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 2592; CHECK-NEXT: vrndscaleps $4, (%rdi){1to8}, %ymm0 {%k1} 2593; CHECK-NEXT: retq 2594 %c = icmp eq <8 x i32> %cmp, zeroinitializer 2595 %ps = load float, float* %ptr 2596 %pins = insertelement <8 x float> undef, float %ps, i32 0 2597 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 2598 %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p) 2599 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 2600 ret <8 x float> %s 2601} 2602 2603define <8 x double> @rint_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) { 2604; CHECK-LABEL: rint_v8f64_mask_broadcast: 2605; CHECK: ## %bb.0: 2606; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 2607; CHECK-NEXT: vrndscalepd $4, (%rdi){1to8}, %zmm0 {%k1} 2608; CHECK-NEXT: retq 2609 %c = icmp eq <8 x i64> %cmp, zeroinitializer 2610 %ps = load double, double* %ptr 2611 %pins = insertelement <8 x double> undef, double %ps, i32 0 2612 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 2613 %t = call <8 x double> @llvm.rint.v8f64(<8 x double> %p) 2614 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 2615 ret <8 x double> %s 2616} 2617 2618define <16 x float> @rint_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) { 
2619; CHECK-LABEL: rint_v16f32_mask_broadcast: 2620; CHECK: ## %bb.0: 2621; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 2622; CHECK-NEXT: vrndscaleps $4, (%rdi){1to16}, %zmm0 {%k1} 2623; CHECK-NEXT: retq 2624 %c = icmp eq <16 x i32> %cmp, zeroinitializer 2625 %ps = load float, float* %ptr 2626 %pins = insertelement <16 x float> undef, float %ps, i32 0 2627 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 2628 %t = call <16 x float> @llvm.rint.v16f32(<16 x float> %p) 2629 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru 2630 ret <16 x float> %s 2631} 2632 2633define <2 x double> @rint_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) { 2634; CHECK-LABEL: rint_v2f64_maskz_broadcast: 2635; CHECK: ## %bb.0: 2636; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 2637; CHECK-NEXT: vrndscalepd $4, (%rdi){1to2}, %xmm0 {%k1} {z} 2638; CHECK-NEXT: retq 2639 %c = icmp eq <2 x i64> %cmp, zeroinitializer 2640 %ps = load double, double* %ptr 2641 %pins = insertelement <2 x double> undef, double %ps, i32 0 2642 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 2643 %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p) 2644 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 2645 ret <2 x double> %s 2646} 2647 2648define <4 x float> @rint_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) { 2649; CHECK-LABEL: rint_v4f32_maskz_broadcast: 2650; CHECK: ## %bb.0: 2651; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 2652; CHECK-NEXT: vrndscaleps $4, (%rdi){1to4}, %xmm0 {%k1} {z} 2653; CHECK-NEXT: retq 2654 %c = icmp eq <4 x i32> %cmp, zeroinitializer 2655 %ps = load float, float* %ptr 2656 %pins = insertelement <4 x float> undef, float %ps, i32 0 2657 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 2658 %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p) 2659 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer 2660 ret <4 x float> %s 2661} 
; rint, zero-masking, <4 x double> splat of a scalar load.
; Lanes where %cmp is zero take the rounded value; every other lane is zero.
; Expected assembly: one vrndscalepd with a {1to4} embedded broadcast and {z}.
define <4 x double> @rint_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
; CHECK-LABEL: rint_v4f64_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to4}, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp eq <4 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <4 x double> undef, double %scalar, i32 0
  %splat = shufflevector <4 x double> %lane0, <4 x double> undef, <4 x i32> zeroinitializer
  %rounded = call <4 x double> @llvm.rint.v4f64(<4 x double> %splat)
  %res = select <4 x i1> %mask, <4 x double> %rounded, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; rint, zero-masking, <8 x float> splat of a scalar load.
; Lanes where %cmp is zero take the rounded value; every other lane is zero.
; Expected assembly: one vrndscaleps with a {1to8} embedded broadcast and {z}.
define <8 x float> @rint_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
; CHECK-LABEL: rint_v8f32_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to8}, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp eq <8 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <8 x float> undef, float %scalar, i32 0
  %splat = shufflevector <8 x float> %lane0, <8 x float> undef, <8 x i32> zeroinitializer
  %rounded = call <8 x float> @llvm.rint.v8f32(<8 x float> %splat)
  %res = select <8 x i1> %mask, <8 x float> %rounded, <8 x float> zeroinitializer
  ret <8 x float> %res
}

; rint, zero-masking, <8 x double> splat of a scalar load.
; Lanes where %cmp is zero take the rounded value; every other lane is zero.
; Expected assembly: one vrndscalepd with a {1to8} embedded broadcast and {z}.
define <8 x double> @rint_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
; CHECK-LABEL: rint_v8f64_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT:    vrndscalepd $4, (%rdi){1to8}, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp eq <8 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  %rounded = call <8 x double> @llvm.rint.v8f64(<8 x double> %splat)
  %res = select <8 x i1> %mask, <8 x double> %rounded, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; rint, zero-masking, <16 x float> splat of a scalar load.
; Lanes where %cmp is zero take the rounded value; every other lane is zero.
; Expected assembly: one vrndscaleps with a {1to16} embedded broadcast and {z}.
define <16 x float> @rint_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
; CHECK-LABEL: rint_v16f32_maskz_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vrndscaleps $4, (%rdi){1to16}, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <16 x float> undef, float %scalar, i32 0
  %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
  %rounded = call <16 x float> @llvm.rint.v16f32(<16 x float> %splat)
  %res = select <16 x i1> %mask, <16 x float> %rounded, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; nearbyint on a <2 x double> register operand, no masking.
; Expected assembly: vroundpd with immediate $12 on %xmm0.
define <2 x double> @nearbyint_v2f64(<2 x double> %p) {
; CHECK-LABEL: nearbyint_v2f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $12, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %rounded = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
  ret <2 x double> %rounded
}

; nearbyint on a <4 x float> register operand, no masking.
; Expected assembly: vroundps with immediate $12 on %xmm0.
define <4 x float> @nearbyint_v4f32(<4 x float> %p) {
; CHECK-LABEL: nearbyint_v4f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $12, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %rounded = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
  ret <4 x float> %rounded
}

; nearbyint on a <4 x double> register operand, no masking.
; Expected assembly: vroundpd with immediate $12 on %ymm0.
define <4 x double> @nearbyint_v4f64(<4 x double> %p) {
; CHECK-LABEL: nearbyint_v4f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $12, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %rounded = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
  ret <4 x double> %rounded
}

; nearbyint on an <8 x float> register operand, no masking.
; Expected assembly: vroundps with immediate $12 on %ymm0.
define <8 x float> @nearbyint_v8f32(<8 x float> %p) {
; CHECK-LABEL: nearbyint_v8f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $12, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %rounded = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
  ret <8 x float> %rounded
}

; nearbyint on an <8 x double> register operand, no masking.
; Expected assembly: vrndscalepd with immediate $12 on %zmm0.
define <8 x double> @nearbyint_v8f64(<8 x double> %p) {
; CHECK-LABEL: nearbyint_v8f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $12, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %rounded = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
  ret <8 x double> %rounded
}

; nearbyint on a <16 x float> register operand, no masking.
; Expected assembly: vrndscaleps with immediate $12 on %zmm0.
define <16 x float> @nearbyint_v16f32(<16 x float> %p) {
; CHECK-LABEL: nearbyint_v16f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $12, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %rounded = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
  ret <16 x float> %rounded
}

; nearbyint of a <2 x double> loaded from %ptr, no masking.
; Expected assembly: the load is folded into vroundpd as a (%rdi) operand.
define <2 x double> @nearbyint_v2f64_load(<2 x double>* %ptr) {
; CHECK-LABEL: nearbyint_v2f64_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $12, (%rdi), %xmm0
; CHECK-NEXT:    retq
  %vec = load <2 x double>, <2 x double>* %ptr
  %rounded = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %vec)
  ret <2 x double> %rounded
}

; nearbyint of a <4 x float> loaded from %ptr, no masking.
; Expected assembly: the load is folded into vroundps as a (%rdi) operand.
define <4 x float> @nearbyint_v4f32_load(<4 x float>* %ptr) {
; CHECK-LABEL: nearbyint_v4f32_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $12, (%rdi), %xmm0
; CHECK-NEXT:    retq
  %vec = load <4 x float>, <4 x float>* %ptr
  %rounded = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %vec)
  ret <4 x float> %rounded
}

; nearbyint of a <4 x double> loaded from %ptr, no masking.
; Expected assembly: the load is folded into vroundpd as a (%rdi) operand.
define <4 x double> @nearbyint_v4f64_load(<4 x double>* %ptr) {
; CHECK-LABEL: nearbyint_v4f64_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundpd $12, (%rdi), %ymm0
; CHECK-NEXT:    retq
  %vec = load <4 x double>, <4 x double>* %ptr
  %rounded = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %vec)
  ret <4 x double> %rounded
}

; nearbyint of an <8 x float> loaded from %ptr, no masking.
; Expected assembly: the load is folded into vroundps as a (%rdi) operand.
define <8 x float> @nearbyint_v8f32_load(<8 x float>* %ptr) {
; CHECK-LABEL: nearbyint_v8f32_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vroundps $12, (%rdi), %ymm0
; CHECK-NEXT:    retq
  %vec = load <8 x float>, <8 x float>* %ptr
  %rounded = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %vec)
  ret <8 x float> %rounded
}

; nearbyint of an <8 x double> loaded from %ptr, no masking.
; Expected assembly: the load is folded into vrndscalepd as a (%rdi) operand.
define <8 x double> @nearbyint_v8f64_load(<8 x double>* %ptr) {
; CHECK-LABEL: nearbyint_v8f64_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscalepd $12, (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vec = load <8 x double>, <8 x double>* %ptr
  %rounded = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %vec)
  ret <8 x double> %rounded
}

; nearbyint of a <16 x float> loaded from %ptr, no masking.
; Expected assembly: the load is folded into vrndscaleps as a (%rdi) operand.
define <16 x float> @nearbyint_v16f32_load(<16 x float>* %ptr) {
; CHECK-LABEL: nearbyint_v16f32_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vrndscaleps $12, (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vec = load <16 x float>, <16 x float>* %ptr
  %rounded = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %vec)
  ret <16 x float> %rounded
}

; nearbyint, merge-masking, <2 x double> register operand.
; Lanes where %cmp is zero take the rounded value; the rest keep %passthru.
; Expected assembly: masked vrndscalepd into %xmm1, then a copy to %xmm0.
define <2 x double> @nearbyint_v2f64_mask(<2 x double> %p, <2 x double> %passthru, <2 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v2f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm2, %xmm2, %k1
; CHECK-NEXT:    vrndscalepd $12, %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %mask = icmp eq <2 x i64> %cmp, zeroinitializer
  %rounded = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
  %res = select <2 x i1> %mask, <2 x double> %rounded, <2 x double> %passthru
  ret <2 x double> %res
}

; nearbyint, merge-masking, <4 x float> register operand.
; Lanes where %cmp is zero take the rounded value; the rest keep %passthru.
; Expected assembly: masked vrndscaleps into %xmm1, then a copy to %xmm0.
define <4 x float> @nearbyint_v4f32_mask(<4 x float> %p, <4 x float> %passthru, <4 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v4f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT:    vrndscaleps $12, %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %mask = icmp eq <4 x i32> %cmp, zeroinitializer
  %rounded = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
  %res = select <4 x i1> %mask, <4 x float> %rounded, <4 x float> %passthru
  ret <4 x float> %res
}

; nearbyint, merge-masking, <4 x double> register operand.
; Lanes where %cmp is zero take the rounded value; the rest keep %passthru.
; Expected assembly: masked vrndscalepd into %ymm1, then a copy to %ymm0.
define <4 x double> @nearbyint_v4f64_mask(<4 x double> %p, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v4f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm2, %ymm2, %k1
; CHECK-NEXT:    vrndscalepd $12, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = icmp eq <4 x i64> %cmp, zeroinitializer
  %rounded = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
  %res = select <4 x i1> %mask, <4 x double> %rounded, <4 x double> %passthru
  ret <4 x double> %res
}

; nearbyint, merge-masking, <8 x float> register operand.
; Lanes where %cmp is zero take the rounded value; the rest keep %passthru.
; Expected assembly: masked vrndscaleps into %ymm1, then a copy to %ymm0.
define <8 x float> @nearbyint_v8f32_mask(<8 x float> %p, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v8f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT:    vrndscaleps $12, %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %mask = icmp eq <8 x i32> %cmp, zeroinitializer
  %rounded = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
  %res = select <8 x i1> %mask, <8 x float> %rounded, <8 x float> %passthru
  ret <8 x float> %res
}

; nearbyint, merge-masking, <8 x double> register operand.
; Lanes where %cmp is zero take the rounded value; the rest keep %passthru.
; Expected assembly: masked vrndscalepd into %zmm1, then a copy to %zmm0.
define <8 x double> @nearbyint_v8f64_mask(<8 x double> %p, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v8f64_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vrndscalepd $12, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
  %mask = icmp eq <8 x i64> %cmp, zeroinitializer
  %rounded = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
  %res = select <8 x i1> %mask, <8 x double> %rounded, <8 x double> %passthru
  ret <8 x double> %res
}

; nearbyint, merge-masking, <16 x float> register operand.
; Lanes where %cmp is zero take the rounded value; the rest keep %passthru.
; Expected assembly: masked vrndscaleps into %zmm1, then a copy to %zmm0.
define <16 x float> @nearbyint_v16f32_mask(<16 x float> %p, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v16f32_mask:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vrndscaleps $12, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %rounded = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
  %res = select <16 x i1> %mask, <16 x float> %rounded, <16 x float> %passthru
  ret <16 x float> %res
}
; nearbyint, zero-masking, <2 x double> register operand.
; Lanes where %cmp is zero take the rounded value; every other lane is zero.
; Expected assembly: a single in-place vrndscalepd under {%k1} {z}.
define <2 x double> @nearbyint_v2f64_maskz(<2 x double> %p, <2 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v2f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscalepd $12, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp eq <2 x i64> %cmp, zeroinitializer
  %rounded = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
  %res = select <2 x i1> %mask, <2 x double> %rounded, <2 x double> zeroinitializer
  ret <2 x double> %res
}

; nearbyint, zero-masking, <4 x float> register operand.
; Lanes where %cmp is zero take the rounded value; every other lane is zero.
; Expected assembly: a single in-place vrndscaleps under {%k1} {z}.
define <4 x float> @nearbyint_v4f32_maskz(<4 x float> %p, <4 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v4f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vrndscaleps $12, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp eq <4 x i32> %cmp, zeroinitializer
  %rounded = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
  %res = select <4 x i1> %mask, <4 x float> %rounded, <4 x float> zeroinitializer
  ret <4 x float> %res
}

; nearbyint, zero-masking, <4 x double> register operand.
; Lanes where %cmp is zero take the rounded value; every other lane is zero.
; Expected assembly: a single in-place vrndscalepd under {%k1} {z}.
define <4 x double> @nearbyint_v4f64_maskz(<4 x double> %p, <4 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v4f64_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscalepd $12, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp eq <4 x i64> %cmp, zeroinitializer
  %rounded = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
  %res = select <4 x i1> %mask, <4 x double> %rounded, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; nearbyint, zero-masking, <8 x float> register operand.
; Lanes where %cmp is zero take the rounded value; every other lane is zero.
; Expected assembly: a single in-place vrndscaleps under {%k1} {z}.
define <8 x float> @nearbyint_v8f32_maskz(<8 x float> %p, <8 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v8f32_maskz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vrndscaleps $12, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp eq <8 x i32> %cmp, zeroinitializer
  %rounded = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
  %res = select <8 x i1> %mask, <8 x float> %rounded, <8 x float> zeroinitializer
  ret <8 x float> %res
}
2962 2963define <8 x double> @nearbyint_v8f64_maskz(<8 x double> %p, <8 x i64> %cmp) { 2964; CHECK-LABEL: nearbyint_v8f64_maskz: 2965; CHECK: ## %bb.0: 2966; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 2967; CHECK-NEXT: vrndscalepd $12, %zmm0, %zmm0 {%k1} {z} 2968; CHECK-NEXT: retq 2969 %c = icmp eq <8 x i64> %cmp, zeroinitializer 2970 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) 2971 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 2972 ret <8 x double> %s 2973} 2974 2975define <16 x float> @nearbyint_v16f32_maskz(<16 x float> %p, <16 x i32> %cmp) { 2976; CHECK-LABEL: nearbyint_v16f32_maskz: 2977; CHECK: ## %bb.0: 2978; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 2979; CHECK-NEXT: vrndscaleps $12, %zmm0, %zmm0 {%k1} {z} 2980; CHECK-NEXT: retq 2981 %c = icmp eq <16 x i32> %cmp, zeroinitializer 2982 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) 2983 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 2984 ret <16 x float> %s 2985} 2986 2987define <2 x double> @nearbyint_v2f64_mask_load(<2 x double>* %ptr, <2 x double> %passthru, <2 x i64> %cmp) { 2988; CHECK-LABEL: nearbyint_v2f64_mask_load: 2989; CHECK: ## %bb.0: 2990; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 2991; CHECK-NEXT: vrndscalepd $12, (%rdi), %xmm0 {%k1} 2992; CHECK-NEXT: retq 2993 %c = icmp eq <2 x i64> %cmp, zeroinitializer 2994 %p = load <2 x double>, <2 x double>* %ptr 2995 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) 2996 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 2997 ret <2 x double> %s 2998} 2999 3000define <4 x float> @nearbyint_v4f32_mask_load(<4 x float>* %ptr, <4 x float> %passthru, <4 x i32> %cmp) { 3001; CHECK-LABEL: nearbyint_v4f32_mask_load: 3002; CHECK: ## %bb.0: 3003; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 3004; CHECK-NEXT: vrndscaleps $12, (%rdi), %xmm0 {%k1} 3005; CHECK-NEXT: retq 3006 %c = icmp eq <4 x i32> %cmp, zeroinitializer 3007 %p = load <4 x float>, <4 x float>* %ptr 3008 
%t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) 3009 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru 3010 ret <4 x float> %s 3011} 3012 3013define <4 x double> @nearbyint_v4f64_mask_load(<4 x double>* %ptr, <4 x double> %passthru, <4 x i64> %cmp) { 3014; CHECK-LABEL: nearbyint_v4f64_mask_load: 3015; CHECK: ## %bb.0: 3016; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 3017; CHECK-NEXT: vrndscalepd $12, (%rdi), %ymm0 {%k1} 3018; CHECK-NEXT: retq 3019 %c = icmp eq <4 x i64> %cmp, zeroinitializer 3020 %p = load <4 x double>, <4 x double>* %ptr 3021 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) 3022 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> %passthru 3023 ret <4 x double> %s 3024} 3025 3026define <8 x float> @nearbyint_v8f32_mask_load(<8 x float>* %ptr, <8 x float> %passthru, <8 x i32> %cmp) { 3027; CHECK-LABEL: nearbyint_v8f32_mask_load: 3028; CHECK: ## %bb.0: 3029; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 3030; CHECK-NEXT: vrndscaleps $12, (%rdi), %ymm0 {%k1} 3031; CHECK-NEXT: retq 3032 %c = icmp eq <8 x i32> %cmp, zeroinitializer 3033 %p = load <8 x float>, <8 x float>* %ptr 3034 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) 3035 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> %passthru 3036 ret <8 x float> %s 3037} 3038 3039define <8 x double> @nearbyint_v8f64_mask_load(<8 x double>* %ptr, <8 x double> %passthru, <8 x i64> %cmp) { 3040; CHECK-LABEL: nearbyint_v8f64_mask_load: 3041; CHECK: ## %bb.0: 3042; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 3043; CHECK-NEXT: vrndscalepd $12, (%rdi), %zmm0 {%k1} 3044; CHECK-NEXT: retq 3045 %c = icmp eq <8 x i64> %cmp, zeroinitializer 3046 %p = load <8 x double>, <8 x double>* %ptr 3047 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) 3048 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> %passthru 3049 ret <8 x double> %s 3050} 3051 3052define <16 x float> @nearbyint_v16f32_mask_load(<16 x float>* %ptr, <16 x float> %passthru, <16 x i32> %cmp) { 
3053; CHECK-LABEL: nearbyint_v16f32_mask_load: 3054; CHECK: ## %bb.0: 3055; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 3056; CHECK-NEXT: vrndscaleps $12, (%rdi), %zmm0 {%k1} 3057; CHECK-NEXT: retq 3058 %c = icmp eq <16 x i32> %cmp, zeroinitializer 3059 %p = load <16 x float>, <16 x float>* %ptr 3060 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) 3061 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> %passthru 3062 ret <16 x float> %s 3063} 3064 3065define <2 x double> @nearbyint_v2f64_maskz_load(<2 x double>* %ptr, <2 x i64> %cmp) { 3066; CHECK-LABEL: nearbyint_v2f64_maskz_load: 3067; CHECK: ## %bb.0: 3068; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1 3069; CHECK-NEXT: vrndscalepd $12, (%rdi), %xmm0 {%k1} {z} 3070; CHECK-NEXT: retq 3071 %c = icmp eq <2 x i64> %cmp, zeroinitializer 3072 %p = load <2 x double>, <2 x double>* %ptr 3073 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) 3074 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> zeroinitializer 3075 ret <2 x double> %s 3076} 3077 3078define <4 x float> @nearbyint_v4f32_maskz_load(<4 x float>* %ptr, <4 x i32> %cmp) { 3079; CHECK-LABEL: nearbyint_v4f32_maskz_load: 3080; CHECK: ## %bb.0: 3081; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 3082; CHECK-NEXT: vrndscaleps $12, (%rdi), %xmm0 {%k1} {z} 3083; CHECK-NEXT: retq 3084 %c = icmp eq <4 x i32> %cmp, zeroinitializer 3085 %p = load <4 x float>, <4 x float>* %ptr 3086 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) 3087 %s = select <4 x i1> %c, <4 x float> %t, <4 x float> zeroinitializer 3088 ret <4 x float> %s 3089} 3090 3091define <4 x double> @nearbyint_v4f64_maskz_load(<4 x double>* %ptr, <4 x i64> %cmp) { 3092; CHECK-LABEL: nearbyint_v4f64_maskz_load: 3093; CHECK: ## %bb.0: 3094; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 3095; CHECK-NEXT: vrndscalepd $12, (%rdi), %ymm0 {%k1} {z} 3096; CHECK-NEXT: retq 3097 %c = icmp eq <4 x i64> %cmp, zeroinitializer 3098 %p = load <4 x double>, <4 x double>* %ptr 3099 %t = call <4 x 
double> @llvm.nearbyint.v4f64(<4 x double> %p) 3100 %s = select <4 x i1> %c, <4 x double> %t, <4 x double> zeroinitializer 3101 ret <4 x double> %s 3102} 3103 3104define <8 x float> @nearbyint_v8f32_maskz_load(<8 x float>* %ptr, <8 x i32> %cmp) { 3105; CHECK-LABEL: nearbyint_v8f32_maskz_load: 3106; CHECK: ## %bb.0: 3107; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 3108; CHECK-NEXT: vrndscaleps $12, (%rdi), %ymm0 {%k1} {z} 3109; CHECK-NEXT: retq 3110 %c = icmp eq <8 x i32> %cmp, zeroinitializer 3111 %p = load <8 x float>, <8 x float>* %ptr 3112 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) 3113 %s = select <8 x i1> %c, <8 x float> %t, <8 x float> zeroinitializer 3114 ret <8 x float> %s 3115} 3116 3117define <8 x double> @nearbyint_v8f64_maskz_load(<8 x double>* %ptr, <8 x i64> %cmp) { 3118; CHECK-LABEL: nearbyint_v8f64_maskz_load: 3119; CHECK: ## %bb.0: 3120; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 3121; CHECK-NEXT: vrndscalepd $12, (%rdi), %zmm0 {%k1} {z} 3122; CHECK-NEXT: retq 3123 %c = icmp eq <8 x i64> %cmp, zeroinitializer 3124 %p = load <8 x double>, <8 x double>* %ptr 3125 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) 3126 %s = select <8 x i1> %c, <8 x double> %t, <8 x double> zeroinitializer 3127 ret <8 x double> %s 3128} 3129 3130define <16 x float> @nearbyint_v16f32_maskz_load(<16 x float>* %ptr, <16 x i32> %cmp) { 3131; CHECK-LABEL: nearbyint_v16f32_maskz_load: 3132; CHECK: ## %bb.0: 3133; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 3134; CHECK-NEXT: vrndscaleps $12, (%rdi), %zmm0 {%k1} {z} 3135; CHECK-NEXT: retq 3136 %c = icmp eq <16 x i32> %cmp, zeroinitializer 3137 %p = load <16 x float>, <16 x float>* %ptr 3138 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) 3139 %s = select <16 x i1> %c, <16 x float> %t, <16 x float> zeroinitializer 3140 ret <16 x float> %s 3141} 3142 3143define <2 x double> @nearbyint_v2f64_broadcast(double* %ptr) { 3144; CHECK-LABEL: nearbyint_v2f64_broadcast: 3145; CHECK: ## %bb.0: 3146; 
CHECK-NEXT: vrndscalepd $12, (%rdi){1to2}, %xmm0 3147; CHECK-NEXT: retq 3148 %ps = load double, double* %ptr 3149 %pins = insertelement <2 x double> undef, double %ps, i32 0 3150 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 3151 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) 3152 ret <2 x double> %t 3153} 3154 3155define <4 x float> @nearbyint_v4f32_broadcast(float* %ptr) { 3156; CHECK-LABEL: nearbyint_v4f32_broadcast: 3157; CHECK: ## %bb.0: 3158; CHECK-NEXT: vrndscaleps $12, (%rdi){1to4}, %xmm0 3159; CHECK-NEXT: retq 3160 %ps = load float, float* %ptr 3161 %pins = insertelement <4 x float> undef, float %ps, i32 0 3162 %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer 3163 %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) 3164 ret <4 x float> %t 3165} 3166 3167define <4 x double> @nearbyint_v4f64_broadcast(double* %ptr){ 3168; CHECK-LABEL: nearbyint_v4f64_broadcast: 3169; CHECK: ## %bb.0: 3170; CHECK-NEXT: vrndscalepd $12, (%rdi){1to4}, %ymm0 3171; CHECK-NEXT: retq 3172 %ps = load double, double* %ptr 3173 %pins = insertelement <4 x double> undef, double %ps, i32 0 3174 %p = shufflevector <4 x double> %pins, <4 x double> undef, <4 x i32> zeroinitializer 3175 %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) 3176 ret <4 x double> %t 3177} 3178 3179define <8 x float> @nearbyint_v8f32_broadcast(float* %ptr) { 3180; CHECK-LABEL: nearbyint_v8f32_broadcast: 3181; CHECK: ## %bb.0: 3182; CHECK-NEXT: vrndscaleps $12, (%rdi){1to8}, %ymm0 3183; CHECK-NEXT: retq 3184 %ps = load float, float* %ptr 3185 %pins = insertelement <8 x float> undef, float %ps, i32 0 3186 %p = shufflevector <8 x float> %pins, <8 x float> undef, <8 x i32> zeroinitializer 3187 %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) 3188 ret <8 x float> %t 3189} 3190 3191define <8 x double> @nearbyint_v8f64_broadcast(double* %ptr){ 3192; CHECK-LABEL: nearbyint_v8f64_broadcast: 3193; CHECK: ## 
%bb.0: 3194; CHECK-NEXT: vrndscalepd $12, (%rdi){1to8}, %zmm0 3195; CHECK-NEXT: retq 3196 %ps = load double, double* %ptr 3197 %pins = insertelement <8 x double> undef, double %ps, i32 0 3198 %p = shufflevector <8 x double> %pins, <8 x double> undef, <8 x i32> zeroinitializer 3199 %t = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p) 3200 ret <8 x double> %t 3201} 3202 3203define <16 x float> @nearbyint_v16f32_broadcast(float* %ptr) { 3204; CHECK-LABEL: nearbyint_v16f32_broadcast: 3205; CHECK: ## %bb.0: 3206; CHECK-NEXT: vrndscaleps $12, (%rdi){1to16}, %zmm0 3207; CHECK-NEXT: retq 3208 %ps = load float, float* %ptr 3209 %pins = insertelement <16 x float> undef, float %ps, i32 0 3210 %p = shufflevector <16 x float> %pins, <16 x float> undef, <16 x i32> zeroinitializer 3211 %t = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p) 3212 ret <16 x float> %t 3213} 3214 3215define <2 x double> @nearbyint_v2f64_mask_broadcast(double* %ptr, <2 x double> %passthru, <2 x i64> %cmp) { 3216; CHECK-LABEL: nearbyint_v2f64_mask_broadcast: 3217; CHECK: ## %bb.0: 3218; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1 3219; CHECK-NEXT: vrndscalepd $12, (%rdi){1to2}, %xmm0 {%k1} 3220; CHECK-NEXT: retq 3221 %c = icmp eq <2 x i64> %cmp, zeroinitializer 3222 %ps = load double, double* %ptr 3223 %pins = insertelement <2 x double> undef, double %ps, i32 0 3224 %p = shufflevector <2 x double> %pins, <2 x double> undef, <2 x i32> zeroinitializer 3225 %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) 3226 %s = select <2 x i1> %c, <2 x double> %t, <2 x double> %passthru 3227 ret <2 x double> %s 3228} 3229 3230define <4 x float> @nearbyint_v4f32_mask_broadcast(float* %ptr, <4 x float> %passthru, <4 x i32> %cmp) { 3231; CHECK-LABEL: nearbyint_v4f32_mask_broadcast: 3232; CHECK: ## %bb.0: 3233; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1 3234; CHECK-NEXT: vrndscaleps $12, (%rdi){1to4}, %xmm0 {%k1} 3235; CHECK-NEXT: retq 3236 %c = icmp eq <4 x i32> %cmp, zeroinitializer 3237 %ps = load 
float, float* %ptr
  %pins = insertelement <4 x float> undef, float %ps, i32 0
  %p = shufflevector <4 x float> %pins, <4 x float> undef, <4 x i32> zeroinitializer
  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
  %s = select <4 x i1> %c, <4 x float> %t, <4 x float> %passthru
  ret <4 x float> %s
}

; Merge-masked nearbyint of a splatted scalar, <4 x double>.
; One double is loaded from %ptr and splatted to all 4 lanes before rounding.
; Lanes where %cmp == 0 take the rounded value; the rest keep %passthru.
; The expected assembly is one vrndscalepd with a (%rdi){1to4} memory operand
; under mask %k1.
define <4 x double> @nearbyint_v4f64_mask_broadcast(double* %ptr, <4 x double> %passthru, <4 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v4f64_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to4}, %ymm0 {%k1}
; CHECK-NEXT: retq
  %is_zero = icmp eq <4 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <4 x double> undef, double %scalar, i32 0
  %splat = shufflevector <4 x double> %lane0, <4 x double> undef, <4 x i32> zeroinitializer
  %rounded = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %splat)
  %result = select <4 x i1> %is_zero, <4 x double> %rounded, <4 x double> %passthru
  ret <4 x double> %result
}

; Merge-masked nearbyint of a splatted scalar, <8 x float>.
; One float is loaded from %ptr and splatted to all 8 lanes before rounding.
; Lanes where %cmp == 0 take the rounded value; the rest keep %passthru.
; The expected assembly is one vrndscaleps with a (%rdi){1to8} memory operand
; under mask %k1.
define <8 x float> @nearbyint_v8f32_mask_broadcast(float* %ptr, <8 x float> %passthru, <8 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v8f32_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to8}, %ymm0 {%k1}
; CHECK-NEXT: retq
  %is_zero = icmp eq <8 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <8 x float> undef, float %scalar, i32 0
  %splat = shufflevector <8 x float> %lane0, <8 x float> undef, <8 x i32> zeroinitializer
  %rounded = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %splat)
  %result = select <8 x i1> %is_zero, <8 x float> %rounded, <8 x float> %passthru
  ret <8 x float> %result
}

; Merge-masked nearbyint of a splatted scalar, <8 x double>.
; One double is loaded from %ptr and splatted to all 8 lanes before rounding.
; Lanes where %cmp == 0 take the rounded value; the rest keep %passthru.
; The expected assembly is one vrndscalepd with a (%rdi){1to8} memory operand
; under mask %k1.
define <8 x double> @nearbyint_v8f64_mask_broadcast(double* %ptr, <8 x double> %passthru, <8 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v8f64_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to8}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %is_zero = icmp eq <8 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  %rounded = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %splat)
  %result = select <8 x i1> %is_zero, <8 x double> %rounded, <8 x double> %passthru
  ret <8 x double> %result
}

; Merge-masked nearbyint of a splatted scalar, <16 x float>.
; One float is loaded from %ptr and splatted to all 16 lanes before rounding.
; Lanes where %cmp == 0 take the rounded value; the rest keep %passthru.
; The expected assembly is one vrndscaleps with a (%rdi){1to16} memory operand
; under mask %k1.
define <16 x float> @nearbyint_v16f32_mask_broadcast(float* %ptr, <16 x float> %passthru, <16 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v16f32_mask_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to16}, %zmm0 {%k1}
; CHECK-NEXT: retq
  %is_zero = icmp eq <16 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <16 x float> undef, float %scalar, i32 0
  %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
  %rounded = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %splat)
  %result = select <16 x i1> %is_zero, <16 x float> %rounded, <16 x float> %passthru
  ret <16 x float> %result
}

; Zero-masked nearbyint of a splatted scalar, <2 x double>.
; One double is loaded from %ptr and splatted to both lanes before rounding.
; Lanes where %cmp == 0 take the rounded value; the rest are zero.
; The expected assembly is one vrndscalepd with a (%rdi){1to2} memory operand
; under mask %k1 with {z}.
define <2 x double> @nearbyint_v2f64_maskz_broadcast(double* %ptr, <2 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v2f64_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k1
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to2}, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %is_zero = icmp eq <2 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <2 x double> undef, double %scalar, i32 0
  %splat = shufflevector <2 x double> %lane0, <2 x double> undef, <2 x i32> zeroinitializer
  %rounded = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %splat)
  %result = select <2 x i1> %is_zero, <2 x double> %rounded, <2 x double> zeroinitializer
  ret <2 x double> %result
}

; Zero-masked nearbyint of a splatted scalar, <4 x float>.
; One float is loaded from %ptr and splatted to all 4 lanes before rounding.
; Lanes where %cmp == 0 take the rounded value; the rest are zero.
; The expected assembly is one vrndscaleps with a (%rdi){1to4} memory operand
; under mask %k1 with {z}.
define <4 x float> @nearbyint_v4f32_maskz_broadcast(float* %ptr, <4 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v4f32_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to4}, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
  %is_zero = icmp eq <4 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <4 x float> undef, float %scalar, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  %rounded = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %splat)
  %result = select <4 x i1> %is_zero, <4 x float> %rounded, <4 x float> zeroinitializer
  ret <4 x float> %result
}

; Zero-masked nearbyint of a splatted scalar, <4 x double>.
; One double is loaded from %ptr and splatted to all 4 lanes before rounding.
; Lanes where %cmp == 0 take the rounded value; the rest are zero.
; The expected assembly is one vrndscalepd with a (%rdi){1to4} memory operand
; under mask %k1 with {z}.
define <4 x double> @nearbyint_v4f64_maskz_broadcast(double* %ptr, <4 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v4f64_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to4}, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %is_zero = icmp eq <4 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <4 x double> undef, double %scalar, i32 0
  %splat = shufflevector <4 x double> %lane0, <4 x double> undef, <4 x i32> zeroinitializer
  %rounded = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %splat)
  %result = select <4 x i1> %is_zero, <4 x double> %rounded, <4 x double> zeroinitializer
  ret <4 x double> %result
}

; Zero-masked nearbyint of a splatted scalar, <8 x float>.
; One float is loaded from %ptr and splatted to all 8 lanes before rounding.
; Lanes where %cmp == 0 take the rounded value; the rest are zero.
; The expected assembly is one vrndscaleps with a (%rdi){1to8} memory operand
; under mask %k1 with {z}.
define <8 x float> @nearbyint_v8f32_maskz_broadcast(float* %ptr, <8 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v8f32_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to8}, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
  %is_zero = icmp eq <8 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <8 x float> undef, float %scalar, i32 0
  %splat = shufflevector <8 x float> %lane0, <8 x float> undef, <8 x i32> zeroinitializer
  %rounded = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %splat)
  %result = select <8 x i1> %is_zero, <8 x float> %rounded, <8 x float> zeroinitializer
  ret <8 x float> %result
}

; Zero-masked nearbyint of a splatted scalar, <8 x double>.
; One double is loaded from %ptr and splatted to all 8 lanes before rounding.
; Lanes where %cmp == 0 take the rounded value; the rest are zero.
; The expected assembly is one vrndscalepd with a (%rdi){1to8} memory operand
; under mask %k1 with {z}.
define <8 x double> @nearbyint_v8f64_maskz_broadcast(double* %ptr, <8 x i64> %cmp) {
; CHECK-LABEL: nearbyint_v8f64_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
; CHECK-NEXT: vrndscalepd $12, (%rdi){1to8}, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %is_zero = icmp eq <8 x i64> %cmp, zeroinitializer
  %scalar = load double, double* %ptr
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  %rounded = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %splat)
  %result = select <8 x i1> %is_zero, <8 x double> %rounded, <8 x double> zeroinitializer
  ret <8 x double> %result
}

; Zero-masked nearbyint of a splatted scalar, <16 x float>.
; One float is loaded from %ptr and splatted to all 16 lanes before rounding.
; Lanes where %cmp == 0 take the rounded value; the rest are zero.
; The expected assembly is one vrndscaleps with a (%rdi){1to16} memory operand
; under mask %k1 with {z}.
define <16 x float> @nearbyint_v16f32_maskz_broadcast(float* %ptr, <16 x i32> %cmp) {
; CHECK-LABEL: nearbyint_v16f32_maskz_broadcast:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT: vrndscaleps $12, (%rdi){1to16}, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %is_zero = icmp eq <16 x i32> %cmp, zeroinitializer
  %scalar = load float, float* %ptr
  %lane0 = insertelement <16 x float> undef, float %scalar, i32 0
  %splat = shufflevector <16 x float> %lane0, <16 x float> undef, <16 x i32> zeroinitializer
  %rounded = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %splat)
  %result = select <16 x i1> %is_zero, <16 x float> %rounded, <16 x float> zeroinitializer
  ret <16 x float> %result
}