; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s

; Each function below selects a contiguous run of elements from a wider vector
; with a single shufflevector (optionally followed by a store or a masked
; select) and pins the instruction sequence llc emits for it.

; --- 128-bit subvector taken from a 512-bit integer vector, returned by value.

define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
; SKX-LABEL: extract_subvector128_v32i16:
; SKX:       ## %bb.0:
; SKX-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <8 x i16> %r1
}

define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounwind {
; SKX-LABEL: extract_subvector128_v32i16_first_element:
; SKX:       ## %bb.0:
; SKX-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %r1
}

define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind {
; SKX-LABEL: extract_subvector128_v64i8:
; SKX:       ## %bb.0:
; SKX-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
  ret <16 x i8> %r1
}

define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwind {
; SKX-LABEL: extract_subvector128_v64i8_first_element:
; SKX:       ## %bb.0:
; SKX-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %r1
}

; --- Upper 256-bit half of a 512-bit integer vector, returned by value.

define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind {
; SKX-LABEL: extract_subvector256_v32i16:
; SKX:       ## %bb.0:
; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; SKX-NEXT:    retq
  %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i16> %r1
}

define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind {
; SKX-LABEL: extract_subvector256_v64i8:
; SKX:       ## %bb.0:
; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; SKX-NEXT:    retq
  %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  ret <32 x i8> %r1
}

; --- Upper 128-bit half of a 256-bit vector, stored with align 1.

define void @extract_subvector256_v8f64_store(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8f64_store:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 1
  ret void
}

define void @extract_subvector256_v8f32_store(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8f32_store:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 1
  ret void
}

define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4i64_store:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 1
  ret void
}

define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8i32_store:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 1
  ret void
}

define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v16i16_store:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %1, align 1
  ret void
}

define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v32i8_store:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 1
  ret void
}

; --- Lower 128-bit half of a 256-bit vector, stored with align 1 and align 16.

define void @extract_subvector256_v4f64_store_lo(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4f64_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 1
  ret void
}

define void @extract_subvector256_v4f64_store_lo_align_16(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4f64_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 16
  ret void
}

define void @extract_subvector256_v4f32_store_lo(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4f32_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 1
  ret void
}

define void @extract_subvector256_v4f32_store_lo_align_16(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4f32_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 16
  ret void
}

define void @extract_subvector256_v2i64_store_lo(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v2i64_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 1
  ret void
}

define void @extract_subvector256_v2i64_store_lo_align_16(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v2i64_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 16
  ret void
}

define void @extract_subvector256_v4i32_store_lo(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4i32_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 1
  ret void
}

define void @extract_subvector256_v4i32_store_lo_align_16(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4i32_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 16
  ret void
}

define void @extract_subvector256_v8i16_store_lo(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8i16_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %1, align 1
  ret void
}

define void @extract_subvector256_v8i16_store_lo_align_16(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8i16_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %1, align 16
  ret void
}

define void @extract_subvector256_v16i8_store_lo(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v16i8_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 1
  ret void
}

define void @extract_subvector256_v16i8_store_lo_align_16(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v16i8_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 16
  ret void
}

; --- Lowest 128 bits of a 512-bit vector, stored with align 1 and align 16.

define void @extract_subvector512_v2f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2f64_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 1
  ret void
}

define void @extract_subvector512_v2f64_store_lo_align_16(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2f64_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast double* %addr to <2 x double>*
  store <2 x double> %0, <2 x double>* %1, align 16
  ret void
}

define void @extract_subvector512_v4f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f32_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 1
  ret void
}

define void @extract_subvector512_v4f32_store_lo_align_16(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f32_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast float* %addr to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 16
  ret void
}

define void @extract_subvector512_v2i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2i64_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 1
  ret void
}

define void @extract_subvector512_v2i64_store_lo_align_16(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2i64_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
  %1 = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %1, align 16
  ret void
}

define void @extract_subvector512_v4i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i32_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 1
  ret void
}

define void @extract_subvector512_v4i32_store_lo_align_16(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i32_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %1, align 16
  ret void
}

define void @extract_subvector512_v8i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i16_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %1, align 1
  ret void
}

define void @extract_subvector512_v16i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i8_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 1
  ret void
}

define void @extract_subvector512_v16i8_store_lo_align_16(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i8_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %xmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 16
  ret void
}

; --- Lower 256-bit half of a 512-bit vector, stored with align 1, 16 and 32.

define void @extract_subvector512_v4f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f64_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast double* %addr to <4 x double>*
  store <4 x double> %0, <4 x double>* %1, align 1
  ret void
}

define void @extract_subvector512_v4f64_store_lo_align_16(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast double* %addr to <4 x double>*
  store <4 x double> %0, <4 x double>* %1, align 16
  ret void
}

define void @extract_subvector512_v4f64_store_lo_align_32(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast double* %addr to <4 x double>*
  store <4 x double> %0, <4 x double>* %1, align 32
  ret void
}

define void @extract_subvector512_v8f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8f32_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast float* %addr to <8 x float>*
  store <8 x float> %0, <8 x float>* %1, align 1
  ret void
}

define void @extract_subvector512_v8f32_store_lo_align_16(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast float* %addr to <8 x float>*
  store <8 x float> %0, <8 x float>* %1, align 16
  ret void
}

define void @extract_subvector512_v8f32_store_lo_align_32(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast float* %addr to <8 x float>*
  store <8 x float> %0, <8 x float>* %1, align 32
  ret void
}

define void @extract_subvector512_v4i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i64_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i64* %addr to <4 x i64>*
  store <4 x i64> %0, <4 x i64>* %1, align 1
  ret void
}

define void @extract_subvector512_v4i64_store_lo_align_16(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i64* %addr to <4 x i64>*
  store <4 x i64> %0, <4 x i64>* %1, align 16
  ret void
}

define void @extract_subvector512_v4i64_store_lo_align_32(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = bitcast i64* %addr to <4 x i64>*
  store <4 x i64> %0, <4 x i64>* %1, align 32
  ret void
}

define void @extract_subvector512_v8i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i32_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i32* %addr to <8 x i32>*
  store <8 x i32> %0, <8 x i32>* %1, align 1
  ret void
}

define void @extract_subvector512_v8i32_store_lo_align_16(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i32* %addr to <8 x i32>*
  store <8 x i32> %0, <8 x i32>* %1, align 16
  ret void
}

define void @extract_subvector512_v8i32_store_lo_align_32(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i32* %addr to <8 x i32>*
  store <8 x i32> %0, <8 x i32>* %1, align 32
  ret void
}

define void @extract_subvector512_v16i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i16_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i16* %addr to <16 x i16>*
  store <16 x i16> %0, <16 x i16>* %1, align 1
  ret void
}

define void @extract_subvector512_v16i16_store_lo_align_16(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i16* %addr to <16 x i16>*
  store <16 x i16> %0, <16 x i16>* %1, align 16
  ret void
}

define void @extract_subvector512_v16i16_store_lo_align_32(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast i16* %addr to <16 x i16>*
  store <16 x i16> %0, <16 x i16>* %1, align 32
  ret void
}

define void @extract_subvector512_v32i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v32i8_store_lo:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %1 = bitcast i8* %addr to <32 x i8>*
  store <32 x i8> %0, <32 x i8>* %1, align 1
  ret void
}

define void @extract_subvector512_v32i8_store_lo_align_16(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_16:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovups %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %1 = bitcast i8* %addr to <32 x i8>*
  store <32 x i8> %0, <32 x i8>* %1, align 16
  ret void
}

define void @extract_subvector512_v32i8_store_lo_align_32(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vmovaps %ymm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %1 = bitcast i8* %addr to <32 x i8>*
  store <32 x i8> %0, <32 x i8>* %1, align 32
  ret void
}

; --- Subvector extract combined with a select on the low bits of an i8 mask:
; --- the "mask" variants select against %__W, the "maskz" variants against
; --- zeroinitializer.

define <4 x double> @test_mm512_mask_extractf64x4_pd(<4 x double> %__W, i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_mask_extractf64x4_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf64x4 $1, %zmm1, %ymm0 {%k1}
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> %__W
  ret <4 x double> %1
}

define <4 x double> @test_mm512_maskz_extractf64x4_pd(i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_maskz_extractf64x4_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> zeroinitializer
  ret <4 x double> %1
}

define <4 x float> @test_mm512_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_mask_extractf32x4_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf32x4 $1, %zmm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x double> %__A to <16 x float>
  %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
  ret <4 x float> %2
}

define <4 x float> @test_mm512_maskz_extractf32x4_ps(i8 %__U, <8 x double> %__A) {
; SKX-LABEL: test_mm512_maskz_extractf32x4_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x double> %__A to <16 x float>
  %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
  ret <4 x float> %2
}

define <2 x double> @test_mm256_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <4 x double> %__A) {
; SKX-LABEL: test_mm256_mask_extractf64x2_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf64x2 $1, %ymm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W
  ret <2 x double> %1
}

define <2 x double> @test_mm256_maskz_extractf64x2_pd(i8 %__U, <4 x double> %__A) {
; SKX-LABEL: test_mm256_maskz_extractf64x2_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer
  ret <2 x double> %1
}

define <2 x i64> @test_mm256_mask_extracti64x2_epi64(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_mask_extracti64x2_epi64:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextracti64x2 $1, %ymm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> %__W
  ret <2 x i64> %1
}

define <2 x i64> @test_mm256_maskz_extracti64x2_epi64(i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_maskz_extracti64x2_epi64:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
  ret <2 x i64> %1
}

define <4 x float> @test_mm256_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x float> %__A) {
; SKX-LABEL: test_mm256_mask_extractf32x4_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf32x4 $1, %ymm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
  ret <4 x float> %1
}

define <4 x float> @test_mm256_maskz_extractf32x4_ps(i8 %__U, <8 x float> %__A) {
; SKX-LABEL: test_mm256_maskz_extractf32x4_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
  ret <4 x float> %1
}

define <2 x i64> @test_mm256_mask_extracti32x4_epi32(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_mask_extracti32x4_epi32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextracti32x4 $1, %ymm1, %xmm0 {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <8 x i32>
  %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = bitcast <2 x i64> %__W to <4 x i32>
  %2 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> %1
  %4 = bitcast <4 x i32> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm256_maskz_extracti32x4_epi32(i8 %__U, <4 x i64> %__A) {
; SKX-LABEL: test_mm256_maskz_extracti32x4_epi32:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vextracti32x4 $1, %ymm0,
%xmm0 {%k1} {z} 848; SKX-NEXT: vzeroupper 849; SKX-NEXT: retq 850entry: 851 %0 = bitcast <4 x i64> %__A to <8 x i32> 852 %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 853 %1 = bitcast i8 %__U to <8 x i1> 854 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 855 %2 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer 856 %3 = bitcast <4 x i32> %2 to <2 x i64> 857 ret <2 x i64> %3 858} 859 860define <8 x float> @test_mm512_mask_extractf32x8_ps(<8 x float> %__W, i8 %__U, <16 x float> %__A) { 861; SKX-LABEL: test_mm512_mask_extractf32x8_ps: 862; SKX: ## %bb.0: ## %entry 863; SKX-NEXT: kmovd %edi, %k1 864; SKX-NEXT: vextractf32x8 $1, %zmm1, %ymm0 {%k1} 865; SKX-NEXT: retq 866entry: 867 %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 868 %0 = bitcast i8 %__U to <8 x i1> 869 %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> %__W 870 ret <8 x float> %1 871} 872 873define <8 x float> @test_mm512_maskz_extractf32x8_ps(i8 %__U, <16 x float> %__A) { 874; SKX-LABEL: test_mm512_maskz_extractf32x8_ps: 875; SKX: ## %bb.0: ## %entry 876; SKX-NEXT: kmovd %edi, %k1 877; SKX-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} 878; SKX-NEXT: retq 879entry: 880 %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 881 %0 = bitcast i8 %__U to <8 x i1> 882 %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> zeroinitializer 883 ret <8 x float> %1 884} 885 886define <2 x double> @test_mm512_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <8 x double> %__A) { 887; SKX-LABEL: test_mm512_mask_extractf64x2_pd: 888; SKX: ## %bb.0: ## %entry 889; SKX-NEXT: kmovd %edi, %k1 890; SKX-NEXT: vextractf64x2 $3, %zmm1, %xmm0 {%k1} 891; SKX-NEXT: vzeroupper 892; SKX-NEXT: retq 893entry: 894 %shuffle = 
shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7> 895 %0 = bitcast i8 %__U to <8 x i1> 896 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 897 %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W 898 ret <2 x double> %1 899} 900 901define <2 x double> @test_mm512_maskz_extractf64x2_pd(i8 %__U, <8 x double> %__A) { 902; SKX-LABEL: test_mm512_maskz_extractf64x2_pd: 903; SKX: ## %bb.0: ## %entry 904; SKX-NEXT: kmovd %edi, %k1 905; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm0 {%k1} {z} 906; SKX-NEXT: vzeroupper 907; SKX-NEXT: retq 908entry: 909 %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7> 910 %0 = bitcast i8 %__U to <8 x i1> 911 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 912 %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer 913 ret <2 x double> %1 914} 915