; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s

; Every function below takes a contiguous run of elements out of a wider
; vector with a shufflevector whose second operand is undef. The first group
; returns the sub-vector in a register; the remaining groups store it through
; a pointer with "align 1". Local value names are arbitrary and do not appear
; in the expected assembly; the "entry" block label does.

;------------------------------------------------------------------------------
; 512-bit source, 128-bit result returned in a register.
;------------------------------------------------------------------------------

; Elements 16..23 of <32 x i16>: the 128-bit lane with index 2.
define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
; SKX-LABEL: extract_subvector128_v32i16:
; SKX: ## BB#0:
; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
; SKX-NEXT: retq
  %sub = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <8 x i16> %sub
}

; Elements 0..7 of <32 x i16>: the lowest lane, so no extract instruction is
; expected, only a register-kill annotation.
define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounwind {
; SKX-LABEL: extract_subvector128_v32i16_first_element:
; SKX: ## BB#0:
; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT: retq
  %sub = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %sub
}

; Elements 32..47 of <64 x i8>: the 128-bit lane with index 2.
define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind {
; SKX-LABEL: extract_subvector128_v64i8:
; SKX: ## BB#0:
; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
; SKX-NEXT: retq
  %sub = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
  ret <16 x i8> %sub
}

; Elements 0..15 of <64 x i8>: the lowest lane, so no extract instruction is
; expected, only a register-kill annotation.
define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwind {
; SKX-LABEL: extract_subvector128_v64i8_first_element:
; SKX: ## BB#0:
; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT: retq
  %sub = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %sub
}

;------------------------------------------------------------------------------
; 512-bit source, upper 256-bit half returned in a register.
;------------------------------------------------------------------------------

; Elements 16..31 of <32 x i16>.
define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind {
; SKX-LABEL: extract_subvector256_v32i16:
; SKX: ## BB#0:
; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX-NEXT: retq
  %sub = shufflevector <32 x i16> %x, <32 x i16> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i16> %sub
}

; Elements 32..63 of <64 x i8>.
define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind {
; SKX-LABEL: extract_subvector256_v64i8:
; SKX: ## BB#0:
; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX-NEXT: retq
  %sub = shufflevector <64 x i8> %x, <64 x i8> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  ret <32 x i8> %sub
}

;------------------------------------------------------------------------------
; 256-bit source, upper 128-bit half stored to memory. The extract is expected
; to write straight to (%rdi) with no separate store instruction.
;------------------------------------------------------------------------------

; NOTE(review): the name says v8f64 but the operand is <4 x double>; the name
; is left alone because the label check below refers to it.
define void @extract_subvector256_v8f64_store(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8f64_store:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vextractf64x2 $1, %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  %dst = bitcast double* %addr to <2 x double>*
  store <2 x double> %sub, <2 x double>* %dst, align 1
  ret void
}

; Elements 4..7 of <8 x float>.
define void @extract_subvector256_v8f32_store(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8f32_store:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vextractf32x4 $1, %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %dst = bitcast float* %addr to <4 x float>*
  store <4 x float> %sub, <4 x float>* %dst, align 1
  ret void
}

; Elements 2..3 of <4 x i64>.
define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4i64_store:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vextracti64x2 $1, %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %dst = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %sub, <2 x i64>* %dst, align 1
  ret void
}

; Elements 4..7 of <8 x i32>.
define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8i32_store:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vextracti32x4 $1, %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %dst = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %sub, <4 x i32>* %dst, align 1
  ret void
}

; Elements 8..15 of <16 x i16>; the expected extract uses the 32x4 form.
define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v16i16_store:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vextracti32x4 $1, %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %dst = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %sub, <8 x i16>* %dst, align 1
  ret void
}

; Elements 16..31 of <32 x i8>; the expected extract uses the 32x4 form.
define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v32i8_store:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vextracti32x4 $1, %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %dst = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %sub, <16 x i8>* %dst, align 1
  ret void
}

;------------------------------------------------------------------------------
; 256-bit source, lower 128-bit half stored to memory. A plain unaligned move
; of %xmm0 is expected, with no extract instruction.
;------------------------------------------------------------------------------

; NOTE(review): this name carries the source type (v4f64) while the other
; "_lo" tests in this group carry the result type; left unchanged.
define void @extract_subvector256_v4f64_store_lo(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4f64_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovupd %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  %dst = bitcast double* %addr to <2 x double>*
  store <2 x double> %sub, <2 x double>* %dst, align 1
  ret void
}

; Elements 0..3 of <8 x float>.
define void @extract_subvector256_v4f32_store_lo(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4f32_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %dst = bitcast float* %addr to <4 x float>*
  store <4 x float> %sub, <4 x float>* %dst, align 1
  ret void
}

; Elements 0..1 of <4 x i64>.
define void @extract_subvector256_v2i64_store_lo(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v2i64_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovdqu64 %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  %dst = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %sub, <2 x i64>* %dst, align 1
  ret void
}

; Elements 0..3 of <8 x i32>.
define void @extract_subvector256_v4i32_store_lo(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v4i32_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %dst = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %sub, <4 x i32>* %dst, align 1
  ret void
}

; Elements 0..7 of <16 x i16>.
define void @extract_subvector256_v8i16_store_lo(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v8i16_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %dst = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %sub, <8 x i16>* %dst, align 1
  ret void
}

; Elements 0..15 of <32 x i8>.
define void @extract_subvector256_v16i8_store_lo(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector256_v16i8_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %dst = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %sub, <16 x i8>* %dst, align 1
  ret void
}

;------------------------------------------------------------------------------
; 512-bit source, lowest 128 bits stored to memory. A plain unaligned move of
; %xmm0 is expected.
;------------------------------------------------------------------------------

; Elements 0..1 of <8 x double>.
define void @extract_subvector512_v2f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2f64_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovupd %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %dst = bitcast double* %addr to <2 x double>*
  store <2 x double> %sub, <2 x double>* %dst, align 1
  ret void
}

; Elements 0..3 of <16 x float>.
define void @extract_subvector512_v4f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f32_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %dst = bitcast float* %addr to <4 x float>*
  store <4 x float> %sub, <4 x float>* %dst, align 1
  ret void
}

; Elements 0..1 of <8 x i64>.
define void @extract_subvector512_v2i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v2i64_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovdqu64 %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
  %dst = bitcast i64* %addr to <2 x i64>*
  store <2 x i64> %sub, <2 x i64>* %dst, align 1
  ret void
}

; Elements 0..3 of <16 x i32>.
define void @extract_subvector512_v4i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i32_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %dst = bitcast i32* %addr to <4 x i32>*
  store <4 x i32> %sub, <4 x i32>* %dst, align 1
  ret void
}

; Elements 0..7 of <32 x i16>.
define void @extract_subvector512_v8i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i16_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %dst = bitcast i16* %addr to <8 x i16>*
  store <8 x i16> %sub, <8 x i16>* %dst, align 1
  ret void
}

; Elements 0..15 of <64 x i8>.
define void @extract_subvector512_v16i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i8_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %dst = bitcast i8* %addr to <16 x i8>*
  store <16 x i8> %sub, <16 x i8>* %dst, align 1
  ret void
}

;------------------------------------------------------------------------------
; 512-bit source, lower 256-bit half stored to memory. A plain unaligned move
; of %ymm0 is expected.
;------------------------------------------------------------------------------

; Elements 0..3 of <8 x double>.
define void @extract_subvector512_v4f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4f64_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovupd %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %dst = bitcast double* %addr to <4 x double>*
  store <4 x double> %sub, <4 x double>* %dst, align 1
  ret void
}

; Elements 0..7 of <16 x float>.
define void @extract_subvector512_v8f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8f32_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovups %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %dst = bitcast float* %addr to <8 x float>*
  store <8 x float> %sub, <8 x float>* %dst, align 1
  ret void
}

; Elements 0..3 of <8 x i64>.
define void @extract_subvector512_v4i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v4i64_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovdqu64 %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %dst = bitcast i64* %addr to <4 x i64>*
  store <4 x i64> %sub, <4 x i64>* %dst, align 1
  ret void
}

; Elements 0..7 of <16 x i32>.
define void @extract_subvector512_v8i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v8i32_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovdqu32 %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %dst = bitcast i32* %addr to <8 x i32>*
  store <8 x i32> %sub, <8 x i32>* %dst, align 1
  ret void
}

; Elements 0..15 of <32 x i16>.
define void @extract_subvector512_v16i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v16i16_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovdqu32 %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %dst = bitcast i16* %addr to <16 x i16>*
  store <16 x i16> %sub, <16 x i16>* %dst, align 1
  ret void
}

; Elements 0..31 of <64 x i8>.
define void @extract_subvector512_v32i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
; SKX-LABEL: extract_subvector512_v32i8_store_lo:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vmovdqu32 %ymm0, (%rdi)
; SKX-NEXT: retq
entry:
  %sub = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %dst = bitcast i8* %addr to <32 x i8>*
  store <32 x i8> %sub, <32 x i8>* %dst, align 1
  ret void
}