; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=skylake -mtriple=i386-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X86 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-avx512f | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=-avx2 | FileCheck --check-prefix=NOGATHER %s

declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i32> %passthro)

define <2 x i32> @masked_gather_v2i32(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB0_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB0_3
; NOGATHER-NEXT:  .LBB0_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB0_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB0_4
; NOGATHER-NEXT:  .LBB0_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x i32*>, <2 x i32*>* %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  ret <2 x i32> %res
}

define <4 x i32> @masked_gather_v2i32_concat(<2 x i32*>* %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB1_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB1_3
; NOGATHER-NEXT:  .LBB1_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB1_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB1_4
; NOGATHER-NEXT:  .LBB1_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x i32*>, <2 x i32*>* %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  %res2 = shufflevector <2 x i32> %res, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res2
}

declare <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthro)

define <2 x float> @masked_gather_v2float(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB2_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB2_3
; NOGATHER-NEXT:  .LBB2_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB2_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB2_4
; NOGATHER-NEXT:  .LBB2_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x float*>, <2 x float*>* %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  ret <2 x float> %res
}

define <4 x float> @masked_gather_v2float_concat(<2 x float*>* %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB3_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB3_3
; NOGATHER-NEXT:  .LBB3_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB3_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB3_4
; NOGATHER-NEXT:  .LBB3_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x float*>, <2 x float*>* %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  %res2 = shufflevector <2 x float> %res, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %res2
}


declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthro)

define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i32> %passthro) {
; X86-LABEL: masked_gather_v4i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vpgatherdd %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovdqa %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vpgatherqd %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovdqa %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm1
; NOGATHER-NEXT:    vmovmskps %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB4_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB4_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $1, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB4_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB4_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB4_7
; NOGATHER-NEXT:  .LBB4_8: # %else8
; NOGATHER-NEXT:    vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB4_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $2, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB4_8
; NOGATHER-NEXT:  .LBB4_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT:    vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 0, <4 x i1> %masks, <4 x i32> %passthro)
  ret <4 x i32> %res
}

declare <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 %align, <4 x i1> %masks, <4 x float> %passthro)

define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <4 x float> %passthro) {
; X86-LABEL: masked_gather_v4float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vgatherdps %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vgatherqps %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm1
; NOGATHER-NEXT:    vmovmskps %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; NOGATHER-NEXT:  .LBB5_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB5_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:  .LBB5_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB5_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB5_7
; NOGATHER-NEXT:  .LBB5_8: # %else8
; NOGATHER-NEXT:    vmovaps %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB5_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB5_8
; NOGATHER-NEXT:  .LBB5_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; NOGATHER-NEXT:    vmovaps %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x float> @llvm.masked.gather.v4float(<4 x float*> %ptrs, i32 0, <4 x i1> %masks, <4 x float> %passthro)
  ret <4 x float> %res
}

declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptrs, i32 %align, <8 x i1> %masks, <8 x i32> %passthro)

define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i32> %passthro) {
; X86-LABEL: masked_gather_v8i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %ymm2
; X86-NEXT:    vpgatherdd %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vmovdqa 32(%rdi), %ymm3
; X64-NEXT:    vextracti128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vpgatherqd %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vpgatherqd %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpsllw $15, %xmm0, %xmm0
; NOGATHER-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovmskb %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB6_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    je .LBB6_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $2, (%rcx), %xmm1, %xmm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB6_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $3, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_8: # %else8
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm0
; NOGATHER-NEXT:    testb $16, %al
; NOGATHER-NEXT:    je .LBB6_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB6_10: # %else11
; NOGATHER-NEXT:    testb $32, %al
; NOGATHER-NEXT:    je .LBB6_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vpinsrd $1, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB6_12: # %else14
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $64, %al
; NOGATHER-NEXT:    jne .LBB6_13
; NOGATHER-NEXT:  # %bb.14: # %else17
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    jne .LBB6_15
; NOGATHER-NEXT:  .LBB6_16: # %else20
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB6_13: # %cond.load16
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vpinsrd $2, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    je .LBB6_16
; NOGATHER-NEXT:  .LBB6_15: # %cond.load19
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <8 x i32*>, <8 x i32*>* %ptr
  %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ld, i32 0, <8 x i1> %masks, <8 x i32> %passthro)
  ret <8 x i32> %res
}

declare <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ptrs, i32 %align, <8 x i1> %masks, <8 x float> %passthro)

define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <8 x float> %passthro) {
; X86-LABEL: masked_gather_v8float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovaps (%eax), %ymm2
; X86-NEXT:    vgatherdps %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovaps %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vmovaps (%rdi), %ymm2
; X64-NEXT:    vmovaps 32(%rdi), %ymm3
; X64-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vgatherqps %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vgatherqps %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpsllw $15, %xmm0, %xmm0
; NOGATHER-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovmskb %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; NOGATHER-NEXT:  .LBB7_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB7_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    je .LBB7_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm1[0,1],mem[0],xmm1[3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB7_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],mem[0]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_8: # %else8
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm0
; NOGATHER-NEXT:    testb $16, %al
; NOGATHER-NEXT:    je .LBB7_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB7_10: # %else11
; NOGATHER-NEXT:    testb $32, %al
; NOGATHER-NEXT:    je .LBB7_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:  .LBB7_12: # %else14
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $64, %al
; NOGATHER-NEXT:    jne .LBB7_13
; NOGATHER-NEXT:  # %bb.14: # %else17
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    jne .LBB7_15
; NOGATHER-NEXT:  .LBB7_16: # %else20
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB7_13: # %cond.load16
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    je .LBB7_16
; NOGATHER-NEXT:  .LBB7_15: # %cond.load19
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <8 x float*>, <8 x float*>* %ptr
  %res = call <8 x float> @llvm.masked.gather.v8float(<8 x float*> %ld, i32 0, <8 x i1> %masks, <8 x float> %passthro)
  ret <8 x float> %res
}

declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i64> %passthro)

define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i64> %passthro) {
; X86-LABEL: masked_gather_v4i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %xmm2
; X86-NEXT:    vpgatherdq %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vpgatherqq %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskps %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrq $0, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB8_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrq $1, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB8_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB8_7
; NOGATHER-NEXT:  .LBB8_8: # %else8
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB8_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vpinsrq $0, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB8_8
; NOGATHER-NEXT:  .LBB8_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm0, %xmm0
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <4 x i64*>, <4 x i64*>* %ptr
  %res = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %ld, i32 0, <4 x i1> %masks, <4 x i64> %passthro)
  ret <4 x i64> %res
}

declare <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ptrs, i32 %align, <4 x i1> %masks, <4 x double> %passthro)

define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks, <4 x double> %passthro) {
; X86-LABEL: masked_gather_v4double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovapd (%eax), %xmm2
; X86-NEXT:    vgatherdpd %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovapd %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovapd (%rdi), %ymm2
; X64-NEXT:    vgatherqpd %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovapd %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskps %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; NOGATHER-NEXT:  .LBB9_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB9_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm0 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB9_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB9_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB9_7
; NOGATHER-NEXT:  .LBB9_8: # %else8
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB9_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm2
; NOGATHER-NEXT:    vmovlps {{.*#+}} xmm2 = mem[0,1],xmm2[2,3]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB9_8
; NOGATHER-NEXT:  .LBB9_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <4 x double*>, <4 x double*>* %ptr
  %res = call <4 x double> @llvm.masked.gather.v4double(<4 x double*> %ld, i32 0, <4 x i1> %masks, <4 x double> %passthro)
  ret <4 x double> %res
}

declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ptrs, i32 %align, <2 x i1> %masks, <2 x i64> %passthro)

define <2 x i64> @masked_gather_v2i64(<2 x i64*>* %ptr, <2 x i1> %masks, <2 x i64> %passthro) {
; X86-LABEL: masked_gather_v2i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpgatherdq %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpgatherqq %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB10_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB10_3
; NOGATHER-NEXT:  .LBB10_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB10_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrq $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB10_4
; NOGATHER-NEXT:  .LBB10_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x i64*>, <2 x i64*>* %ptr
  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %ld, i32 0, <2 x i1> %masks, <2 x i64> %passthro)
  ret <2 x i64> %res
}

declare <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ptrs, i32 %align, <2 x i1> %masks, <2 x double> %passthro)

define <2 x double> @masked_gather_v2double(<2 x double*>* %ptr, <2 x i1> %masks, <2 x double> %passthro) {
; X86-LABEL: masked_gather_v2double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdpd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovapd %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovapd (%rdi), %xmm2
; X64-NEXT:    vgatherqpd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovapd %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB11_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB11_3
; NOGATHER-NEXT:  .LBB11_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB11_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB11_4
; NOGATHER-NEXT:  .LBB11_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x double*>, <2 x double*>* %ptr
  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ld, i32 0, <2 x i1> %masks, <2 x double> %passthro)
  ret <2 x double> %res
}


define <2 x double> @masked_gather_zeromask(<2 x double*>* %ptr, <2 x double> %dummy, <2 x double> %passthru) {
; X86-LABEL: masked_gather_zeromask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_zeromask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_zeromask:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x double*>, <2 x double*>* %ptr
  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ld, i32 0, <2 x i1> zeroinitializer, <2 x double> %passthru)
  ret <2 x double> %res
}