; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-64
; RUN: llc -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-32

; An i1 phi of two fcmp results feeding a select: the compare results are kept
; in a mask register (%k1) across the CFG and consumed by a masked vmovss.
define void @test_fcmp_storefloat(i1 %cond, float* %fptr, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
; X86-64-LABEL: test_fcmp_storefloat:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB0_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    vcmpeqss %xmm3, %xmm2, %k1
; X86-64-NEXT:    jmp .LBB0_3
; X86-64-NEXT:  .LBB0_2: # %else
; X86-64-NEXT:    vcmpeqss %xmm5, %xmm4, %k1
; X86-64-NEXT:  .LBB0_3: # %exit
; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT:    vmovss %xmm1, (%rsi)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_fcmp_storefloat:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB0_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1
; X86-32-NEXT:    jmp .LBB0_3
; X86-32-NEXT:  .LBB0_2: # %else
; X86-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1
; X86-32-NEXT:  .LBB0_3: # %exit
; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-32-NEXT:    vmovss %xmm0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %cmp1 = fcmp oeq float %f3, %f4
  br label %exit

else:
  %cmp2 = fcmp oeq float %f5, %f6
  br label %exit

exit:
  %val = phi i1 [%cmp1, %if], [%cmp2, %else]
  %selected = select i1 %val, float %f1, float %f2
  store float %selected, float* %fptr
  ret void
}

; An i1 phi of two fcmp results stored straight to memory: each arm compares
; into %k0 and stores the mask byte with kmovb.
define void @test_fcmp_storei1(i1 %cond, float* %fptr, i1* %iptr, float %f1, float %f2, float %f3, float %f4) {
; X86-64-LABEL: test_fcmp_storei1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB1_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    vcmpeqss %xmm1, %xmm0, %k0
; X86-64-NEXT:    kmovb %k0, (%rdx)
; X86-64-NEXT:    retq
; X86-64-NEXT:  .LBB1_2: # %else
; X86-64-NEXT:    vcmpeqss %xmm3, %xmm2, %k0
; X86-64-NEXT:    kmovb %k0, (%rdx)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_fcmp_storei1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB1_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0
; X86-32-NEXT:    kmovb %k0, (%eax)
; X86-32-NEXT:    retl
; X86-32-NEXT:  .LBB1_2: # %else
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0
; X86-32-NEXT:    kmovb %k0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %cmp1 = fcmp oeq float %f1, %f2
  br label %exit

else:
  %cmp2 = fcmp oeq float %f3, %f4
  br label %exit

exit:
  %val = phi i1 [%cmp1, %if], [%cmp2, %else]
  store i1 %val, i1* %iptr
  ret void
}

; An i1 add of two loaded i1s phi'd with a plain i1 load: the arithmetic is
; done in GPRs (movb/addb) and the result moved into a mask with kmovd.
define void @test_load_add(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float %f1, float %f2) {
; X86-64-LABEL: test_load_add:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB2_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    movb (%rdx), %al
; X86-64-NEXT:    addb (%rcx), %al
; X86-64-NEXT:    jmp .LBB2_3
; X86-64-NEXT:  .LBB2_2: # %else
; X86-64-NEXT:    movb (%rcx), %al
; X86-64-NEXT:  .LBB2_3: # %exit
; X86-64-NEXT:    kmovd %eax, %k1
; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT:    vmovss %xmm1, (%rsi)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_load_add:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB2_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-32-NEXT:    movb (%edx), %dl
; X86-32-NEXT:    addb (%ecx), %dl
; X86-32-NEXT:    jmp .LBB2_3
; X86-32-NEXT:  .LBB2_2: # %else
; X86-32-NEXT:    movb (%ecx), %dl
; X86-32-NEXT:  .LBB2_3: # %exit
; X86-32-NEXT:    kmovd %edx, %k1
; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-32-NEXT:    vmovss %xmm0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i1, i1* %iptr1
  %loaded2if = load i1, i1* %iptr2
  %added = add i1 %loaded1, %loaded2if
  br label %exit

else:
  %loaded2else = load i1, i1* %iptr2
  br label %exit

exit:
  %val = phi i1 [%added, %if], [%loaded2else, %else]
  %selected = select i1 %val, float %f1, float %f2
  store float %selected, float* %fptr
  ret void
}

; An i1 phi of two plain i1 loads used as a select mask: the loads go directly
; into a mask register with kmovb.
define void @test_load_i1(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float %f1, float %f2) {
; X86-64-LABEL: test_load_i1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB3_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:    jmp .LBB3_3
; X86-64-NEXT:  .LBB3_2: # %else
; X86-64-NEXT:    kmovb (%rcx), %k1
; X86-64-NEXT:  .LBB3_3: # %exit
; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT:    vmovss %xmm1, (%rsi)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_load_i1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB3_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    jmp .LBB3_3
; X86-32-NEXT:  .LBB3_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:  .LBB3_3: # %exit
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-32-NEXT:    vmovss %xmm0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i1, i1* %iptr1
  br label %exit

else:
  %loaded2 = load i1, i1* %iptr2
  br label %exit

exit:
  %val = phi i1 [%loaded1, %if], [%loaded2, %else]
  %selected = select i1 %val, float %f1, float %f2
  store float %selected, float* %fptr
  ret void
}

; An i1 load/store round trip with no vector use: the value stays in GPRs,
; masked with `andb $1` before the store — no k-registers are needed.
define void @test_loadi1_storei1(i1 %cond, i1* %iptr1, i1* %iptr2, i1* %iptr3) {
; X86-64-LABEL: test_loadi1_storei1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB4_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    movb (%rsi), %al
; X86-64-NEXT:    jmp .LBB4_3
; X86-64-NEXT:  .LBB4_2: # %else
; X86-64-NEXT:    movb (%rdx), %al
; X86-64-NEXT:  .LBB4_3: # %exit
; X86-64-NEXT:    andb $1, %al
; X86-64-NEXT:    movb %al, (%rcx)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_loadi1_storei1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB4_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    jmp .LBB4_3
; X86-32-NEXT:  .LBB4_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:  .LBB4_3: # %exit
; X86-32-NEXT:    movb (%ecx), %cl
; X86-32-NEXT:    andb $1, %cl
; X86-32-NEXT:    movb %cl, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i1, i1* %iptr1
  br label %exit

else:
  %loaded2 = load i1, i1* %iptr2
  br label %exit

exit:
  %val = phi i1 [%loaded1, %if], [%loaded2, %else]
  store i1 %val, i1* %iptr3
  ret void
}

; shl-by-1 on an i8 that becomes a <8 x i1> mask: performed on the mask
; register itself as kaddb (x+x == x<<1).
define void @test_shl1(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
; X86-64-LABEL: test_shl1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB5_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kaddb %k0, %k0, %k1
; X86-64-NEXT:    jmp .LBB5_3
; X86-64-NEXT:  .LBB5_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB5_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shl1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB5_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kaddb %k0, %k0, %k1
; X86-32-NEXT:    jmp .LBB5_3
; X86-32-NEXT:  .LBB5_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB5_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, i8* %ptr1
  %shifted = shl i8 %loaded1, 1
  br label %exit

else:
  %loaded2 = load i8, i8* %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, <8 x float>* %fptrvec
  ret void
}

; lshr-by-1 on an i8 mask source: this one is done in a GPR (shrb) before
; moving into a mask register at the join.
define void @test_shr1(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
; X86-64-LABEL: test_shr1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB6_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    movb (%rsi), %al
; X86-64-NEXT:    shrb %al
; X86-64-NEXT:    jmp .LBB6_3
; X86-64-NEXT:  .LBB6_2: # %else
; X86-64-NEXT:    movb (%rdx), %al
; X86-64-NEXT:  .LBB6_3: # %exit
; X86-64-NEXT:    kmovd %eax, %k1
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shr1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB6_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    movb (%ecx), %cl
; X86-32-NEXT:    shrb %cl
; X86-32-NEXT:    jmp .LBB6_3
; X86-32-NEXT:  .LBB6_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    movb (%ecx), %cl
; X86-32-NEXT:  .LBB6_3: # %exit
; X86-32-NEXT:    kmovd %ecx, %k1
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, i8* %ptr1
  %shifted = lshr i8 %loaded1, 1
  br label %exit

else:
  %loaded2 = load i8, i8* %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, <8 x float>* %fptrvec
  ret void
}

; lshr-by-2 on an i8 mask source: stays on the mask register via kshiftrb.
define void @test_shr2(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
; X86-64-LABEL: test_shr2:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB7_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kshiftrb $2, %k0, %k1
; X86-64-NEXT:    jmp .LBB7_3
; X86-64-NEXT:  .LBB7_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB7_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shr2:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB7_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kshiftrb $2, %k0, %k1
; X86-32-NEXT:    jmp .LBB7_3
; X86-32-NEXT:  .LBB7_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB7_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, i8* %ptr1
  %shifted = lshr i8 %loaded1, 2
  br label %exit

else:
  %loaded2 = load i8, i8* %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, <8 x float>* %fptrvec
  ret void
}

; shl-by-6 on an i8 mask source: stays on the mask register via kshiftlb.
define void @test_shl(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
; X86-64-LABEL: test_shl:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB8_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kshiftlb $6, %k0, %k1
; X86-64-NEXT:    jmp .LBB8_3
; X86-64-NEXT:  .LBB8_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB8_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shl:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB8_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kshiftlb $6, %k0, %k1
; X86-32-NEXT:    jmp .LBB8_3
; X86-32-NEXT:  .LBB8_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB8_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, i8* %ptr1
  %shifted = shl i8 %loaded1, 6
  br label %exit

else:
  %loaded2 = load i8, i8* %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, <8 x float>* %fptrvec
  ret void
}

; and/add of two i8 mask values hoisted into entry: both loads go straight to
; mask registers and the arithmetic uses kandb/kaddb.
define void @test_add(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
; X86-64-LABEL: test_add:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB9_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kandb %k1, %k0, %k1
; X86-64-NEXT:    jmp .LBB9_3
; X86-64-NEXT:  .LBB9_2: # %else
; X86-64-NEXT:    kaddb %k1, %k0, %k1
; X86-64-NEXT:  .LBB9_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_add:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-32-NEXT:    kmovb (%edx), %k0
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB9_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    kandb %k1, %k0, %k1
; X86-32-NEXT:    jmp .LBB9_3
; X86-32-NEXT:  .LBB9_2: # %else
; X86-32-NEXT:    kaddb %k1, %k0, %k1
; X86-32-NEXT:  .LBB9_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  %loaded1 = load i8, i8* %ptr1
  %loaded2 = load i8, i8* %ptr2
  br i1 %cond, label %if, label %else

if:
  %and = and i8 %loaded1, %loaded2
  br label %exit

else:
  %add = add i8 %loaded1, %loaded2
  br label %exit

exit:
  %val = phi i8 [%and, %if], [%add, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, <8 x float>* %fptrvec
  ret void
}