; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=SKX

; Scalar and vector compare lowering for x86-64 with AVX-512 enabled.
; The two llc invocations above differ only in -mattr: the first enables
; +avx512f alone (prefix KNL), the second adds +avx512bw, +avx512vl and
; +avx512dq (prefix SKX). Checks written with the shared ALL prefix must hold
; for both invocations.

; Branch on an "unordered or not equal" (une) double compare. The expected
; output is one vucomisd followed by a jne/jnp pair; %l1 is the fall-through
; target of that pair.
define double @test1(double %a, double %b) nounwind {
; ALL-LABEL: test1:
; ALL: ## %bb.0:
; ALL-NEXT: vucomisd %xmm1, %xmm0
; ALL-NEXT: jne LBB0_1
; ALL-NEXT: jnp LBB0_2
; ALL-NEXT: LBB0_1: ## %l1
; ALL-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
; ALL-NEXT: LBB0_2: ## %l2
; ALL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
  %tobool = fcmp une double %a, %b
  br i1 %tobool, label %l1, label %l2

l1:
  %c = fsub double %a, %b
  ret double %c
l2:
  %c1 = fadd double %a, %b
  ret double %c1
}

; Branch on an "ordered less than" (olt) float compare. The expected output is
; a single vucomiss followed by one jbe that selects %l2.
define float @test2(float %a, float %b) nounwind {
; ALL-LABEL: test2:
; ALL: ## %bb.0:
; ALL-NEXT: vucomiss %xmm0, %xmm1
; ALL-NEXT: jbe LBB1_2
; ALL-NEXT: ## %bb.1: ## %l1
; ALL-NEXT: vsubss %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
; ALL-NEXT: LBB1_2: ## %l2
; ALL-NEXT: vaddss %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
  %tobool = fcmp olt float %a, %b
  br i1 %tobool, label %l1, label %l2

l1:
  %c = fsub float %a, %b
  ret float %c
l2:
  %c1 = fadd float %a, %b
  ret float %c1
}

; Zero-extend the i1 result of an "ordered equal" (oeq) float compare to i32.
; The expected output writes the compare result to mask register %k0 with
; vcmpeqss and copies it to %eax with kmovw.
define i32 @test3(float %a, float %b) {
; ALL-LABEL: test3:
; ALL: ## %bb.0:
; ALL-NEXT: vcmpeqss %xmm1, %xmm0, %k0
; ALL-NEXT: kmovw %k0, %eax
; ALL-NEXT: retq

  %cmp10.i = fcmp oeq float %a, %b
  %conv11.i = zext i1 %cmp10.i to i32
  ret i32 %conv11.i
}

; Returns %p unchanged when it compares ordered-equal to 0.0; otherwise
; returns 1.0 when %p is ordered-greater-than 0.0 and -1.0 if not. The expected
; output branches on vucomiss for the first compare and implements the select
; as a vcmpltss into %k1 followed by a %k1-masked vmovss.
; NOTE(review): attribute group #0 has no definition in the visible part of
; this file - confirm whether one exists elsewhere or was dropped.
define float @test5(float %p) #0 {
; ALL-LABEL: test5:
; ALL: ## %bb.0: ## %entry
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vucomiss %xmm1, %xmm0
; ALL-NEXT: jne LBB3_1
; ALL-NEXT: jp LBB3_1
; ALL-NEXT: ## %bb.2: ## %return
; ALL-NEXT: retq
; ALL-NEXT: LBB3_1: ## %if.end
; ALL-NEXT: vcmpltss %xmm0, %xmm1, %k1
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: vmovss {{.*}}(%rip), %xmm0 {%k1}
; ALL-NEXT: retq
entry:
  %cmp = fcmp oeq float %p, 0.000000e+00
  br i1 %cmp, label %return, label %if.end

if.end: ; preds = %entry
  %cmp1 = fcmp ogt float %p, 0.000000e+00
  %cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00
  br label %return

return: ; preds = %if.end, %entry
  %retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ]
  ret float %retval.0
}

; Zero-extend the result of an i32 equality compare. The expected output is
; plain xorl/cmpl/sete on general-purpose registers; no mask register appears.
define i32 @test6(i32 %a, i32 %b) {
; ALL-LABEL: test6:
; ALL: ## %bb.0:
; ALL-NEXT: xorl %eax, %eax
; ALL-NEXT: cmpl %esi, %edi
; ALL-NEXT: sete %al
; ALL-NEXT: retq
  %cmp = icmp eq i32 %a, %b
  %res = zext i1 %cmp to i32
  ret i32 %res
}

; Zero-extend the result of an "ordered and not equal" (one) double compare.
; The expected output is vucomisd followed by a single setne.
; NOTE(review): attribute group #2 has no definition in the visible part of
; this file - confirm whether one exists elsewhere or was dropped.
define i32 @test7(double %x, double %y) #2 {
; ALL-LABEL: test7:
; ALL: ## %bb.0: ## %entry
; ALL-NEXT: xorl %eax, %eax
; ALL-NEXT: vucomisd %xmm1, %xmm0
; ALL-NEXT: setne %al
; ALL-NEXT: retq
entry:
  %0 = fcmp one double %x, %y
  %or = zext i1 %0 to i32
  ret i32 %or
}

; Returns 1 when (%a1 == -1 and %a2 == -2147483648) or %a3 == 0, and %a3
; otherwise. The expected output folds the two equality tests into
; notl/xorl/orl and picks the result with cmovel/cmovnel, without branches.
define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
; ALL-LABEL: test8:
; ALL: ## %bb.0:
; ALL-NEXT: notl %edi
; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000
; ALL-NEXT: testl %edx, %edx
; ALL-NEXT: movl $1, %eax
; ALL-NEXT: cmovel %eax, %edx
; ALL-NEXT: orl %edi, %esi
; ALL-NEXT: cmovnel %edx, %eax
; ALL-NEXT: retq
  %tmp1 = icmp eq i32 %a1, -1
  %tmp2 = icmp eq i32 %a2, -2147483648
  %tmp3 = and i1 %tmp1, %tmp2
  %tmp4 = icmp eq i32 %a3, 0
  %tmp5 = or i1 %tmp3, %tmp4
  %res = select i1 %tmp5, i32 1, i32 %a3
  ret i32 %res
}

; Branch on whether bit 0 of an i64 is clear (returns 6) or set (returns 7).
; The expected output tests the bit with a single testb $1 on %dil.
define i32 @test9(i64 %a) {
; ALL-LABEL: test9:
; ALL: ## %bb.0:
; ALL-NEXT: testb $1, %dil
; ALL-NEXT: jne LBB7_2
; ALL-NEXT: ## %bb.1: ## %A
; ALL-NEXT: movl $6, %eax
; ALL-NEXT: retq
; ALL-NEXT: LBB7_2: ## %B
; ALL-NEXT: movl $7, %eax
; ALL-NEXT: retq
  %b = and i64 %a, 1
  %cmp10.i = icmp eq i64 %b, 0
  br i1 %cmp10.i, label %A, label %B
A:
  ret i32 6
B:
  ret i32 7
}

; Branch on %d xor (%d or (%b == %c)), where %d arrives as an i1 argument.
; The expected output does the i1 logic in byte registers (andb/sete/orb/cmpb),
; masking with $1 before the byte compare.
define i32 @test10(i64 %b, i64 %c, i1 %d) {
; ALL-LABEL: test10:
; ALL: ## %bb.0:
; ALL-NEXT: movl %edx, %eax
; ALL-NEXT: andb $1, %al
; ALL-NEXT: cmpq %rsi, %rdi
; ALL-NEXT: sete %cl
; ALL-NEXT: orb %dl, %cl
; ALL-NEXT: andb $1, %cl
; ALL-NEXT: cmpb %cl, %al
; ALL-NEXT: je LBB8_1
; ALL-NEXT: ## %bb.2: ## %if.end.i
; ALL-NEXT: movl $6, %eax
; ALL-NEXT: retq
; ALL-NEXT: LBB8_1: ## %if.then.i
; ALL-NEXT: movl $5, %eax
; ALL-NEXT: retq

  %cmp8.i = icmp eq i64 %b, %c
  %or1 = or i1 %d, %cmp8.i
  %xor1 = xor i1 %d, %or1
  br i1 %xor1, label %if.end.i, label %if.then.i

if.then.i:
  ret i32 5

if.end.i:
  ret i32 6
}

; This test previously caused an infinite loop in legalize vector ops, due to
; CSE triggering on the call to UpdateNodeOperands and the resulting node not
; being passed to LowerOperation. The add is needed to force the zext into a
; sext on that path. The shuffle keeps the zext alive. The xor somehow
; influences the zext to be visited before the sext, exposing the CSE
; opportunity for the sext, since zext of setcc is custom legalized to a sext
; and shift.
; The expected output differs between the two check prefixes only in how the
; compare mask becomes a vector: the KNL checks expect a zero-masked
; vpternlogd on %k1, the SKX checks expect vpmovm2d from %k0. The remaining
; instructions (vpsrld, vpshufd, vpermq, vpsubd) are the same for both.
define <8 x i32> @legalize_loop(<8 x double> %arg) {
; KNL-LABEL: legalize_loop:
; KNL: ## %bb.0:
; KNL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; KNL-NEXT: vcmpnltpd %zmm0, %zmm1, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpsrld $31, %ymm0, %ymm1
; KNL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
; KNL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
; KNL-NEXT: vpsubd %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: legalize_loop:
; SKX: ## %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; SKX-NEXT: vcmpnltpd %zmm0, %zmm1, %k0
; SKX-NEXT: vpmovm2d %k0, %ymm0
; SKX-NEXT: vpsrld $31, %ymm0, %ymm1
; SKX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
; SKX-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
; SKX-NEXT: vpsubd %ymm0, %ymm1, %ymm0
; SKX-NEXT: retq
  ; Per-lane "ordered greater than" compare against zero, giving an <8 x i1>.
  %tmp = fcmp ogt <8 x double> %arg, zeroinitializer
  ; Invert every lane of the compare result (xor with all-true).
  %tmp1 = xor <8 x i1> %tmp, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
  ; Widen each inverted lane to a 0/1 i32.
  %tmp2 = zext <8 x i1> %tmp1 to <8 x i32>
  ; Reverse the order of the eight lanes.
  %tmp3 = shufflevector <8 x i32> %tmp2, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ; Lane-wise sum of the widened value and its reversal.
  %tmp4 = add <8 x i32> %tmp2, %tmp3
  ret <8 x i32> %tmp4
}