; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=KNL

; This test checks combinations of FNEG and FMA intrinsics on AVX-512 target
; PR28892
;
; Reading guide: every negation in this file is spelled as
; `fsub <-0.0, ...>, %x`. Each function feeds a negated operand into an FMA
; intrinsic, or negates an FMA intrinsic's result, and the assertions record
; which FMA flavour (fmadd / fmsub / fnmadd / fnmsub) llc is expected to emit.
; The two llc invocations above differ only in -mattr; assertions under the
; common prefix apply to both, the others to one configuration each.

; fmadd(a, b, -c), rounding operand 4: the negated addend is expected to fold
; into a single vfmsub213ps.
define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 4) #2
  ret <16 x float> %0
}

; 512-bit single-precision intrinsics used by the tests in this file. The
; trailing i32 is the rounding operand; the i16 of the mask.* forms is the
; write mask.
declare <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)


; -(fmadd(a, b, c)): negating the intrinsic result is expected to fold into
; vfnmsub213ps.
define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i32 4) #2
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  ret <16 x float> %sub.i
}

; -(fnmadd(a, b, c)) with mask operand i16 -1: expected to fold into
; vfmsub213ps.
define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  ret <16 x float> %sub.i
}

; -(fnmsub(a, b, c)) with mask operand i16 -1: expected to fold into
; vfmadd213ps.
define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  ret <16 x float> %sub.i
}

; Same IR shape as test1 but with rounding operand 10: the fold is still
; expected, and the emitted vfmsub213ps carries the {ru-sae} annotation.
define <16 x float> @test5(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 10) #2
  ret <16 x float> %0
}

; Same IR shape as test4 but with rounding operand 10: expected
; vfmadd213ps with the {ru-sae} annotation.
define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 10) #2
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  ret <16 x float> %sub.i
}


; 256-bit llvm.x86.fma intrinsic: -(fmsub(a, b, c)) is expected to fold into
; vfnmadd213ps on ymm registers.
define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
  %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  ret <8 x float> %sub.i
}

; 256-bit fmsub(a, b, -c): the negated subtrahend is expected to fold into
; vfmadd213ps.
define <8 x float> @test8(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    retq
entry:
  %sub.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.c) #2
  ret <8 x float> %0
}

declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)


; Double-precision 512-bit variant of test2: -(fmadd(a, b, c)) is expected to
; fold into vfnmsub213pd.
define <8 x double> @test9(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4) #2
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %0
  ret <8 x double> %sub.i
}

declare <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32)

; Scalar (sd) intrinsic with mask operand i8 -1, followed by a negation of the
; whole <2 x double> result. Here the negation is NOT expected to fold: the
; assertions keep a plain vfmadd213sd followed by a separate vxorpd.
define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1, i32 4) #2
  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %0
  ret <2 x double> %sub.i
}

declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8, i32)

; mask3 scalar (ss) form with a negated addend and a variable mask. Expected
; output: the negated %c is materialised with vxorps, an unmasked vfmsub213ss
; computes the product-minus-c, and a masked vmovss blends that result into the
; negated %c. The two configurations differ in how the sign-bit constant is
; obtained (broadcast memory operand vs. vbroadcastss) and in kmovd vs. kmovw.
define <4 x float> @test11(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test11:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vxorps {{.*}}(%rip){1to4}, %xmm2, %xmm3
; SKX-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k1}
; SKX-NEXT:    vmovaps %xmm3, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test11:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vbroadcastss {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; KNL-NEXT:    vxorps %xmm3, %xmm2, %xmm3
; KNL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k1}
; KNL-NEXT:    vmovaps %xmm3, %xmm0
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

; Same operands as test11 but through the mask (not mask3) intrinsic: here the
; negated addend is expected to fold into a single masked vfmsub213ss.
define <4 x float> @test11b(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test11b:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfmsub213ss {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) - xmm2
; SKX-NEXT:    retq
;
; KNL-LABEL: test11b:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfmsub213ss {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) - xmm2
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

; Scalar single-precision masked FMA intrinsic (operands: a, b, c, i8 mask,
; i32 rounding operand).
declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

; -(select(mask, fmadd(a, b, c), a)): the select is expected to become a masked
; vfmadd132pd, but the outer negation stays as a separate xor with a broadcast
; constant (vxorpd in the first configuration, vpxorq in the second).
define <8 x double> @test12(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
; SKX-LABEL: test12:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfmadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; SKX-NEXT:    vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test12:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfmadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; KNL-NEXT:    vpxorq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; KNL-NEXT:    retq
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4) #2
  %bc = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %0, <8 x double> %a
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %sel
  ret <8 x double> %sub.i
}

; mask.vfmadd.sd with a negated FIRST operand and a variable mask. Expected
; output: the negated %a is materialised with vxorpd, an unmasked vfnmadd213sd
; computes the FMA, and a masked vmovsd blends that result into the negated %a.
define <2 x double> @test13(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; SKX-LABEL: test13:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm3
; SKX-NEXT:    vfnmadd213sd {{.*#+}} xmm1 = -(xmm0 * xmm1) + xmm2
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vmovsd %xmm1, %xmm3, %xmm3 {%k1}
; SKX-NEXT:    vmovapd %xmm3, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test13:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm3
; KNL-NEXT:    vfnmadd213sd {{.*#+}} xmm1 = -(xmm0 * xmm1) + xmm2
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vmovsd %xmm1, %xmm3, %xmm3 {%k1}
; KNL-NEXT:    vmovapd %xmm3, %xmm0
; KNL-NEXT:    retq

entry:
  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
  ret <2 x double> %0
}

; -(mask.vfnmsub(a, b, c, %mask)) with rounding operand 10: with a variable
; mask the negation is not folded; the assertions expect a masked
; vfnmsub132ps {ru-sae} followed by a separate xor.
define <16 x float> @test14(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; SKX-LABEL: test14:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test14:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT:    retq
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 10) #2
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  ret <16 x float> %sub.i
}

; Two chained fmadd calls that both consume the negated %a, each followed by a
; select on the same mask. The rounding operands 10 and 9 show up in the
; expected asm as {ru-sae} and {rd-sae}; both FMAs are expected as vfnmadd, and
; the negated %a is still materialised by an xor because it is also a select
; fallback value.
define <16 x float> @test15(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; SKX-LABEL: test15:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm3
; SKX-NEXT:    vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
; SKX-NEXT:    vmovaps %zmm1, %zmm3 {%k1}
; SKX-NEXT:    vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
; SKX-NEXT:    vmovaps %zmm3, %zmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test15:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm3
; KNL-NEXT:    vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
; KNL-NEXT:    vmovaps %zmm1, %zmm3 {%k1}
; KNL-NEXT:    vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
; KNL-NEXT:    vmovaps %zmm3, %zmm0
; KNL-NEXT:    retq
entry:
  %bc = bitcast i16 %mask to <16 x i1>
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub.i, <16 x float> %b, <16 x float> %c, i32 10)
  %sel = select <16 x i1> %bc, <16 x float> %0, <16 x float> %sub.i
  %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sel, <16 x float> %sub.i, <16 x float> %c, i32 9)
  %sel2 = select <16 x i1> %bc, <16 x float> %1, <16 x float> %sel
  ret <16 x float> %sel2
}

; fmaddsub(a, b, -c) with rounding operand 9, then select(mask, result, a):
; expected to fold into a single masked vfmsubadd132ps {rd-sae}. This function
; has no named entry block, so the block comment in the expected asm carries no
; "%entry" suffix.
define <16 x float> @test16(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; SKX-LABEL: test16:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test16:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; KNL-NEXT:    retq
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 9)
  %bc = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a
  ret <16 x float> %sel
}
declare <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)

; Double-precision variant of test16 with rounding operand 4: expected to fold
; into a single masked vfmsubadd132pd.
define <8 x double> @test17(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
; SKX-LABEL: test17:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfmsubadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) -/+ zmm2
; SKX-NEXT:    retq
;
; KNL-LABEL: test17:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfmsubadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) -/+ zmm2
; KNL-NEXT:    retq
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
  %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %sub.i, i32 4)
  %bc = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a
  ret <8 x double> %sel
}
declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32)

; mask.vfmadd.ss(a, -b, c, %mask), rounding operand 4: the negated multiplicand
; is expected to fold into a masked vfnmadd213ss.
define <4 x float> @test18(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test18:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmadd213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
; SKX-NEXT:    retq
;
; KNL-LABEL: test18:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

; mask.vfmadd.ss(a, -b, -c, %mask), rounding operand 4: both negations are
; expected to fold into a masked vfnmsub213ss.
define <4 x float> @test19(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test19:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmsub213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
; SKX-NEXT:    retq
;
; KNL-LABEL: test19:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

; mask3.vfmadd.ss(a, -b, c, %mask), rounding operand 4: expected to fold into a
; masked vfnmadd231ss that accumulates into xmm2, followed by a move of xmm2
; into the return register.
define <4 x float> @test20(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test20:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmadd231ss {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
; SKX-NEXT:    vmovaps %xmm2, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test20:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmadd231ss {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
; KNL-NEXT:    vmovaps %xmm2, %xmm0
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10
  ret <4 x float> %0
}

; Same IR shape as test18 but with rounding operand 8: expected masked
; vfnmadd213ss carrying the {rn-sae} annotation.
define <4 x float> @test21(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test21:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test21:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10
  ret <4 x float> %0
}

; Same IR shape as test19 but with rounding operand 8: expected masked
; vfnmsub213ss carrying the {rn-sae} annotation.
define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test22:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test22:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 8) #10
  ret <4 x float> %0
}

; Same IR shape as test20 but with rounding operand 8: expected masked
; vfnmadd231ss carrying the {rn-sae} annotation, then the move into xmm0.
define <4 x float> @test23(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test23:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; SKX-NEXT:    vmovaps %xmm2, %xmm0
; SKX-NEXT:    retq
;
; KNL-LABEL: test23:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; KNL-NEXT:    vmovaps %xmm2, %xmm0
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10
  ret <4 x float> %0
}

; mask.vfmadd.ss(a, b, -c, %mask) with rounding operand 8: the negated addend
; is expected to fold into a masked vfmsub213ss carrying {rn-sae}.
define <4 x float> @test24(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
; SKX-LABEL: test24:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test24:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT:    retq
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 8) #10
  ret <4 x float> %0
}

; Unmasked 512-bit fmadd(a, -b, -c) with rounding operand 8: both negations are
; expected to fold into vfnmsub213ps carrying {rn-sae}.
define <16 x float> @test25(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test25:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %sub.i.2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %sub.i, <16 x float> %sub.i.2, i32 8) #2
  ret <16 x float> %0
}