; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=+fma -show-mc-encoding | FileCheck %s

; Scalar FMA combining with AVX-512 write-masking.
;
; Each combine_scalar_* function extracts lane 0 of its vector operands,
; computes a 'fast' fmul followed by an fadd/fsub, selects between that result
; and a fallback value using bit 0 of %k, and inserts the selection into lane 0
; of a pass-through vector. The expected output is a single masked scalar FMA
; instruction for the whole sequence. Three masking flavours are covered:
;   mask  - fallback and pass-through both come from %a (merge-masking, 213 form)
;   maskz - fallback is 0.0 ({z} zero-masking, 213 form); upper lanes from %a
;   mask3 - fallback and pass-through both come from %c (231 form, result in xmm2)
;
; The assertion lines are generated output: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; fmadd: lane0 = (b * a) + c

define <2 x double> @combine_scalar_mask_fmadd_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fmadd_f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fadd fast float %6, %5
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %3
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask_fmadd_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fmadd_f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fadd fast double %3, %2
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %0
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_maskz_fmadd_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fmadd_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fadd fast float %6, %5
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float 0.000000e+00
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_maskz_fmadd_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fmadd_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fadd fast double %3, %2
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double 0.000000e+00
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_mask3_fmadd_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fmadd_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmadd231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xb9,0xd0]
; CHECK-NEXT:    # xmm2 {%k1} = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fadd fast float %6, %5
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %5
  %11 = insertelement <4 x float> %2, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask3_fmadd_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fmadd_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmadd231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xb9,0xd0]
; CHECK-NEXT:    # xmm2 {%k1} = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fadd fast double %3, %2
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %2
  %8 = insertelement <2 x double> %c, double %7, i64 0
  ret <2 x double> %8
}

; fmsub: lane0 = (b * a) - c

define <2 x double> @combine_scalar_mask_fmsub_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fmsub_f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %6, %5
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %3
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask_fmsub_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fmsub_f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %3, %2
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %0
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_maskz_fmsub_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fmsub_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %6, %5
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float 0.000000e+00
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_maskz_fmsub_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fmsub_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %3, %2
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double 0.000000e+00
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_mask3_fmsub_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fmsub_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmsub231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xbb,0xd0]
; CHECK-NEXT:    # xmm2 {%k1} = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %6, %5
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %5
  %11 = insertelement <4 x float> %2, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask3_fmsub_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fmsub_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfmsub231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xbb,0xd0]
; CHECK-NEXT:    # xmm2 {%k1} = (xmm1 * xmm0) - xmm2
; CHECK-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %3, %2
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %2
  %8 = insertelement <2 x double> %c, double %7, i64 0
  ret <2 x double> %8
}

; fnmadd: lane0 = c - (b * a), i.e. -(b * a) + c

define <2 x double> @combine_scalar_mask_fnmadd_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fnmadd_f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %5, %6
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %3
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask_fnmadd_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fnmadd_f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %2, %3
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %0
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_maskz_fnmadd_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fnmadd_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %5, %6
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float 0.000000e+00
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_maskz_fnmadd_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fnmadd_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %2, %3
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double 0.000000e+00
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_mask3_fnmadd_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fnmadd_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmadd231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xbd,0xd0]
; CHECK-NEXT:    # xmm2 {%k1} = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %5, %6
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %5
  %11 = insertelement <4 x float> %2, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask3_fnmadd_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fnmadd_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmadd231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xbd,0xd0]
; CHECK-NEXT:    # xmm2 {%k1} = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %2, %3
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %2
  %8 = insertelement <2 x double> %c, double %7, i64 0
  ret <2 x double> %8
}

; fnmsub: lane0 = (-c) - (b * a), i.e. -(b * a) - c
; The negation of c is written as 'fsub -0.0, c'.

define <2 x double> @combine_scalar_mask_fnmsub_f32(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fnmsub_f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %sub = fsub fast float -0.000000e+00, %5
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %sub, %6
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %3
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask_fnmsub_f64(<2 x double> %a, i8 zeroext %k, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_mask_fnmsub_f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %sub = fsub fast double -0.000000e+00, %2
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %sub, %3
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %0
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_maskz_fnmsub_32(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fnmsub_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %sub = fsub fast float -0.000000e+00, %5
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %sub, %6
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float 0.000000e+00
  %11 = insertelement <4 x float> %0, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_maskz_fnmsub_64(i8 zeroext %k, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: combine_scalar_maskz_fnmsub_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
; CHECK-NEXT:    # xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %sub = fsub fast double -0.000000e+00, %2
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %sub, %3
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double 0.000000e+00
  %8 = insertelement <2 x double> %a, double %7, i64 0
  ret <2 x double> %8
}

define <2 x double> @combine_scalar_mask3_fnmsub_32(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fnmsub_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmsub231ss %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xbf,0xd0]
; CHECK-NEXT:    # xmm2 {%k1} = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = bitcast <2 x double> %a to <4 x float>
  %1 = bitcast <2 x double> %b to <4 x float>
  %2 = bitcast <2 x double> %c to <4 x float>
  %3 = extractelement <4 x float> %0, i64 0
  %4 = extractelement <4 x float> %1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %sub = fsub fast float -0.000000e+00, %5
  %6 = fmul fast float %4, %3
  %7 = fsub fast float %sub, %6
  %8 = bitcast i8 %k to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float %5
  %11 = insertelement <4 x float> %2, float %10, i64 0
  %12 = bitcast <4 x float> %11 to <2 x double>
  ret <2 x double> %12
}

define <2 x double> @combine_scalar_mask3_fnmsub_64(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 zeroext %k) {
; CHECK-LABEL: combine_scalar_mask3_fnmsub_64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vfnmsub231sd %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xbf,0xd0]
; CHECK-NEXT:    # xmm2 {%k1} = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
; CHECK-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %sub = fsub fast double -0.000000e+00, %2
  %3 = fmul fast double %1, %0
  %4 = fsub fast double %sub, %3
  %5 = bitcast i8 %k to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %2
  %8 = insertelement <2 x double> %c, double %7, i64 0
  ret <2 x double> %8
}

; Don't fold (fadd (fmul x, c1), (fmul x, c2)) into (fmul x, c1+c2) when
; 'reassoc' is not set. Only 'contract' is present on these instructions, so
; the expected code is a multiply by one constant followed by an FMA with the
; other constant, not a single multiply.
define float @fma_const_fmul(float %x) {
; CHECK-LABEL: fma_const_fmul:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0x0d,A,A,A,A]
; CHECK-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    vfmadd132ss {{.*}}(%rip), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x99,0x05,A,A,A,A]
; CHECK-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    # xmm0 = (xmm0 * mem) + xmm1
; CHECK-NEXT:    retq # encoding: [0xc3]
  %mul1 = fmul contract float %x, 10.0
  %mul2 = fmul contract float %x, 11.0
  %add1 = fadd contract float %mul1, %mul2
  ret float %add1
}