; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mcpu=core-avx2 -show-mc-encoding | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -disable-peephole -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=AVX512
;
; Each test builds the scalar FMA operands by loading a scalar and inserting it
; into lane 0 of a zero-filled vector, then calls an x86 scalar FMA intrinsic
; in which %a appears twice ("aab": operands a,a,b / "aba": operands a,b,a).
; The checks verify that the load of %b is folded into the FMA's memory
; operand — the 213 form for the aab pattern, the 231 form for the aba
; pattern — instead of being emitted as a separate vmovss/vmovsd, for both
; the VEX (core-avx2) and EVEX-to-VEX-compressed (skx) encodings.

target triple = "x86_64-unknown-unknown"

declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)

declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)

; *a = (a * a) + b; expects the load of b folded into vfmadd213ss.
define void @fmadd_aab_ss(float* %a, float* %b) {
; AVX2-LABEL: fmadd_aab_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfmadd213ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xa9,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * xmm0) + mem
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmadd_aab_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfmadd213ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * xmm0) + mem
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

; *a = (a * b) + a; expects the load of b folded into vfmadd231ss.
define void @fmadd_aba_ss(float* %a, float* %b) {
; AVX2-LABEL: fmadd_aba_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfmadd231ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xb9,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * mem) + xmm0
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmadd_aba_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfmadd231ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * mem) + xmm0
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

; *a = (a * a) - b; expects the load of b folded into vfmsub213ss.
define void @fmsub_aab_ss(float* %a, float* %b) {
; AVX2-LABEL: fmsub_aab_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfmsub213ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xab,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * xmm0) - mem
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmsub_aab_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfmsub213ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xab,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * xmm0) - mem
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

; *a = (a * b) - a; expects the load of b folded into vfmsub231ss.
define void @fmsub_aba_ss(float* %a, float* %b) {
; AVX2-LABEL: fmsub_aba_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfmsub231ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xbb,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * mem) - xmm0
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmsub_aba_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfmsub231ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbb,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * mem) - xmm0
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

; *a = -(a * a) + b; expects the load of b folded into vfnmadd213ss.
define void @fnmadd_aab_ss(float* %a, float* %b) {
; AVX2-LABEL: fnmadd_aab_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfnmadd213ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xad,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * xmm0) + mem
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmadd_aab_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfnmadd213ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xad,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * xmm0) + mem
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

; *a = -(a * b) + a; expects the load of b folded into vfnmadd231ss.
define void @fnmadd_aba_ss(float* %a, float* %b) {
; AVX2-LABEL: fnmadd_aba_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfnmadd231ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xbd,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * mem) + xmm0
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmadd_aba_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfnmadd231ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbd,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * mem) + xmm0
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

; *a = -(a * a) - b; expects the load of b folded into vfnmsub213ss.
define void @fnmsub_aab_ss(float* %a, float* %b) {
; AVX2-LABEL: fnmsub_aab_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfnmsub213ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xaf,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * xmm0) - mem
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmsub_aab_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfnmsub213ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaf,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * xmm0) - mem
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

; *a = -(a * b) - a; expects the load of b folded into vfnmsub231ss.
define void @fnmsub_aba_ss(float* %a, float* %b) {
; AVX2-LABEL: fnmsub_aba_ss:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vfnmsub231ss (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0xbf,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * mem) - xmm0
; AVX2-NEXT:    vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmsub_aba_ss:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero,zero,zero
; AVX512-NEXT:    vfnmsub231ss (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbf,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * mem) - xmm0
; AVX512-NEXT:    vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

; *a = (a * a) + b (double); expects the load of b folded into vfmadd213sd.
define void @fmadd_aab_sd(double* %a, double* %b) {
; AVX2-LABEL: fmadd_aab_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfmadd213sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xa9,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * xmm0) + mem
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmadd_aab_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfmadd213sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * xmm0) + mem
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

; *a = (a * b) + a (double); expects the load of b folded into vfmadd231sd.
define void @fmadd_aba_sd(double* %a, double* %b) {
; AVX2-LABEL: fmadd_aba_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfmadd231sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * mem) + xmm0
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmadd_aba_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfmadd231sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * mem) + xmm0
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

; *a = (a * a) - b (double); expects the load of b folded into vfmsub213sd.
define void @fmsub_aab_sd(double* %a, double* %b) {
; AVX2-LABEL: fmsub_aab_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfmsub213sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xab,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * xmm0) - mem
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmsub_aab_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfmsub213sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xab,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * xmm0) - mem
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

; *a = (a * b) - a (double); expects the load of b folded into vfmsub231sd.
define void @fmsub_aba_sd(double* %a, double* %b) {
; AVX2-LABEL: fmsub_aba_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfmsub231sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xbb,0x06]
; AVX2-NEXT:    # xmm0 = (xmm0 * mem) - xmm0
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fmsub_aba_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfmsub231sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbb,0x06]
; AVX512-NEXT:    # xmm0 = (xmm0 * mem) - xmm0
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

; *a = -(a * a) + b (double); expects the load of b folded into vfnmadd213sd.
define void @fnmadd_aab_sd(double* %a, double* %b) {
; AVX2-LABEL: fnmadd_aab_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfnmadd213sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xad,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * xmm0) + mem
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmadd_aab_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfnmadd213sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xad,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * xmm0) + mem
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

; *a = -(a * b) + a (double); expects the load of b folded into vfnmadd231sd.
define void @fnmadd_aba_sd(double* %a, double* %b) {
; AVX2-LABEL: fnmadd_aba_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfnmadd231sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xbd,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * mem) + xmm0
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmadd_aba_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfnmadd231sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbd,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * mem) + xmm0
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

; *a = -(a * a) - b (double); expects the load of b folded into vfnmsub213sd.
define void @fnmsub_aab_sd(double* %a, double* %b) {
; AVX2-LABEL: fnmsub_aab_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfnmsub213sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xaf,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * xmm0) - mem
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmsub_aab_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfnmsub213sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaf,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * xmm0) - mem
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

; *a = -(a * b) - a (double); expects the load of b folded into vfnmsub231sd.
define void @fnmsub_aba_sd(double* %a, double* %b) {
; AVX2-LABEL: fnmsub_aba_sd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07]
; AVX2-NEXT:    # xmm0 = mem[0],zero
; AVX2-NEXT:    vfnmsub231sd (%rsi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0xbf,0x06]
; AVX2-NEXT:    # xmm0 = -(xmm0 * mem) - xmm0
; AVX2-NEXT:    vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07]
; AVX2-NEXT:    retq # encoding: [0xc3]
;
; AVX512-LABEL: fnmsub_aba_sd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; AVX512-NEXT:    # xmm0 = mem[0],zero
; AVX512-NEXT:    vfnmsub231sd (%rsi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbf,0x06]
; AVX512-NEXT:    # xmm0 = -(xmm0 * mem) - xmm0
; AVX512-NEXT:    vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}