; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s --check-prefix=FMA
; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s --check-prefix=FMA
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s --check-prefix=FMA

; This test calls the x86 fused multiply-add intrinsics with one argument
; repeated and pins the instruction form (132/213/231) that llc is expected
; to select for each operand arrangement.
;
; The function-name suffix spells the order in which the two parameters are
; passed to the intrinsic:
;   baa -> (%b, %a, %a)    aba -> (%a, %b, %a)    bba -> (%b, %b, %a)
;
; In the expected assembly, %a is read through (%rcx) and %b through (%rdx);
; "mem" in the annotated operand list marks a load folded into the
; instruction. All three llc invocations share a single check prefix, so each
; configuration has to produce the same code.
;
; The check lines are generated; regenerate them with the script named in the
; first line instead of editing them by hand.

; Attribute group applied to every test function in this file.
attributes #0 = { nounwind }

; ---------------------------------------------------------------------------
; vfmadd family: the expected annotations have the shape (x * y) + z.
; ---------------------------------------------------------------------------

; Scalar single precision (ss). The baa case expects a separate scalar load
; into xmm1 (presumably of %a) instead of a folded memory operand.
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 128-bit (ps).
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 256-bit (ps.256, "_y" test suffix).
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %ymm0
; FMA-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Scalar double precision (sd). As with ss, the baa case expects a separate
; scalar load into xmm1.
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; FMA-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 128-bit (pd).
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 256-bit (pd.256, "_y" test suffix).
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_baa_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %ymm0
; FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}


; ---------------------------------------------------------------------------
; vfnmadd family: the expected annotations have the shape -(x * y) + z.
; ---------------------------------------------------------------------------

; Scalar single precision (ss). The baa case expects a separate scalar load
; into xmm1.
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; FMA-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 128-bit (ps).
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 256-bit (ps.256, "_y" test suffix).
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %ymm0
; FMA-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Scalar double precision (sd). The baa case expects a separate scalar load
; into xmm1.
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 128-bit (pd).
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 256-bit (pd.256, "_y" test suffix).
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_baa_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %ymm0
; FMA-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

; ---------------------------------------------------------------------------
; vfmsub family: the expected annotations have the shape (x * y) - z.
; ---------------------------------------------------------------------------

; Scalar single precision (ss). The baa case expects a separate scalar load
; into xmm1.
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; FMA-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 128-bit (ps).
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 256-bit (ps.256, "_y" test suffix).
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %ymm0
; FMA-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Scalar double precision (sd). The baa case expects a separate scalar load
; into xmm1.
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; FMA-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 128-bit (pd).
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 256-bit (pd.256, "_y" test suffix).
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_baa_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %ymm0
; FMA-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}


; ---------------------------------------------------------------------------
; vfnmsub family: the expected annotations have the shape -(x * y) - z.
; ---------------------------------------------------------------------------

; Scalar single precision (ss). The baa case expects a separate scalar load
; into xmm1.
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; FMA-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_ss:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 128-bit (ps).
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %xmm0
; FMA-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_ps:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %xmm0
; FMA-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
  ret <4 x float> %res
}

; Packed single precision, 256-bit (ps.256, "_y" test suffix).
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rcx), %ymm0
; FMA-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_ps_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovaps (%rdx), %ymm0
; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
; FMA-NEXT:    retq
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
  ret <8 x float> %res
}

; Scalar double precision (sd). The baa case expects a separate scalar load
; into xmm1.
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; FMA-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_sd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 128-bit (pd).
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %xmm0
; FMA-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_pd:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %xmm0
; FMA-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA-NEXT:    retq
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
  ret <2 x double> %res
}

; Packed double precision, 256-bit (pd.256, "_y" test suffix).
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_baa_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rcx), %ymm0
; FMA-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_pd_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vmovapd (%rdx), %ymm0
; FMA-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
; FMA-NEXT:    retq
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
  ret <4 x double> %res
}
