; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,-fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,+fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK

; Checks how the packed llvm.x86.fma.* intrinsics are lowered when FMA4 is
; available. llc is run twice, once with FMA3 disabled (-fma) and once with it
; enabled (+fma); both runs are matched against the same assertions, so the
; four-operand FMA4 instruction and its exact machine encoding are expected in
; either configuration.
;
; Every operation is covered for <4 x float>, <2 x double>, <8 x float> and
; <4 x double>. Each test passes its three arguments straight to the intrinsic
; and returns the result unchanged.

; VFMADD: (a0 * a1) + a2
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMSUB: (a0 * a1) - a2
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFNMADD: -(a0 * a1) + a2
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFNMSUB: -(a0 * a1) - a2
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMADDSUB: (a0 * a1) +/- a2
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMSUBADD: (a0 * a1) -/+ a2
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
; CHECK-NEXT:    # xmm0 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
; CHECK-NEXT:    # ymm0 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; Shared by every test function above.
attributes #0 = { nounwind }