; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s
;
; Loop tests for the 256-bit x86 FMA intrinsics (pd = <4 x double>,
; ps = <8 x float>).
;
; Every function below has the same shape: a vector accumulator is carried
; around a counted loop through a phi node and is passed as the third (addend)
; operand of an FMA intrinsic whose result feeds back into that phi. The
; expected output for each function is:
;   * a loop-body label,
;   * the "231" form of the matching FMA instruction on ymm registers,
;   * a second label, then the increment / compare / backward-branch sequence
;     that jumps to the first label.
; NOTE(review): the 231 form is presumably wanted because it writes the result
; over the addend register, so the loop-carried value needs no extra copy --
; confirm against the commit that introduced this test.

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"

; ---------------------------------------------------------------------------
; <4 x double> variants
; ---------------------------------------------------------------------------

; vfmaddsub on <4 x double>, accumulator in the addend position.
; CHECK-LABEL: fmaddsubpd_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmaddsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmaddsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %for.cond

for.cond:
  ; Loop-carried accumulator: %c on entry, the FMA result on the back edge.
  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
  ; Induction variable, counting up from 0 while it is less than %iter.
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  ; Empty body block; all work happens in for.inc.
  br label %for.inc

for.inc:
  %0 = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <4 x double> %c.addr.0
}

; vfmsubadd on <4 x double>, accumulator in the addend position.
; CHECK-LABEL: fmsubaddpd_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsubadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmsubaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %for.cond

for.cond:
  ; Loop-carried accumulator and induction variable.
  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <4 x double> %c.addr.0
}

; vfmadd on <4 x double>, accumulator in the addend position.
; CHECK-LABEL: fmaddpd_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %for.cond

for.cond:
  ; Loop-carried accumulator and induction variable.
  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <4 x double> %c.addr.0
}

; vfmsub on <4 x double>, accumulator in the addend position.
; CHECK-LABEL: fmsubpd_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %for.cond

for.cond:
  ; Loop-carried accumulator and induction variable.
  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <4 x double> %c.addr.0
}

; Intrinsics used by the <4 x double> tests above.
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)


; ---------------------------------------------------------------------------
; <8 x float> variants
; ---------------------------------------------------------------------------

; vfmaddsub on <8 x float>, accumulator in the addend position.
; CHECK-LABEL: fmaddsubps_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmaddsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmaddsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %for.cond

for.cond:
  ; Loop-carried accumulator and induction variable.
  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <8 x float> %c.addr.0
}

; vfmsubadd on <8 x float>, accumulator in the addend position.
; CHECK-LABEL: fmsubaddps_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsubadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmsubaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %for.cond

for.cond:
  ; Loop-carried accumulator and induction variable.
  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <8 x float> %c.addr.0
}

; vfmadd on <8 x float>, accumulator in the addend position.
; CHECK-LABEL: fmaddps_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %for.cond

for.cond:
  ; Loop-carried accumulator and induction variable.
  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <8 x float> %c.addr.0
}

; vfmsub on <8 x float>, accumulator in the addend position.
; CHECK-LABEL: fmsubps_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %for.cond

for.cond:
  ; Loop-carried accumulator and induction variable.
  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, %iter
  br i1 %cmp, label %for.body, label %for.end

for.body:
  br label %for.inc

for.inc:
  %0 = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret <8 x float> %c.addr.0
}

; Intrinsics used by the <8 x float> tests above.
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)