; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Code generation test for vector operations whose second multiplicand is a
; scalar argument splatted across every lane with an insertelement chain.
;
; Layout of the file:
;   * widening integer multiplies (smull/umull/sqdmull) fed by the upper half
;     of a 128-bit vector (the shufflevector masks select the top lanes), with
;     optional add/sub/saturating-add/saturating-sub accumulation;
;   * floating-point fmul and llvm.fma with a splatted scalar operand.
;
; Each function is followed by the FileCheck directives describing the
; instruction(s) expected in the llc output for that function.

declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)

declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)

declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)

declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)

declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)

; ---------------------------------------------------------------------------
; Widening multiply, upper half x splatted scalar.
; The scalar is expected to be broadcast with a full-width dup from w0 and
; that same register must be the last operand of the "2" (upper-half) form of
; the multiply.
; ---------------------------------------------------------------------------

define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) {
; CHECK-LABEL: test_vmull_high_n_s16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  ; Lanes 4..7 of %a, i.e. the upper four i16 elements.
  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Splat %b into all four lanes.
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
  %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  ret <4 x i32> %vmull15.i.i
}

define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vmull_high_n_s32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  ; Lanes 2..3 of %a, i.e. the upper two i32 elements.
  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ; Splat %b into both lanes.
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
  %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  ret <2 x i64> %vmull9.i.i
}

define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) {
; CHECK-LABEL: test_vmull_high_n_u16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
  %vmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  ret <4 x i32> %vmull15.i.i
}

define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vmull_high_n_u32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
  %vmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  ret <2 x i64> %vmull9.i.i
}

; ---------------------------------------------------------------------------
; Saturating doubling widening multiply, upper half x splatted scalar.
; ---------------------------------------------------------------------------

define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) {
; CHECK-LABEL: test_vqdmull_high_n_s16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3
  %vqdmull15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  ret <4 x i32> %vqdmull15.i.i
}

define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: test_vqdmull_high_n_s32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1
  %vqdmull9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  ret <2 x i64> %vqdmull9.i.i
}

; ---------------------------------------------------------------------------
; Widening multiply-accumulate: a plain vector add of the smull/umull result
; and %a is expected to fold into smlal2/umlal2.
; ---------------------------------------------------------------------------

define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vmlal_high_n_s16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
  ret <4 x i32> %add.i.i
}

define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlal_high_n_s32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
  ret <2 x i64> %add.i.i
}

define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vmlal_high_n_u16:
; CHECK: dup [[REPLICATE:v[0-9]+]].8h, w0
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, [[REPLICATE]].8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  %add.i.i = add <4 x i32> %vmull2.i.i.i, %a
  ret <4 x i32> %add.i.i
}

define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlal_high_n_u32:
; CHECK: dup [[REPLICATE:v[0-9]+]].4s, w0
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, [[REPLICATE]].4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  %add.i.i = add <2 x i64> %vmull2.i.i.i, %a
  ret <2 x i64> %add.i.i
}

; ---------------------------------------------------------------------------
; Saturating doubling multiply-accumulate: sqdmull followed by sqadd is
; expected to fold into sqdmlal2.
; NOTE(review): from here to the end of the integer tests the directives only
; match the fused instruction and accept any register as the last operand;
; unlike the tests above they do not pin the dup'ed register. Confirm whether
; that looser match is intentional before tightening it.
; ---------------------------------------------------------------------------

define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vqdmlal_high_n_s16:
; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vqdmlal15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  %vqdmlal17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i)
  ret <4 x i32> %vqdmlal17.i.i
}

define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vqdmlal_high_n_s32:
; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vqdmlal9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  %vqdmlal11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i)
  ret <2 x i64> %vqdmlal11.i.i
}

; ---------------------------------------------------------------------------
; Widening multiply-subtract: %a minus the smull/umull result is expected to
; fold into smlsl2/umlsl2.
; ---------------------------------------------------------------------------

define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vmlsl_high_n_s16:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
  ret <4 x i32> %sub.i.i
}

define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlsl_high_n_s32:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
  ret <2 x i64> %sub.i.i
}

define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vmlsl_high_n_u16:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i
  ret <4 x i32> %sub.i.i
}

define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vmlsl_high_n_u32:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i
  ret <2 x i64> %sub.i.i
}

; ---------------------------------------------------------------------------
; Saturating doubling multiply-subtract: sqdmull followed by sqsub is expected
; to fold into sqdmlsl2.
; ---------------------------------------------------------------------------

define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) {
; CHECK-LABEL: test_vqdmlsl_high_n_s16:
; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0
  %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1
  %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2
  %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3
  %vqdmlsl15.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i)
  %vqdmlsl17.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i)
  ret <4 x i32> %vqdmlsl17.i.i
}

define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: test_vqdmlsl_high_n_s32:
; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
entry:
  %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0
  %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1
  %vqdmlsl9.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i)
  %vqdmlsl11.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i)
  ret <2 x i64> %vqdmlsl11.i.i
}

; ---------------------------------------------------------------------------
; Floating-point multiply by a splatted scalar.
; The expected instruction is the by-element form of fmul reading lane 0 of
; the scalar's register, so no separate broadcast is matched here.
; ---------------------------------------------------------------------------

define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) {
; CHECK-LABEL: test_vmul_n_f32:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
entry:
  %vecinit.i = insertelement <2 x float> undef, float %b, i32 0
  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1
  %mul.i = fmul <2 x float> %vecinit1.i, %a
  ret <2 x float> %mul.i
}

define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) {
; CHECK-LABEL: test_vmulq_n_f32:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
entry:
  %vecinit.i = insertelement <4 x float> undef, float %b, i32 0
  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1
  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2
  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3
  %mul.i = fmul <4 x float> %vecinit3.i, %a
  ret <4 x float> %mul.i
}

define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) {
; CHECK-LABEL: test_vmulq_n_f64:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
entry:
  %vecinit.i = insertelement <2 x double> undef, double %b, i32 0
  %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1
  %mul.i = fmul <2 x double> %vecinit1.i, %a
  ret <2 x double> %mul.i
}

; ---------------------------------------------------------------------------
; Fused multiply-add / multiply-subtract with a splatted scalar multiplicand.
; The expected instruction is the by-element form of fmla/fmls; any lane
; index is accepted by the pattern.
; ---------------------------------------------------------------------------

define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
; CHECK-LABEL: test_vfma_n_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
  ; Computes %b * splat(%n) + %a.
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
; CHECK-LABEL: test_vfmaq_n_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
  ; Computes %b * splat(%n) + %a.
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a)
  ret <4 x float> %0
}

define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) {
; CHECK-LABEL: test_vfms_n_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %vecinit.i = insertelement <2 x float> undef, float %n, i32 0
  %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1
  ; Negate %b (written as -0.0 - %b) so the fma computes %a - %b * splat(%n).
  %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
  %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a)
  ret <2 x float> %1
}

define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) {
; CHECK-LABEL: test_vfmsq_n_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}]
entry:
  %vecinit.i = insertelement <4 x float> undef, float %n, i32 0
  %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1
  %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2
  %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3
  ; Negate %b (written as -0.0 - %b) so the fma computes %a - %b * splat(%n).
  %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
  %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a)
  ret <4 x float> %1
}