; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s

define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sqadd8b:
;CHECK: sqadd.8b
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @sqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqadd4h:
;CHECK: sqadd.4h
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @sqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqadd2s:
;CHECK: sqadd.2s
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <8 x i8> @uqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uqadd8b:
;CHECK: uqadd.8b
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @uqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uqadd4h:
;CHECK: uqadd.4h
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @uqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uqadd2s:
;CHECK: uqadd.2s
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <16 x i8> @sqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sqadd16b:
;CHECK: sqadd.16b
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = load <16 x i8>, <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @sqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqadd8h:
;CHECK: sqadd.8h
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i16>, <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @sqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqadd4s:
;CHECK: sqadd.4s
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i32>, <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @sqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: sqadd2d:
;CHECK: sqadd.2d
	%tmp1 = load <2 x i64>, <2 x i64>* %A
	%tmp2 = load <2 x i64>, <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

define <16 x i8> @uqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uqadd16b:
;CHECK: uqadd.16b
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = load <16 x i8>, <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @uqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uqadd8h:
;CHECK: uqadd.8h
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i16>, <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @uqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uqadd4s:
;CHECK: uqadd.4s
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i32>, <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @uqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: uqadd2d:
;CHECK: uqadd.2d
	%tmp1 = load <2 x i64>, <2 x i64>* %A
	%tmp2 = load <2 x i64>, <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.aarch64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i8> @usqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: usqadd8b:
;CHECK: usqadd.8b
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @usqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: usqadd4h:
;CHECK: usqadd.4h
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @usqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: usqadd2s:
;CHECK: usqadd.2s
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <16 x i8> @usqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: usqadd16b:
;CHECK: usqadd.16b
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = load <16 x i8>, <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @usqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: usqadd8h:
;CHECK: usqadd.8h
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i16>, <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @usqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: usqadd4s:
;CHECK: usqadd.4s
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i32>, <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @usqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: usqadd2d:
;CHECK: usqadd.2d
	%tmp1 = load <2 x i64>, <2 x i64>* %A
	%tmp2 = load <2 x i64>, <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

define i64 @usqadd_d(i64 %l, i64 %r) nounwind {
; CHECK-LABEL: usqadd_d:
; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
	%sum = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %l, i64 %r)
	ret i64 %sum
}

define i32 @usqadd_s(i32 %l, i32 %r) nounwind {
; CHECK-LABEL: usqadd_s:
; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
	%sum = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %l, i32 %r)
	ret i32 %sum
}

declare <8 x i8>  @llvm.aarch64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.usqadd.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.usqadd.i32(i32, i32) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i8> @suqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: suqadd8b:
;CHECK: suqadd.8b
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = load <8 x i8>, <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @suqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: suqadd4h:
;CHECK: suqadd.4h
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = load <4 x i16>, <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @suqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: suqadd2s:
;CHECK: suqadd.2s
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = load <2 x i32>, <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <16 x i8> @suqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: suqadd16b:
;CHECK: suqadd.16b
	%tmp1 = load <16 x i8>, <16 x i8>* %A
	%tmp2 = load <16 x i8>, <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @suqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: suqadd8h:
;CHECK: suqadd.8h
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = load <8 x i16>, <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @suqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: suqadd4s:
;CHECK: suqadd.4s
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = load <4 x i32>, <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @suqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: suqadd2d:
;CHECK: suqadd.2d
	%tmp1 = load <2 x i64>, <2 x i64>* %A
	%tmp2 = load <2 x i64>, <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

define <1 x i64> @suqadd_1d(<1 x i64> %l, <1 x i64> %r) nounwind {
; CHECK-LABEL: suqadd_1d:
; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
	%sum = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r)
	ret <1 x i64> %sum
}

define i64 @suqadd_d(i64 %l, i64 %r) nounwind {
; CHECK-LABEL: suqadd_d:
; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
	%sum = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %l, i64 %r)
	ret i64 %sum
}

define i32 @suqadd_s(i32 %l, i32 %r) nounwind {
; CHECK-LABEL: suqadd_s:
; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
	%sum = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %l, i32 %r)
	ret i32 %sum
}

declare <8 x i8>  @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.suqadd.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.suqadd.i32(i32, i32) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone