; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s

define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sabdl8h:
;CHECK: sabdl.8h
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sabdl4s:
;CHECK: sabdl.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sabdl2d:
;CHECK: sabdl.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}

define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sabdl2_8h:
;CHECK: sabdl2.8h
  %load1 = load <16 x i8>, <16 x i8>* %A
  %load2 = load <16 x i8>, <16 x i8>* %B
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sabdl2_4s:
;CHECK: sabdl2.4s
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sabdl2_2d:
;CHECK: sabdl2.2d
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}

define <8 x i16> @uabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uabdl8h:
;CHECK: uabdl.8h
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @uabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uabdl4s:
;CHECK: uabdl.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uabdl2d:
;CHECK: uabdl.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}

define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uabdl2_8h:
;CHECK: uabdl2.8h
  %load1 = load <16 x i8>, <16 x i8>* %A
  %load2 = load <16 x i8>, <16 x i8>* %B
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uabdl2_4s:
;CHECK: uabdl2.4s
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uabdl2_2d:
;CHECK: uabdl2.2d
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}

define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fabd_2s:
;CHECK: fabd.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fabd_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fabd_4s:
;CHECK: fabd.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fabd_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fabd_2d:
;CHECK: fabd.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>) nounwind readnone

define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sabd_8b:
;CHECK: sabd.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sabd_16b:
;CHECK: sabd.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sabd_4h:
;CHECK: sabd.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sabd_8h:
;CHECK: sabd.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sabd_2s:
;CHECK: sabd.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sabd_4s:
;CHECK: sabd.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i8> @uabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uabd_8b:
;CHECK: uabd.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @uabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uabd_16b:
;CHECK: uabd.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @uabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uabd_4h:
;CHECK: uabd.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @uabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uabd_8h:
;CHECK: uabd.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @uabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uabd_2s:
;CHECK: uabd.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @uabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uabd_4s:
;CHECK: uabd.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i8> @sqabs_8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sqabs_8b:
;CHECK: sqabs.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sqabs_16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sqabs_16b:
;CHECK: sqabs.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sqabs_4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sqabs_4h:
;CHECK: sqabs.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %tmp1)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sqabs_8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqabs_8h:
;CHECK: sqabs.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %tmp1)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sqabs_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sqabs_2s:
;CHECK: sqabs.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sqabs_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqabs_4s:
;CHECK: sqabs.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32>) nounwind readnone

define <8 x i8> @sqneg_8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sqneg_8b:
;CHECK: sqneg.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sqneg_16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sqneg_16b:
;CHECK: sqneg.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sqneg_4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sqneg_4h:
;CHECK: sqneg.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %tmp1)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sqneg_8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqneg_8h:
;CHECK: sqneg.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %tmp1)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sqneg_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sqneg_2s:
;CHECK: sqneg.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sqneg_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqneg_4s:
;CHECK: sqneg.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32>) nounwind readnone

define <8 x i8> @abs_8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: abs_8b:
;CHECK: abs.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp3
}

define <16 x i8> @abs_16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: abs_16b:
;CHECK: abs.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp3
}

define <4 x i16> @abs_4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: abs_4h:
;CHECK: abs.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %tmp1)
  ret <4 x i16> %tmp3
}

define <8 x i16> @abs_8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: abs_8h:
;CHECK: abs.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %tmp1)
  ret <8 x i16> %tmp3
}

define <2 x i32> @abs_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: abs_2s:
;CHECK: abs.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp3
}

define <4 x i32> @abs_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: abs_4s:
;CHECK: abs.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp3
}

define <1 x i64> @abs_1d(<1 x i64> %A) nounwind {
; CHECK-LABEL: abs_1d:
; CHECK: abs d0, d0
  %abs = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %A)
  ret <1 x i64> %abs
}

define i64 @abs_1d_honestly(i64 %A) nounwind {
; CHECK-LABEL: abs_1d_honestly:
; CHECK: abs d0, d0
  %abs = call i64 @llvm.aarch64.neon.abs.i64(i64 %A)
  ret i64 %abs
}

declare <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.abs.i64(i64) nounwind readnone

define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: sabal8h:
;CHECK: sabal.8h
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = load <8 x i16>, <8 x i16>* %C
  %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp5 = add <8 x i16> %tmp3, %tmp4.1
  ret <8 x i16> %tmp5
}

define <4 x i32> @sabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sabal4s:
;CHECK: sabal.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp5 = add <4 x i32> %tmp3, %tmp4.1
  ret <4 x i32> %tmp5
}

define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sabal2d:
;CHECK: sabal.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
  ret <2 x i64> %tmp5
}

define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: sabal2_8h:
;CHECK: sabal2.8h
  %load1 = load <16 x i8>, <16 x i8>* %A
  %load2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = load <8 x i16>, <8 x i16>* %C
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp5 = add <8 x i16> %tmp3, %tmp4.1
  ret <8 x i16> %tmp5
}

define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sabal2_4s:
;CHECK: sabal2.4s
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp5 = add <4 x i32> %tmp3, %tmp4.1
  ret <4 x i32> %tmp5
}

define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sabal2_2d:
;CHECK: sabal2.2d
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
  ret <2 x i64> %tmp5
}

define <8 x i16> @uabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: uabal8h:
;CHECK: uabal.8h
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = load <8 x i16>, <8 x i16>* %C
  %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp5 = add <8 x i16> %tmp3, %tmp4.1
  ret <8 x i16> %tmp5
}

define <4 x i32> @uabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: uabal4s:
;CHECK: uabal.4s
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp5 = add <4 x i32> %tmp3, %tmp4.1
  ret <4 x i32> %tmp5
}

define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: uabal2d:
;CHECK: uabal.2d
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
  ret <2 x i64> %tmp5
}

define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: uabal2_8h:
;CHECK: uabal2.8h
  %load1 = load <16 x i8>, <16 x i8>* %A
  %load2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = load <8 x i16>, <8 x i16>* %C
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp5 = add <8 x i16> %tmp3, %tmp4.1
  ret <8 x i16> %tmp5
}

define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: uabal2_4s:
;CHECK: uabal2.4s
  %load1 = load <8 x i16>, <8 x i16>* %A
  %load2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = load <4 x i32>, <4 x i32>* %C
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp5 = add <4 x i32> %tmp3, %tmp4.1
  ret <4 x i32> %tmp5
}

define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: uabal2_2d:
;CHECK: uabal2.2d
  %load1 = load <4 x i32>, <4 x i32>* %A
  %load2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = load <2 x i64>, <2 x i64>* %C
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp5 = add <2 x i64> %tmp3, %tmp4.1
  ret <2 x i64> %tmp5
}

define <8 x i8> @saba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: saba_8b:
;CHECK: saba.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = load <8 x i8>, <8 x i8>* %C
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <16 x i8> @saba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK-LABEL: saba_16b:
;CHECK: saba.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  %tmp4 = load <16 x i8>, <16 x i8>* %C
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <4 x i16> @saba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: saba_4h:
;CHECK: saba.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = load <4 x i16>, <4 x i16>* %C
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <8 x i16> @saba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: saba_8h:
;CHECK: saba.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  %tmp4 = load <8 x i16>, <8 x i16>* %C
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <2 x i32> @saba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: saba_2s:
;CHECK: saba.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = load <2 x i32>, <2 x i32>* %C
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <4 x i32> @saba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: saba_4s:
;CHECK: saba.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  %tmp4 = load <4 x i32>, <4 x i32>* %C
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <8 x i8> @uaba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: uaba_8b:
;CHECK: uaba.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = load <8 x i8>, <8 x i8>* %C
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <16 x i8> @uaba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK-LABEL: uaba_16b:
;CHECK: uaba.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  %tmp4 = load <16 x i8>, <16 x i8>* %C
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <4 x i16> @uaba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: uaba_4h:
;CHECK: uaba.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = load <4 x i16>, <4 x i16>* %C
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <8 x i16> @uaba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: uaba_8h:
;CHECK: uaba.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  %tmp4 = load <8 x i16>, <8 x i16>* %C
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <2 x i32> @uaba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: uaba_2s:
;CHECK: uaba.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = load <2 x i32>, <2 x i32>* %C
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: uaba_4s:
;CHECK: uaba.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  %tmp4 = load <4 x i32>, <4 x i32>* %C
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; Scalar FABD
define float @fabds(float %a, float %b) nounwind {
; CHECK-LABEL: fabds:
; CHECK: fabd s0, s0, s1
  %vabd.i = tail call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) nounwind
  ret float %vabd.i
}

define double @fabdd(double %a, double %b) nounwind {
; CHECK-LABEL: fabdd:
; CHECK: fabd d0, d0, d1
  %vabd.i = tail call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) nounwind
  ret double %vabd.i
}

declare double @llvm.aarch64.sisd.fabd.f64(double, double) nounwind readnone
declare float @llvm.aarch64.sisd.fabd.f32(float, float) nounwind readnone

define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
; CHECK-LABEL: uabdl_from_extract_dup:
; CHECK-NOT: ext.16b
; CHECK: uabdl2.2d
  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1

  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>

  %res = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
  %res1 = zext <2 x i32> %res to <2 x i64>
  ret <2 x i64> %res1
}

define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
; CHECK-LABEL: sabdl_from_extract_dup:
; CHECK-NOT: ext.16b
; CHECK: sabdl2.2d
  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1

  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>

  %res = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
  %res1 = zext <2 x i32> %res to <2 x i64>
  ret <2 x i64> %res1
}

define <2 x i32> @abspattern1(<2 x i32> %a) nounwind {
; CHECK-LABEL: abspattern1:
; CHECK: abs.2s
; CHECK-NEXT: ret
  %tmp1neg = sub <2 x i32> zeroinitializer, %a
  %b = icmp sge <2 x i32> %a, zeroinitializer
  %abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg
  ret <2 x i32> %abs
}

define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
; CHECK-LABEL: abspattern2:
; CHECK: abs.4h
; CHECK-NEXT: ret
  %tmp1neg = sub <4 x i16> zeroinitializer, %a
  %b = icmp sgt <4 x i16> %a, zeroinitializer
  %abs = select <4 x i1> %b, <4 x i16> %a, <4 x i16> %tmp1neg
  ret <4 x i16> %abs
}

define <8 x i8> @abspattern3(<8 x i8> %a) nounwind {
; CHECK-LABEL: abspattern3:
; CHECK: abs.8b
; CHECK-NEXT: ret
  %tmp1neg = sub <8 x i8> zeroinitializer, %a
  %b = icmp slt <8 x i8> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i8> %tmp1neg, <8 x i8> %a
  ret <8 x i8> %abs
}

define <4 x i32> @abspattern4(<4 x i32> %a) nounwind {
; CHECK-LABEL: abspattern4:
; CHECK: abs.4s
; CHECK-NEXT: ret
  %tmp1neg = sub <4 x i32> zeroinitializer, %a
  %b = icmp sge <4 x i32> %a, zeroinitializer
  %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
  ret <4 x i32> %abs
}

define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
; CHECK-LABEL: abspattern5:
; CHECK: abs.8h
; CHECK-NEXT: ret
  %tmp1neg = sub <8 x i16> zeroinitializer, %a
  %b = icmp sgt <8 x i16> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
  ret <8 x i16> %abs
}

define <16 x i8> @abspattern6(<16 x i8> %a) nounwind {
; CHECK-LABEL: abspattern6:
; CHECK: abs.16b
; CHECK-NEXT: ret
  %tmp1neg = sub <16 x i8> zeroinitializer, %a
  %b = icmp slt <16 x i8> %a, zeroinitializer
  %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
  ret <16 x i8> %abs
}

define <2 x i64> @abspattern7(<2 x i64> %a) nounwind {
; CHECK-LABEL: abspattern7:
; CHECK: abs.2d
; CHECK-NEXT: ret
  %tmp1neg = sub <2 x i64> zeroinitializer, %a
  %b = icmp sle <2 x i64> %a, zeroinitializer
  %abs = select <2 x i1> %b, <2 x i64> %tmp1neg, <2 x i64> %a
  ret <2 x i64> %abs
}