; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; vqshl: saturating shift left by a register-specified amount, signed and
; unsigned, for D (64-bit) and Q (128-bit) vectors.

define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqshls8:
;CHECK: vqshl.s8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqshls16:
;CHECK: vqshl.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqshls32:
;CHECK: vqshl.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqshls64:
;CHECK: vqshl.s64
  %tmp1 = load <1 x i64>* %A
  %tmp2 = load <1 x i64>* %B
  %tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqshlu8:
;CHECK: vqshl.u8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqshlu16:
;CHECK: vqshl.u16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqshlu32:
;CHECK: vqshl.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqshlu64:
;CHECK: vqshl.u64
  %tmp1 = load <1 x i64>* %A
  %tmp2 = load <1 x i64>* %B
  %tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqshlQs8:
;CHECK: vqshl.s8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqshlQs16:
;CHECK: vqshl.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqshlQs32:
;CHECK: vqshl.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqshlQs64:
;CHECK: vqshl.s64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqshlQu8:
;CHECK: vqshl.u8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqshlQu16:
;CHECK: vqshl.u16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqshlQu32:
;CHECK: vqshl.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqshlQu64:
;CHECK: vqshl.u64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

; vqshl by immediate: the shift amount is a constant splat with the maximum
; legal value for each element size.

define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
;CHECK: vqshls_n8:
;CHECK: vqshl.s8{{.*#7}}
  %tmp1 = load <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
;CHECK: vqshls_n16:
;CHECK: vqshl.s16{{.*#15}}
  %tmp1 = load <4 x i16>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
;CHECK: vqshls_n32:
;CHECK: vqshl.s32{{.*#31}}
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
  ret <2 x i32> %tmp2
}

define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
;CHECK: vqshls_n64:
;CHECK: vqshl.s64{{.*#63}}
  %tmp1 = load <1 x i64>* %A
  %tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
  ret <1 x i64> %tmp2
}

define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
;CHECK: vqshlu_n8:
;CHECK: vqshl.u8{{.*#7}}
  %tmp1 = load <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
;CHECK: vqshlu_n16:
;CHECK: vqshl.u16{{.*#15}}
  %tmp1 = load <4 x i16>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
;CHECK: vqshlu_n32:
;CHECK: vqshl.u32{{.*#31}}
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
  ret <2 x i32> %tmp2
}

define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
;CHECK: vqshlu_n64:
;CHECK: vqshl.u64{{.*#63}}
  %tmp1 = load <1 x i64>* %A
  %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
  ret <1 x i64> %tmp2
}

; vqshlu: signed input with unsigned saturation (the vqshiftsu intrinsics).

define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
;CHECK: vqshlsu_n8:
;CHECK: vqshlu.s8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
;CHECK: vqshlsu_n16:
;CHECK: vqshlu.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
;CHECK: vqshlsu_n32:
;CHECK: vqshlu.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
  ret <2 x i32> %tmp2
}

define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
;CHECK: vqshlsu_n64:
;CHECK: vqshlu.s64
  %tmp1 = load <1 x i64>* %A
  %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
  ret <1 x i64> %tmp2
}

define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
;CHECK: vqshlQs_n8:
;CHECK: vqshl.s8{{.*#7}}
  %tmp1 = load <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <16 x i8> %tmp2
}

define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
;CHECK: vqshlQs_n16:
;CHECK: vqshl.s16{{.*#15}}
  %tmp1 = load <8 x i16>* %A
  %tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
  ret <8 x i16> %tmp2
}

define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
;CHECK: vqshlQs_n32:
;CHECK: vqshl.s32{{.*#31}}
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
  ret <4 x i32> %tmp2
}

define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
;CHECK: vqshlQs_n64:
;CHECK: vqshl.s64{{.*#63}}
  %tmp1 = load <2 x i64>* %A
  %tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
  ret <2 x i64> %tmp2
}

define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
;CHECK: vqshlQu_n8:
;CHECK: vqshl.u8{{.*#7}}
  %tmp1 = load <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <16 x i8> %tmp2
}

define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
;CHECK: vqshlQu_n16:
;CHECK: vqshl.u16{{.*#15}}
  %tmp1 = load <8 x i16>* %A
  %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
  ret <8 x i16> %tmp2
}

define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
;CHECK: vqshlQu_n32:
;CHECK: vqshl.u32{{.*#31}}
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
  ret <4 x i32> %tmp2
}

define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
;CHECK: vqshlQu_n64:
;CHECK: vqshl.u64{{.*#63}}
  %tmp1 = load <2 x i64>* %A
  %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
  ret <2 x i64> %tmp2
}
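
; Q-register (128-bit) versions of vqshlu by immediate.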
define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
;CHECK: vqshlQsu_n8:
;CHECK: vqshlu.s8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
  ret <16 x i8> %tmp2
}

define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
;CHECK: vqshlQsu_n16:
;CHECK: vqshlu.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
  ret <8 x i16> %tmp2
}

define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
;CHECK: vqshlQsu_n32:
;CHECK: vqshlu.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
  ret <4 x i32> %tmp2
}

define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
;CHECK: vqshlQsu_n64:
;CHECK: vqshlu.s64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
  ret <2 x i64> %tmp2
}

declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; vqrshl: rounding saturating shift left by a register-specified amount.

define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqrshls8:
;CHECK: vqrshl.s8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqrshls16:
;CHECK: vqrshl.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqrshls32:
;CHECK: vqrshl.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqrshls64:
;CHECK: vqrshl.s64
  %tmp1 = load <1 x i64>* %A
  %tmp2 = load <1 x i64>* %B
  %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vqrshlu8:
;CHECK: vqrshl.u8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vqrshlu16:
;CHECK: vqrshl.u16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vqrshlu32:
;CHECK: vqrshl.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vqrshlu64:
;CHECK: vqrshl.u64
  %tmp1 = load <1 x i64>* %A
  %tmp2 = load <1 x i64>* %B
  %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqrshlQs8:
;CHECK: vqrshl.s8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqrshlQs16:
;CHECK: vqrshl.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqrshlQs32:
;CHECK: vqrshl.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqrshlQs64:
;CHECK: vqrshl.s64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vqrshlQu8:
;CHECK: vqrshl.u8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vqrshlQu16:
;CHECK: vqrshl.u16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vqrshlQu32:
;CHECK: vqrshl.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vqrshlQu64:
;CHECK: vqrshl.u64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone