; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Vector shift tests for ARM with NEON enabled. Each function performs one IR
; shift; the FileCheck directives inside it name the function label and the
; instruction mnemonic expected in llc's assembly output.

; --- shl by a per-lane variable amount, 64-bit vectors: vshl.u<size> expected.

define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vshls8:
;CHECK: vshl.u8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = shl <8 x i8> %tmp1, %tmp2
  ret <8 x i8> %tmp3
}

define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vshls16:
;CHECK: vshl.u16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = shl <4 x i16> %tmp1, %tmp2
  ret <4 x i16> %tmp3
}

define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vshls32:
;CHECK: vshl.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = shl <2 x i32> %tmp1, %tmp2
  ret <2 x i32> %tmp3
}

define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vshls64:
;CHECK: vshl.u64
  %tmp1 = load <1 x i64>* %A
  %tmp2 = load <1 x i64>* %B
  %tmp3 = shl <1 x i64> %tmp1, %tmp2
  ret <1 x i64> %tmp3
}

; --- shl by a constant splat of (lane width - 1), 64-bit vectors:
; vshl.i<size> expected.

define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
;CHECK: vshli8:
;CHECK: vshl.i8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  ret <8 x i8> %tmp2
}

define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
;CHECK: vshli16:
;CHECK: vshl.i16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
  ret <4 x i16> %tmp2
}

define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
;CHECK: vshli32:
;CHECK: vshl.i32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
  ret <2 x i32> %tmp2
}

define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
;CHECK: vshli64:
;CHECK: vshl.i64
  %tmp1 = load <1 x i64>* %A
  %tmp2 = shl <1 x i64> %tmp1, < i64 63 >
  ret <1 x i64> %tmp2
}

; --- shl by a per-lane variable amount, 128-bit vectors: vshl.u<size> expected.

define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vshlQs8:
;CHECK: vshl.u8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = shl <16 x i8> %tmp1, %tmp2
  ret <16 x i8> %tmp3
}

define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vshlQs16:
;CHECK: vshl.u16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = shl <8 x i16> %tmp1, %tmp2
  ret <8 x i16> %tmp3
}

define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vshlQs32:
;CHECK: vshl.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = shl <4 x i32> %tmp1, %tmp2
  ret <4 x i32> %tmp3
}

define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vshlQs64:
;CHECK: vshl.u64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = shl <2 x i64> %tmp1, %tmp2
  ret <2 x i64> %tmp3
}

; --- shl by a constant splat of (lane width - 1), 128-bit vectors:
; vshl.i<size> expected.

define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
;CHECK: vshlQi8:
;CHECK: vshl.i8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  ret <16 x i8> %tmp2
}

define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
;CHECK: vshlQi16:
;CHECK: vshl.i16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
  ret <8 x i16> %tmp2
}

define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
;CHECK: vshlQi32:
;CHECK: vshl.i32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
  ret <4 x i32> %tmp2
}

define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
;CHECK: vshlQi64:
;CHECK: vshl.i64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 >
  ret <2 x i64> %tmp2
}

; --- lshr by a per-lane variable amount, 64-bit vectors: the directives expect
; a vneg.s<size> (vsub.i64 for 64-bit lanes) followed by vshl.u<size>.

define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vlshru8:
;CHECK: vneg.s8
;CHECK: vshl.u8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = lshr <8 x i8> %tmp1, %tmp2
  ret <8 x i8> %tmp3
}

define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vlshru16:
;CHECK: vneg.s16
;CHECK: vshl.u16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = lshr <4 x i16> %tmp1, %tmp2
  ret <4 x i16> %tmp3
}

define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vlshru32:
;CHECK: vneg.s32
;CHECK: vshl.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = lshr <2 x i32> %tmp1, %tmp2
  ret <2 x i32> %tmp3
}

define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vlshru64:
;CHECK: vsub.i64
;CHECK: vshl.u64
  %tmp1 = load <1 x i64>* %A
  %tmp2 = load <1 x i64>* %B
  %tmp3 = lshr <1 x i64> %tmp1, %tmp2
  ret <1 x i64> %tmp3
}

; --- lshr by a constant splat, 64-bit vectors: vshr.u<size> expected.
; The amount is (lane width - 1): LLVM IR leaves a shift by the full lane width
; undefined, so a test using that amount would depend on the optimizer not
; exploiting the undefined result.

define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
;CHECK: vlshri8:
;CHECK: vshr.u8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = lshr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  ret <8 x i8> %tmp2
}

define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
;CHECK: vlshri16:
;CHECK: vshr.u16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = lshr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
  ret <4 x i16> %tmp2
}

define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
;CHECK: vlshri32:
;CHECK: vshr.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = lshr <2 x i32> %tmp1, < i32 31, i32 31 >
  ret <2 x i32> %tmp2
}

define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
;CHECK: vlshri64:
;CHECK: vshr.u64
  %tmp1 = load <1 x i64>* %A
  %tmp2 = lshr <1 x i64> %tmp1, < i64 63 >
  ret <1 x i64> %tmp2
}

; --- lshr by a per-lane variable amount, 128-bit vectors: same expected
; sequence as the 64-bit variable lshr tests.

define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vlshrQu8:
;CHECK: vneg.s8
;CHECK: vshl.u8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = lshr <16 x i8> %tmp1, %tmp2
  ret <16 x i8> %tmp3
}

define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vlshrQu16:
;CHECK: vneg.s16
;CHECK: vshl.u16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = lshr <8 x i16> %tmp1, %tmp2
  ret <8 x i16> %tmp3
}

define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vlshrQu32:
;CHECK: vneg.s32
;CHECK: vshl.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = lshr <4 x i32> %tmp1, %tmp2
  ret <4 x i32> %tmp3
}

define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vlshrQu64:
;CHECK: vsub.i64
;CHECK: vshl.u64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = lshr <2 x i64> %tmp1, %tmp2
  ret <2 x i64> %tmp3
}

; --- lshr by a constant splat of (lane width - 1), 128-bit vectors:
; vshr.u<size> expected.

define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
;CHECK: vlshrQi8:
;CHECK: vshr.u8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = lshr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  ret <16 x i8> %tmp2
}

define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
;CHECK: vlshrQi16:
;CHECK: vshr.u16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = lshr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
  ret <8 x i16> %tmp2
}

define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
;CHECK: vlshrQi32:
;CHECK: vshr.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = lshr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
  ret <4 x i32> %tmp2
}

define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
;CHECK: vlshrQi64:
;CHECK: vshr.u64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = lshr <2 x i64> %tmp1, < i64 63, i64 63 >
  ret <2 x i64> %tmp2
}

; Example that requires splitting and expanding a vector shift.
define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
; No FileCheck directives are attached to this function, so the test only
; requires that llc accepts it.
entry:
  %shr = lshr <2 x i64> %val, < i64 2, i64 2 >    ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %shr
}

; --- ashr by a per-lane variable amount, 64-bit vectors: the directives expect
; a vneg.s<size> (vsub.i64 for 64-bit lanes) followed by vshl.s<size>.

define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vashrs8:
;CHECK: vneg.s8
;CHECK: vshl.s8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = ashr <8 x i8> %tmp1, %tmp2
  ret <8 x i8> %tmp3
}

define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vashrs16:
;CHECK: vneg.s16
;CHECK: vshl.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = ashr <4 x i16> %tmp1, %tmp2
  ret <4 x i16> %tmp3
}

define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vashrs32:
;CHECK: vneg.s32
;CHECK: vshl.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = ashr <2 x i32> %tmp1, %tmp2
  ret <2 x i32> %tmp3
}

define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: vashrs64:
;CHECK: vsub.i64
;CHECK: vshl.s64
  %tmp1 = load <1 x i64>* %A
  %tmp2 = load <1 x i64>* %B
  %tmp3 = ashr <1 x i64> %tmp1, %tmp2
  ret <1 x i64> %tmp3
}

; --- ashr by a constant splat, 64-bit vectors: vshr.s<size> expected.
; The amount is (lane width - 1): LLVM IR leaves a shift by the full lane width
; undefined, so a test using that amount would depend on the optimizer not
; exploiting the undefined result.

define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
;CHECK: vashri8:
;CHECK: vshr.s8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = ashr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  ret <8 x i8> %tmp2
}

define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
;CHECK: vashri16:
;CHECK: vshr.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = ashr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
  ret <4 x i16> %tmp2
}

define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
;CHECK: vashri32:
;CHECK: vshr.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = ashr <2 x i32> %tmp1, < i32 31, i32 31 >
  ret <2 x i32> %tmp2
}

define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
;CHECK: vashri64:
;CHECK: vshr.s64
  %tmp1 = load <1 x i64>* %A
  %tmp2 = ashr <1 x i64> %tmp1, < i64 63 >
  ret <1 x i64> %tmp2
}

; --- ashr by a per-lane variable amount, 128-bit vectors: same expected
; sequence as the 64-bit variable ashr tests.

define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vashrQs8:
;CHECK: vneg.s8
;CHECK: vshl.s8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = ashr <16 x i8> %tmp1, %tmp2
  ret <16 x i8> %tmp3
}

define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vashrQs16:
;CHECK: vneg.s16
;CHECK: vshl.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = ashr <8 x i16> %tmp1, %tmp2
  ret <8 x i16> %tmp3
}

define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vashrQs32:
;CHECK: vneg.s32
;CHECK: vshl.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = ashr <4 x i32> %tmp1, %tmp2
  ret <4 x i32> %tmp3
}

define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: vashrQs64:
;CHECK: vsub.i64
;CHECK: vshl.s64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = ashr <2 x i64> %tmp1, %tmp2
  ret <2 x i64> %tmp3
}

; --- ashr by a constant splat of (lane width - 1), 128-bit vectors:
; vshr.s<size> expected.

define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
;CHECK: vashrQi8:
;CHECK: vshr.s8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = ashr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
  ret <16 x i8> %tmp2
}

define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
;CHECK: vashrQi16:
;CHECK: vshr.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = ashr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
  ret <8 x i16> %tmp2
}

define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
;CHECK: vashrQi32:
;CHECK: vshr.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = ashr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
  ret <4 x i32> %tmp2
}

define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
;CHECK: vashrQi64:
;CHECK: vshr.s64
  %tmp1 = load <2 x i64>* %A
  %tmp2 = ashr <2 x i64> %tmp1, < i64 63, i64 63 >
  ret <2 x i64> %tmp2
}