; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s

; Codegen test for AArch64 add/sub/cmp whose right-hand operand is a narrower
; integer that has been zero- or sign-extended (and optionally shifted left).
; The expected assembly uses the "extended register" operand forms
; (uxtb/sxtb, uxth/sxth, uxtw/sxtw, with an optional "#shift").
;
; The second half of the file covers the opposite situation: when the i32
; value was just produced by an instruction writing a w register, the zext to
; i64 is expected to be implicit, so the plain x-register forms are expected.
;
; Every result in the first half is written with a volatile store so each
; add/sub stays in the output, in program order, for the in-order directives.
; NOTE(review): the first six functions are marked minsize; presumably this
; keeps instruction selection on the single-instruction extended-register
; form -- confirm against the AArch64 backend before changing it.

@var8 = global i8 0
@var16 = global i16 0
@var32 = global i32 0
@var64 = global i64 0

; i8 right-hand side: add and cmp with uxtb/sxtb, into both 32-bit and 64-bit
; left-hand sides, with and without a left shift.
define void @addsub_i8rhs() minsize {
; CHECK-LABEL: addsub_i8rhs:
  %val8_tmp = load i8, i8* @var8
  %lhs32 = load i32, i32* @var32
  %lhs64 = load i64, i64* @var64

  ; Need this to prevent extension upon load and give a vanilla i8 operand.
  %val8 = add i8 %val8_tmp, 123


; Zero-extending to 32-bits
  %rhs32_zext = zext i8 %val8 to i32
  %res32_zext = add i32 %lhs32, %rhs32_zext
  store volatile i32 %res32_zext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb

  %rhs32_zext_shift = shl i32 %rhs32_zext, 3
  %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
  store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3


; Zero-extending to 64-bits
  %rhs64_zext = zext i8 %val8 to i64
  %res64_zext = add i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb

  %rhs64_zext_shift = shl i64 %rhs64_zext, 1
  %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1

; Sign-extending to 32-bits
  %rhs32_sext = sext i8 %val8 to i32
  %res32_sext = add i32 %lhs32, %rhs32_sext
  store volatile i32 %res32_sext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb

  %rhs32_sext_shift = shl i32 %rhs32_sext, 1
  %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
  store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1

; Sign-extending to 64-bits
  %rhs64_sext = sext i8 %val8 to i64
  %res64_sext = add i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb

  %rhs64_sext_shift = shl i64 %rhs64_sext, 4
  %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4


; CMP variants
  %tst = icmp slt i32 %lhs32, %rhs32_zext
  br i1 %tst, label %end, label %test2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtb

test2:
  %cmp_sext = sext i8 %val8 to i64
  %tst2 = icmp eq i64 %lhs64, %cmp_sext
  br i1 %tst2, label %other, label %end
; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxtb

other:
  store volatile i32 %lhs32, i32* @var32
  ret void

end:
  ret void
}

; i8 right-hand side: the same extension/shift matrix as above, for sub.
define void @sub_i8rhs() minsize {
; CHECK-LABEL: sub_i8rhs:
  %val8_tmp = load i8, i8* @var8
  %lhs32 = load i32, i32* @var32
  %lhs64 = load i64, i64* @var64

  ; Need this to prevent extension upon load and give a vanilla i8 operand.
  %val8 = add i8 %val8_tmp, 123


; Zero-extending to 32-bits
  %rhs32_zext = zext i8 %val8 to i32
  %res32_zext = sub i32 %lhs32, %rhs32_zext
  store volatile i32 %res32_zext, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb

  %rhs32_zext_shift = shl i32 %rhs32_zext, 3
  %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
  store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3


; Zero-extending to 64-bits
  %rhs64_zext = zext i8 %val8 to i64
  %res64_zext = sub i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb

  %rhs64_zext_shift = shl i64 %rhs64_zext, 1
  %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1

; Sign-extending to 32-bits
  %rhs32_sext = sext i8 %val8 to i32
  %res32_sext = sub i32 %lhs32, %rhs32_sext
  store volatile i32 %res32_sext, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb

  %rhs32_sext_shift = shl i32 %rhs32_sext, 1
  %res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift
  store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1

; Sign-extending to 64-bits
  %rhs64_sext = sext i8 %val8 to i64
  %res64_sext = sub i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb

  %rhs64_sext_shift = shl i64 %rhs64_sext, 4
  %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4

  ret void
}

; i16 right-hand side: add and cmp with uxth/sxth, into both 32-bit and 64-bit
; left-hand sides, with and without a left shift.
define void @addsub_i16rhs() minsize {
; CHECK-LABEL: addsub_i16rhs:
  %val16_tmp = load i16, i16* @var16
  %lhs32 = load i32, i32* @var32
  %lhs64 = load i64, i64* @var64

  ; Need this to prevent extension upon load and give a vanilla i16 operand.
  %val16 = add i16 %val16_tmp, 123


; Zero-extending to 32-bits
  %rhs32_zext = zext i16 %val16 to i32
  %res32_zext = add i32 %lhs32, %rhs32_zext
  store volatile i32 %res32_zext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth

  %rhs32_zext_shift = shl i32 %rhs32_zext, 3
  %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
  store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3


; Zero-extending to 64-bits
  %rhs64_zext = zext i16 %val16 to i64
  %res64_zext = add i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth

  %rhs64_zext_shift = shl i64 %rhs64_zext, 1
  %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1

; Sign-extending to 32-bits
  %rhs32_sext = sext i16 %val16 to i32
  %res32_sext = add i32 %lhs32, %rhs32_sext
  store volatile i32 %res32_sext, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth

  %rhs32_sext_shift = shl i32 %rhs32_sext, 1
  %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
  store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1

; Sign-extending to 64-bits
  %rhs64_sext = sext i16 %val16 to i64
  %res64_sext = add i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth

  %rhs64_sext_shift = shl i64 %rhs64_sext, 4
  %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4


; CMP variants
  %tst = icmp slt i32 %lhs32, %rhs32_zext
  br i1 %tst, label %end, label %test2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxth

test2:
  %cmp_sext = sext i16 %val16 to i64
  %tst2 = icmp eq i64 %lhs64, %cmp_sext
  br i1 %tst2, label %other, label %end
; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxth

other:
  store volatile i32 %lhs32, i32* @var32
  ret void

end:
  ret void
}

; i16 right-hand side: the same extension/shift matrix as above, for sub.
define void @sub_i16rhs() minsize {
; CHECK-LABEL: sub_i16rhs:
  %val16_tmp = load i16, i16* @var16
  %lhs32 = load i32, i32* @var32
  %lhs64 = load i64, i64* @var64

  ; Need this to prevent extension upon load and give a vanilla i16 operand.
  %val16 = add i16 %val16_tmp, 123


; Zero-extending to 32-bits
  %rhs32_zext = zext i16 %val16 to i32
  %res32_zext = sub i32 %lhs32, %rhs32_zext
  store volatile i32 %res32_zext, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth

  %rhs32_zext_shift = shl i32 %rhs32_zext, 3
  %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
  store volatile i32 %res32_zext_shift, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3


; Zero-extending to 64-bits
  %rhs64_zext = zext i16 %val16 to i64
  %res64_zext = sub i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth

  %rhs64_zext_shift = shl i64 %rhs64_zext, 1
  %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1

; Sign-extending to 32-bits
  %rhs32_sext = sext i16 %val16 to i32
  %res32_sext = sub i32 %lhs32, %rhs32_sext
  store volatile i32 %res32_sext, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth

  %rhs32_sext_shift = shl i32 %rhs32_sext, 1
  %res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift
  store volatile i32 %res32_sext_shift, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1

; Sign-extending to 64-bits
  %rhs64_sext = sext i16 %val16 to i64
  %res64_sext = sub i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth

  %rhs64_sext_shift = shl i64 %rhs64_sext, 4
  %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4

  ret void
}

; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
; example), but the remaining instructions are probably not idiomatic
; in the face of "add/sub (shifted register)" so I don't intend to.
;
; i32 right-hand side added to a 64-bit left-hand side with uxtw/sxtw.
; The unshifted zero-extension is taken from the incoming argument %in32;
; the remaining cases use the locally computed %val32.
define void @addsub_i32rhs(i32 %in32) minsize {
; CHECK-LABEL: addsub_i32rhs:
  %val32_tmp = load i32, i32* @var32
  %lhs64 = load i64, i64* @var64

  %val32 = add i32 %val32_tmp, 123

  %rhs64_zext = zext i32 %in32 to i64
  %res64_zext = add i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw

  %rhs64_zext2 = zext i32 %val32 to i64
  %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
  %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2

  %rhs64_sext = sext i32 %val32 to i64
  %res64_sext = add i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw

  %rhs64_sext_shift = shl i64 %rhs64_sext, 2
  %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2

  ret void
}

; i32 right-hand side subtracted from a 64-bit left-hand side with uxtw/sxtw.
; Mirrors @addsub_i32rhs, including the use of %in32 for the unshifted
; zero-extension.
define void @sub_i32rhs(i32 %in32) minsize {
; CHECK-LABEL: sub_i32rhs:
  %val32_tmp = load i32, i32* @var32
  %lhs64 = load i64, i64* @var64

  %val32 = add i32 %val32_tmp, 123

  %rhs64_zext = zext i32 %in32 to i64
  %res64_zext = sub i64 %lhs64, %rhs64_zext
  store volatile i64 %res64_zext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw

  %rhs64_zext2 = zext i32 %val32 to i64
  %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
  %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
  store volatile i64 %res64_zext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2

  %rhs64_sext = sext i32 %val32 to i64
  %res64_sext = sub i64 %lhs64, %rhs64_sext
  store volatile i64 %res64_sext, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw

  %rhs64_sext_shift = shl i64 %rhs64_sext, 2
  %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
  store volatile i64 %res64_sext_shift, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2

  ret void
}

; Check that implicit zext from w reg write is used instead of uxtw form of add.
define i64 @add_fold_uxtw(i32 %x, i64 %y) {
; CHECK-LABEL: add_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: add x0, x1, x[[TMP]]
  %ret = add i64 %y, %ext
  ret i64 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of sub and that mov WZR is folded to form a neg instruction.
define i64 @sub_fold_uxtw_xzr(i32 %x) {
; CHECK-LABEL: sub_fold_uxtw_xzr:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: neg x0, x[[TMP]]
  %ret = sub i64 0, %ext
  ret i64 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw form of subs/cmp.
define i1 @cmp_fold_uxtw(i32 %x, i64 %y) {
; CHECK-LABEL: cmp_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: cmp x1, x[[TMP]]
; CHECK-NEXT: cset
  %ret = icmp eq i64 %y, %ext
  ret i1 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of add, leading to madd selection.
define i64 @madd_fold_uxtw(i32 %x, i64 %y) {
; CHECK-LABEL: madd_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: madd x0, x1, x1, x[[TMP]]
  %mul = mul i64 %y, %y
  %ret = add i64 %mul, %ext
  ret i64 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of sub, leading to sub/cmp folding (the sub followed by a compare
; against zero is expected to become a single cmp).
define i1 @cmp_sub_fold_uxtw(i32 %x, i64 %y, i64 %z) {
; CHECK-LABEL: cmp_sub_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: cmp x[[TMP2:[0-9]+]], x[[TMP]]
; CHECK-NEXT: cset
  %sub = sub i64 %z, %ext
  %ret = icmp eq i64 %sub, 0
  ret i1 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of add and add of -1 gets selected as sub.
define i64 @add_imm_fold_uxtw(i32 %x) {
; CHECK-LABEL: add_imm_fold_uxtw:
entry:
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
  %m = and i32 %x, 3
  %ext = zext i32 %m to i64
; CHECK-NEXT: sub x0, x[[TMP]], #1
  %ret = add i64 %ext, -1
  ret i64 %ret
}

; Check that implicit zext from w reg write is used instead of uxtw
; form of add and add lsl form gets selected.
define i64 @add_lsl_fold_uxtw(i32 %x, i64 %y) {
; CHECK-LABEL: add_lsl_fold_uxtw:
entry:
; CHECK: orr w[[TMP:[0-9]+]], w0, #0x3
  %m = or i32 %x, 3
  %ext = zext i32 %m to i64
  %shift = shl i64 %y, 3
; CHECK-NEXT: add x0, x[[TMP]], x1, lsl #3
  %ret = add i64 %ext, %shift
  ret i64 %ret
}