; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; Funnel-shift-left intrinsic declarations.
declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; Funnel-shift-right intrinsic declarations.
declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; Fully variable funnel shift left: both data operands and the shift amount
; are function arguments.

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr w9, wzr, #0x20
; CHECK-NEXT:    sub w9, w9, w2
; CHECK-NEXT:    lsl w8, w0, w2
; CHECK-NEXT:    lsr w9, w1, w9
; CHECK-NEXT:    orr w8, w8, w9
; CHECK-NEXT:    tst w2, #0x1f
; CHECK-NEXT:    csel w0, w0, w8, eq
; CHECK-NEXT:    ret
  %res = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %res
}

; Check that unusual integer widths (here i37) are at least minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshl_i37:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x11, #31883
; CHECK-NEXT:    mov w10, #37
; CHECK-NEXT:    movk x11, #3542, lsl #16
; CHECK-NEXT:    movk x11, #51366, lsl #32
; CHECK-NEXT:    sub x12, x10, x2
; CHECK-NEXT:    and x8, x2, #0x1fffffffff
; CHECK-NEXT:    movk x11, #56679, lsl #48
; CHECK-NEXT:    and x12, x12, #0x1fffffffff
; CHECK-NEXT:    umulh x13, x8, x11
; CHECK-NEXT:    umulh x11, x12, x11
; CHECK-NEXT:    lsr x13, x13, #5
; CHECK-NEXT:    lsr x11, x11, #5
; CHECK-NEXT:    and x9, x1, #0x1fffffffff
; CHECK-NEXT:    msub x8, x13, x10, x8
; CHECK-NEXT:    msub x10, x11, x10, x12
; CHECK-NEXT:    lsl x13, x0, x8
; CHECK-NEXT:    lsr x9, x9, x10
; CHECK-NEXT:    orr x9, x13, x9
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    csel x0, x0, x9, eq
; CHECK-NEXT:    ret
  %res = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %res
}

; All-constant operands fold to a constant:
; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011 (decimal 67)

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #67
; CHECK-NEXT:    ret
  %res = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %res
}

; Shift amount 15 exceeds the i8 width; 15 mod 8 = 7, so 255:0 << 7 gives 0x80.

define i8 @fshl_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr w0, wzr, #0x80
; CHECK-NEXT:    ret
  %res = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
  ret i8 %res
}

; 11 mod 8 = 3, so (15 << 3) | (15 >> 5) = 0x78.

define i8 @fshl_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr w0, wzr, #0x78
; CHECK-NEXT:    ret
  %res = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11)
  ret i8 %res
}

; 8 mod 8 = 0, so the result is the first operand unchanged (0).

define i8 @fshl_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
  %res = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8)
  ret i8 %res
}

; A constant shift amount is expected to select a single 'extr'
; (immediate 32 - 9 = 23).

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    extr w0, w0, w1, #23
; CHECK-NEXT:    ret
  %res = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %res
}

; The shift amount is taken modulo the bit width: 41 mod 32 = 9, so the
; expected output matches the shift-by-9 case above.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    extr w0, w0, w1, #23
; CHECK-NEXT:    ret
  %res = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %res
}

; Same modulo check for 64-bit: 105 mod 64 = 41, and 64 - 41 = 23.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshl_i64_const_overshift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    extr x0, x0, x1, #23
; CHECK-NEXT:    ret
  %res = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %res
}

; Constant folding here should not need any node-specific logic.

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr w0, wzr, #0x80
; CHECK-NEXT:    ret
  %res = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %res
}

; The remaining tests repeat the cases above for funnel shift right.

; Fully variable funnel shift right: both data operands and the shift amount
; are function arguments.

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr w9, wzr, #0x20
; CHECK-NEXT:    sub w9, w9, w2
; CHECK-NEXT:    lsr w8, w1, w2
; CHECK-NEXT:    lsl w9, w0, w9
; CHECK-NEXT:    orr w8, w9, w8
; CHECK-NEXT:    tst w2, #0x1f
; CHECK-NEXT:    csel w0, w1, w8, eq
; CHECK-NEXT:    ret
  %res = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %res
}

; Check that unusual integer widths (here i37) are at least minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x11, #31883
; CHECK-NEXT:    mov w10, #37
; CHECK-NEXT:    movk x11, #3542, lsl #16
; CHECK-NEXT:    movk x11, #51366, lsl #32
; CHECK-NEXT:    sub x12, x10, x2
; CHECK-NEXT:    and x9, x2, #0x1fffffffff
; CHECK-NEXT:    movk x11, #56679, lsl #48
; CHECK-NEXT:    and x12, x12, #0x1fffffffff
; CHECK-NEXT:    umulh x13, x9, x11
; CHECK-NEXT:    umulh x11, x12, x11
; CHECK-NEXT:    lsr x13, x13, #5
; CHECK-NEXT:    lsr x11, x11, #5
; CHECK-NEXT:    and x8, x1, #0x1fffffffff
; CHECK-NEXT:    msub x9, x13, x10, x9
; CHECK-NEXT:    msub x10, x11, x10, x12
; CHECK-NEXT:    lsr x8, x8, x9
; CHECK-NEXT:    lsl x10, x0, x10
; CHECK-NEXT:    orr x8, x10, x8
; CHECK-NEXT:    cmp x9, #0 // =0
; CHECK-NEXT:    csel x0, x1, x8, eq
; CHECK-NEXT:    ret
  %res = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %res
}

; All-constant operands fold to a constant:
; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111 (0x1f)

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr w0, wzr, #0x1f
; CHECK-NEXT:    ret
  %res = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %res
}

; Shift amount 15 exceeds the i8 width; 15 mod 8 = 7, so 255:0 >> 7 gives 0xfe.

define i8 @fshr_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr w0, wzr, #0xfe
; CHECK-NEXT:    ret
  %res = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
  ret i8 %res
}

; 11 mod 8 = 3, so (15 >> 3) | ((15 << 5) & 0xff) = 225.

define i8 @fshr_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #225
; CHECK-NEXT:    ret
  %res = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
  ret i8 %res
}

; 8 mod 8 = 0, so the result is the second operand unchanged (255).

define i8 @fshr_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr w0, wzr, #0xff
; CHECK-NEXT:    ret
  %res = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
  ret i8 %res
}

; A constant shift amount is expected to select a single 'extr'.

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    extr w0, w0, w1, #9
; CHECK-NEXT:    ret
  %res = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %res
}

; The shift amount is taken modulo the bit width: 41 - 32 = 9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    extr w0, w0, w1, #9
; CHECK-NEXT:    ret
  %res = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %res
}

; Same modulo check for 64-bit: 105 - 64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_i64_const_overshift:
; CHECK:       // %bb.0:
; CHECK-NEXT:    extr x0, x0, x1, #41
; CHECK-NEXT:    ret
  %res = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %res
}

; Constant folding here should not need any node-specific logic.

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr w0, wzr, #0xfe
; CHECK-NEXT:    ret
  %res = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %res
}

; A shift amount equal to the bit width is a shift by zero (32 mod 32 = 0).
; For fshl the expected output returns the first operand untouched.

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %res = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %res
}

; For fshr a shift by the bit width is expected to return the second operand.

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, w1
; CHECK-NEXT:    ret
  %res = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %res
}

; Vector variants of the shift-by-bit-width cases, using a splat of 32.

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %res
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %res
}
