; Test 32-bit subtraction in which the second operand is variable.
;
; Every function below goes through @llvm.ssub.with.overflow.i32 and consumes
; the overflow bit of the result, either by returning it as a zero-extended i1
; or by branching on it.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

declare i32 @foo()

; Check SR.
; Both operands are plain i32 arguments, so the register-register form is
; expected.  The difference is stored through %res and the overflow bit is
; returned.  The ipm/afi/risbg sequence is what the checks expect for turning
; the condition code left by the subtraction into the i1 result in %r2
; (1342177280 is 0x50000000).
define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%res) {
; CHECK-LABEL: f1:
; CHECK: sr %r3, %r4
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check using the overflow result for a branch.
; @foo is tail-called only when the subtraction overflows, so the expected
; code is a conditional jump to foo@PLT (jgo) with a plain return on the
; fall-through path.
define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%res) {
; CHECK-LABEL: f2:
; CHECK: sr %r3, %r4
; CHECK: st %r3, 0(%r5)
; CHECK: jgo foo@PLT
; CHECK: br %r14
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  br i1 %obit, label %call, label %exit

call:
  tail call i32 @foo()
  br label %exit

exit:
  ret void
}

; ... and the same with the inverted direction.
; Here @foo is tail-called only when the subtraction does NOT overflow, so the
; conditional jump is expected to use the negated condition (jgno).
define void @f3(i32 %dummy, i32 %a, i32 %b, i32 *%res) {
; CHECK-LABEL: f3:
; CHECK: sr %r3, %r4
; CHECK: st %r3, 0(%r5)
; CHECK: jgno foo@PLT
; CHECK: br %r14
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  br i1 %obit, label %exit, label %call

call:
  tail call i32 @foo()
  br label %exit

exit:
  ret void
}

; Check the low end of the S range.
; The subtrahend is loaded directly from %src (displacement 0); the checks
; expect the load to be folded into S as a memory operand.
define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f4:
; CHECK: s %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %b = load i32, i32 *%src
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the aligned S range.
; Element 1023 of an i32 array is byte offset 4092, the largest word-aligned
; displacement that still fits S's 12-bit unsigned displacement field.
define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f5:
; CHECK: s %r3, 4092(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 1023
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next word up, which should use SY instead of S.
; Element 1024 is byte offset 4096, one word past what S can encode.
define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f6:
; CHECK: sy %r3, 4096(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 1024
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the aligned SY range.
; Element 131071 of an i32 array is byte offset 524284, the largest
; word-aligned displacement that fits SY's 20-bit signed displacement field.
define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f7:
; CHECK: sy %r3, 524284(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 131071
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element 131072 is byte offset 524288, which no longer fits a displacement;
; the sequence checked here first adds the offset to the base register with
; AGFI and then uses S with displacement 0.
define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f8:
; CHECK: agfi %r4, 524288
; CHECK: s %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 131072
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the negative aligned SY range.
; Element -1 is byte offset -4; a negative displacement cannot use S, so SY
; is expected.
define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f9:
; CHECK: sy %r3, -4(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -1
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the low end of the SY range.
; Element -131072 of an i32 array is byte offset -524288, the most negative
; displacement that SY's 20-bit signed displacement field can hold.
define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f10:
; CHECK: sy %r3, -524288(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -131072
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element -131073 is byte offset -524292, one word below what SY can encode;
; the sequence checked here adjusts the base register with AGFI and then uses
; S with displacement 0.
define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f11:
; CHECK: agfi %r4, -524292
; CHECK: s %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -131073
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that S allows an index.
; The address is built as %src + %index + 4092 and converted with inttoptr.
; The checks expect all three parts to be folded into one base+index+
; displacement operand; either register may serve as base or index.
define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, i32 *%res) {
; CHECK-LABEL: f12:
; CHECK: s %r4, 4092({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: st %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 4092
  %ptr = inttoptr i64 %add2 to i32 *
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that SY allows an index.
; Same address shape as the S case, but with a constant of 4096, which is the
; first word-aligned displacement that requires SY.
define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, i32 *%res) {
; CHECK-LABEL: f13:
; CHECK: sy %r4, 4096({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: st %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i32 *
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that subtractions of spilled values can use S rather than SR.
; Ten values are loaded before the call to @foo and only consumed after it,
; so all of them are live across the call.  The checks expect at least one of
; them to be subtracted straight from its stack slot (displacement 160 or 164
; from %r15) instead of being reloaded into a register first.
define zeroext i1 @f14(i32 *%ptr0) {
; CHECK-LABEL: f14:
; CHECK: brasl %r14, foo@PLT
; CHECK: s %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
  ; Addresses of every second i32 starting at %ptr0.
  %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
  %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
  %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
  %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
  %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
  %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
  %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
  %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
  %ptr9 = getelementptr i32, i32 *%ptr0, i64 18

  ; Load all ten subtrahends before the call so they stay live across it.
  %val0 = load i32, i32 *%ptr0
  %val1 = load i32, i32 *%ptr1
  %val2 = load i32, i32 *%ptr2
  %val3 = load i32, i32 *%ptr3
  %val4 = load i32, i32 *%ptr4
  %val5 = load i32, i32 *%ptr5
  %val6 = load i32, i32 *%ptr6
  %val7 = load i32, i32 *%ptr7
  %val8 = load i32, i32 *%ptr8
  %val9 = load i32, i32 *%ptr9

  %ret = call i32 @foo()

  ; Chain of subtractions starting from the call result: %addN is the running
  ; difference after subtracting %valN, and %resN is the OR of every overflow
  ; bit seen so far.
  %t0 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %ret, i32 %val0)
  %add0 = extractvalue {i32, i1} %t0, 0
  %obit0 = extractvalue {i32, i1} %t0, 1
  %t1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add0, i32 %val1)
  %add1 = extractvalue {i32, i1} %t1, 0
  %obit1 = extractvalue {i32, i1} %t1, 1
  %res1 = or i1 %obit0, %obit1
  %t2 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add1, i32 %val2)
  %add2 = extractvalue {i32, i1} %t2, 0
  %obit2 = extractvalue {i32, i1} %t2, 1
  %res2 = or i1 %res1, %obit2
  %t3 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add2, i32 %val3)
  %add3 = extractvalue {i32, i1} %t3, 0
  %obit3 = extractvalue {i32, i1} %t3, 1
  %res3 = or i1 %res2, %obit3
  %t4 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add3, i32 %val4)
  %add4 = extractvalue {i32, i1} %t4, 0
  %obit4 = extractvalue {i32, i1} %t4, 1
  %res4 = or i1 %res3, %obit4
  %t5 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add4, i32 %val5)
  %add5 = extractvalue {i32, i1} %t5, 0
  %obit5 = extractvalue {i32, i1} %t5, 1
  %res5 = or i1 %res4, %obit5
  %t6 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add5, i32 %val6)
  %add6 = extractvalue {i32, i1} %t6, 0
  %obit6 = extractvalue {i32, i1} %t6, 1
  %res6 = or i1 %res5, %obit6
  %t7 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add6, i32 %val7)
  %add7 = extractvalue {i32, i1} %t7, 0
  %obit7 = extractvalue {i32, i1} %t7, 1
  %res7 = or i1 %res6, %obit7
  %t8 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add7, i32 %val8)
  %add8 = extractvalue {i32, i1} %t8, 0
  %obit8 = extractvalue {i32, i1} %t8, 1
  %res8 = or i1 %res7, %obit8
  %t9 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add8, i32 %val9)
  %add9 = extractvalue {i32, i1} %t9, 0
  %obit9 = extractvalue {i32, i1} %t9, 1
  %res9 = or i1 %res8, %obit9

  ret i1 %res9
}

; Signed 32-bit subtract-with-overflow intrinsic used by every test in this
; file; returns {difference, overflow bit}.
declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone