; Unsigned add-with-overflow of an i64 and a zero-extended i32.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

declare i64 @foo()

; Register form: both operands are function arguments, so ALGFR is expected.
; The sum is stored through %res and the overflow bit is the return value.
define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64* %res) {
; CHECK-LABEL: f1:
; CHECK: algfr %r3, %r4
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %wide = zext i32 %b to i64
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %wide)
  %sum = extractvalue {i64, i1} %pair, 0
  %carry = extractvalue {i64, i1} %pair, 1
  store i64 %sum, i64* %res
  ret i1 %carry
}

; Branch on the overflow bit: @foo is tail-called only when the add overflows.
define void @f2(i64 %dummy, i64 %a, i32 %b, i64* %res) {
; CHECK-LABEL: f2:
; CHECK: algfr %r3, %r4
; CHECK: stg %r3, 0(%r5)
; CHECK: jgnle foo@PLT
; CHECK: br %r14
  %wide = zext i32 %b to i64
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %wide)
  %sum = extractvalue {i64, i1} %pair, 0
  %carry = extractvalue {i64, i1} %pair, 1
  store i64 %sum, i64* %res
  br i1 %carry, label %invoke, label %done

invoke:
  tail call i64 @foo()
  br label %done

done:
  ret void
}

; Same as f2 with the branch inverted: @foo is tail-called only when the add
; does not overflow.
define void @f3(i64 %dummy, i64 %a, i32 %b, i64* %res) {
; CHECK-LABEL: f3:
; CHECK: algfr %r3, %r4
; CHECK: stg %r3, 0(%r5)
; CHECK: jgle foo@PLT
; CHECK: br %r14
  %wide = zext i32 %b to i64
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %wide)
  %sum = extractvalue {i64, i1} %pair, 0
  %carry = extractvalue {i64, i1} %pair, 1
  store i64 %sum, i64* %res
  br i1 %carry, label %done, label %invoke

invoke:
  tail call i64 @foo()
  br label %done

done:
  ret void
}

; Memory form: check ALGF with no displacement.
define zeroext i1 @f4(i64 %dummy, i64 %a, i32* %src, i64* %res) {
; CHECK-LABEL: f4:
; CHECK: algf %r3, 0(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  ; The i32 operand is loaded directly from %src and zero-extended.
  %narrow = load i32, i32* %src
  %wide = zext i32 %narrow to i64
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %wide)
  %sum = extractvalue {i64, i1} %pair, 0
  %carry = extractvalue {i64, i1} %pair, 1
  store i64 %sum, i64* %res
  ret i1 %carry
}

; High end of the aligned ALGF range: element 131071 of an i32 array is byte
; offset 524284, which is expected to appear directly as the displacement.
define zeroext i1 @f5(i64 %dummy, i64 %a, i32* %src, i64* %res) {
; CHECK-LABEL: f5:
; CHECK: algf %r3, 524284(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %elt = getelementptr i32, i32* %src, i64 131071
  %narrow = load i32, i32* %elt
  %wide = zext i32 %narrow to i64
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %wide)
  %sum = extractvalue {i64, i1} %pair, 0
  %carry = extractvalue {i64, i1} %pair, 1
  store i64 %sum, i64* %res
  ret i1 %carry
}

; One i32 element further up (byte offset 524288), which needs separate
; address logic: the expected code adds the offset with AGFI first.
; Other sequences besides this one would be OK.
define zeroext i1 @f6(i64 %dummy, i64 %a, i32* %src, i64* %res) {
; CHECK-LABEL: f6:
; CHECK: agfi %r4, 524288
; CHECK: algf %r3, 0(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %elt = getelementptr i32, i32* %src, i64 131072
  %narrow = load i32, i32* %elt
  %wide = zext i32 %narrow to i64
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %wide)
  %sum = extractvalue {i64, i1} %pair, 0
  %carry = extractvalue {i64, i1} %pair, 1
  store i64 %sum, i64* %res
  ret i1 %carry
}

; Check the high end of the negative aligned ALGF range.
define zeroext i1 @f7(i64 %dummy, i64 %a, i32* %src, i64* %res) {
; CHECK-LABEL: f7:
; CHECK: algf %r3, -4(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  ; Element -1 of an i32 array is byte offset -4.
  %elt = getelementptr i32, i32* %src, i64 -1
  %narrow = load i32, i32* %elt
  %wide = zext i32 %narrow to i64
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %wide)
  %sum = extractvalue {i64, i1} %pair, 0
  %carry = extractvalue {i64, i1} %pair, 1
  store i64 %sum, i64* %res
  ret i1 %carry
}

; Low end of the ALGF range: element -131072 is byte offset -524288, which is
; expected to appear directly as the displacement.
define zeroext i1 @f8(i64 %dummy, i64 %a, i32* %src, i64* %res) {
; CHECK-LABEL: f8:
; CHECK: algf %r3, -524288(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %elt = getelementptr i32, i32* %src, i64 -131072
  %narrow = load i32, i32* %elt
  %wide = zext i32 %narrow to i64
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %wide)
  %sum = extractvalue {i64, i1} %pair, 0
  %carry = extractvalue {i64, i1} %pair, 1
  store i64 %sum, i64* %res
  ret i1 %carry
}

; One i32 element further down (byte offset -524292), which needs separate
; address logic: the expected code adds the offset with AGFI first.
; Other sequences besides this one would be OK.
define zeroext i1 @f9(i64 %dummy, i64 %a, i32* %src, i64* %res) {
; CHECK-LABEL: f9:
; CHECK: agfi %r4, -524292
; CHECK: algf %r3, 0(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  %elt = getelementptr i32, i32* %src, i64 -131073
  %narrow = load i32, i32* %elt
  %wide = zext i32 %narrow to i64
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %wide)
  %sum = extractvalue {i64, i1} %pair, 0
  %carry = extractvalue {i64, i1} %pair, 1
  store i64 %sum, i64* %res
  ret i1 %carry
}

; Check that ALGF allows an index.
define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64* %res) {
; CHECK-LABEL: f10:
; CHECK: algf %r4, 524284({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: stg %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35
; CHECK: br %r14
  ; The address is base + index + 524284, built with integer arithmetic; the
  ; pattern above accepts either register as base or index.
  %base = add i64 %src, %index
  %addr = add i64 %base, 524284
  %elt = inttoptr i64 %addr to i32*
  %narrow = load i32, i32* %elt
  %wide = zext i32 %narrow to i64
  %pair = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %wide)
  %sum = extractvalue {i64, i1} %pair, 0
  %carry = extractvalue {i64, i1} %pair, 1
  store i64 %sum, i64* %res
  ret i1 %carry
}

; Check that additions of spilled values can use ALGF rather than ALGFR.
; Ten i32 values are computed before the call to @foo and consumed after it;
; the expected output adds one of them straight from 160(%r15).
define zeroext i1 @f11(i32* %ptr0) {
; CHECK-LABEL: f11:
; CHECK: brasl %r14, foo@PLT
; CHECK: algf %r2, 160(%r15)
; CHECK: br %r14
  ; Addresses of every second i32 element after %ptr0.
  %slot1 = getelementptr i32, i32* %ptr0, i64 2
  %slot2 = getelementptr i32, i32* %ptr0, i64 4
  %slot3 = getelementptr i32, i32* %ptr0, i64 6
  %slot4 = getelementptr i32, i32* %ptr0, i64 8
  %slot5 = getelementptr i32, i32* %ptr0, i64 10
  %slot6 = getelementptr i32, i32* %ptr0, i64 12
  %slot7 = getelementptr i32, i32* %ptr0, i64 14
  %slot8 = getelementptr i32, i32* %ptr0, i64 16
  %slot9 = getelementptr i32, i32* %ptr0, i64 18

  %ld0 = load i32, i32* %ptr0
  %ld1 = load i32, i32* %slot1
  %ld2 = load i32, i32* %slot2
  %ld3 = load i32, i32* %slot3
  %ld4 = load i32, i32* %slot4
  %ld5 = load i32, i32* %slot5
  %ld6 = load i32, i32* %slot6
  %ld7 = load i32, i32* %slot7
  %ld8 = load i32, i32* %slot8
  %ld9 = load i32, i32* %slot9

  ; Modify each loaded value so the post-call uses cannot simply reload it.
  %inc0 = add i32 %ld0, 100
  %inc1 = add i32 %ld1, 100
  %inc2 = add i32 %ld2, 100
  %inc3 = add i32 %ld3, 100
  %inc4 = add i32 %ld4, 100
  %inc5 = add i32 %ld5, 100
  %inc6 = add i32 %ld6, 100
  %inc7 = add i32 %ld7, 100
  %inc8 = add i32 %ld8, 100
  %inc9 = add i32 %ld9, 100

  store i32 %inc0, i32* %ptr0
  store i32 %inc1, i32* %slot1
  store i32 %inc2, i32* %slot2
  store i32 %inc3, i32* %slot3
  store i32 %inc4, i32* %slot4
  store i32 %inc5, i32* %slot5
  store i32 %inc6, i32* %slot6
  store i32 %inc7, i32* %slot7
  store i32 %inc8, i32* %slot8
  store i32 %inc9, i32* %slot9

  ; All ten modified values stay live across this call.
  %start = call i64 @foo()

  %z0 = zext i32 %inc0 to i64
  %z1 = zext i32 %inc1 to i64
  %z2 = zext i32 %inc2 to i64
  %z3 = zext i32 %inc3 to i64
  %z4 = zext i32 %inc4 to i64
  %z5 = zext i32 %inc5 to i64
  %z6 = zext i32 %inc6 to i64
  %z7 = zext i32 %inc7 to i64
  %z8 = zext i32 %inc8 to i64
  %z9 = zext i32 %inc9 to i64

  ; Chain the additions onto the call result, OR-ing the overflow bits.
  %pair0 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %start, i64 %z0)
  %sum0 = extractvalue {i64, i1} %pair0, 0
  %c0 = extractvalue {i64, i1} %pair0, 1
  %pair1 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %sum0, i64 %z1)
  %sum1 = extractvalue {i64, i1} %pair1, 0
  %c1 = extractvalue {i64, i1} %pair1, 1
  %any1 = or i1 %c0, %c1
  %pair2 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %sum1, i64 %z2)
  %sum2 = extractvalue {i64, i1} %pair2, 0
  %c2 = extractvalue {i64, i1} %pair2, 1
  %any2 = or i1 %any1, %c2
  %pair3 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %sum2, i64 %z3)
  %sum3 = extractvalue {i64, i1} %pair3, 0
  %c3 = extractvalue {i64, i1} %pair3, 1
  %any3 = or i1 %any2, %c3
  %pair4 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %sum3, i64 %z4)
  %sum4 = extractvalue {i64, i1} %pair4, 0
  %c4 = extractvalue {i64, i1} %pair4, 1
  %any4 = or i1 %any3, %c4
  %pair5 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %sum4, i64 %z5)
  %sum5 = extractvalue {i64, i1} %pair5, 0
  %c5 = extractvalue {i64, i1} %pair5, 1
  %any5 = or i1 %any4, %c5
  %pair6 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %sum5, i64 %z6)
  %sum6 = extractvalue {i64, i1} %pair6, 0
  %c6 = extractvalue {i64, i1} %pair6, 1
  %any6 = or i1 %any5, %c6
  %pair7 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %sum6, i64 %z7)
  %sum7 = extractvalue {i64, i1} %pair7, 0
  %c7 = extractvalue {i64, i1} %pair7, 1
  %any7 = or i1 %any6, %c7
  %pair8 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %sum7, i64 %z8)
  %sum8 = extractvalue {i64, i1} %pair8, 0
  %c8 = extractvalue {i64, i1} %pair8, 1
  %any8 = or i1 %any7, %c8
  %pair9 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %sum8, i64 %z9)
  %sum9 = extractvalue {i64, i1} %pair9, 0
  %c9 = extractvalue {i64, i1} %pair9, 1
  %any9 = or i1 %any8, %c9

  ret i1 %any9
}

declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone