; Test 32-bit subtraction with signed-overflow reporting, where the value
; being subtracted is an i16 loaded from memory and sign-extended to i32.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

declare i32 @foo()

; Bottom of the SH range: the halfword is read directly at %src (offset 0).
define zeroext i1 @f1(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f1:
; CHECK: sh %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %narrow = load i16, i16 *%src
  %rhs = sext i16 %narrow to i32
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %rhs)
  %diff = extractvalue {i32, i1} %pair, 0
  %ovf = extractvalue {i32, i1} %pair, 1
  store i32 %diff, i32 *%res
  ret i1 %ovf
}

; Top of the aligned SH range: element 2047, i.e. byte offset 4094.
define zeroext i1 @f2(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f2:
; CHECK: sh %r3, 4094(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %addr = getelementptr i16, i16 *%src, i64 2047
  %narrow = load i16, i16 *%addr
  %rhs = sext i16 %narrow to i32
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %rhs)
  %diff = extractvalue {i32, i1} %pair, 0
  %ovf = extractvalue {i32, i1} %pair, 1
  store i32 %diff, i32 *%res
  ret i1 %ovf
}

; One halfword further (byte offset 4096): SHY is expected instead of SH.
define zeroext i1 @f3(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f3:
; CHECK: shy %r3, 4096(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; Element 2048 of an i16 array is byte offset 4096.
  %addr = getelementptr i16, i16 *%src, i64 2048
  %narrow = load i16, i16 *%addr
  %rhs = sext i16 %narrow to i32
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %rhs)
  %diff = extractvalue {i32, i1} %pair, 0
  %ovf = extractvalue {i32, i1} %pair, 1
  store i32 %diff, i32 *%res
  ret i1 %ovf
}

; Top of the aligned SHY range: element 262143, i.e. byte offset 524286.
define zeroext i1 @f4(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f4:
; CHECK: shy %r3, 524286(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %addr = getelementptr i16, i16 *%src, i64 262143
  %narrow = load i16, i16 *%addr
  %rhs = sext i16 %narrow to i32
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %rhs)
  %diff = extractvalue {i32, i1} %pair, 0
  %ovf = extractvalue {i32, i1} %pair, 1
  store i32 %diff, i32 *%res
  ret i1 %ovf
}

; One halfword past the SHY range (byte offset 524288): the address has to be
; formed separately. Other instruction sequences would also be acceptable.
define zeroext i1 @f5(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f5:
; CHECK: agfi %r4, 524288
; CHECK: sh %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; Element 262144 of an i16 array is byte offset 524288.
  %addr = getelementptr i16, i16 *%src, i64 262144
  %narrow = load i16, i16 *%addr
  %rhs = sext i16 %narrow to i32
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %rhs)
  %diff = extractvalue {i32, i1} %pair, 0
  %ovf = extractvalue {i32, i1} %pair, 1
  store i32 %diff, i32 *%res
  ret i1 %ovf
}

; Top of the negative aligned SHY range: element -1, i.e. byte offset -2.
define zeroext i1 @f6(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f6:
; CHECK: shy %r3, -2(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %addr = getelementptr i16, i16 *%src, i64 -1
  %narrow = load i16, i16 *%addr
  %rhs = sext i16 %narrow to i32
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %rhs)
  %diff = extractvalue {i32, i1} %pair, 0
  %ovf = extractvalue {i32, i1} %pair, 1
  store i32 %diff, i32 *%res
  ret i1 %ovf
}

; Bottom of the SHY range: element -262144, i.e. byte offset -524288.
define zeroext i1 @f7(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f7:
; CHECK: shy %r3, -524288(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; Element -262144 of an i16 array is byte offset -524288.
  %addr = getelementptr i16, i16 *%src, i64 -262144
  %narrow = load i16, i16 *%addr
  %rhs = sext i16 %narrow to i32
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %rhs)
  %diff = extractvalue {i32, i1} %pair, 0
  %ovf = extractvalue {i32, i1} %pair, 1
  store i32 %diff, i32 *%res
  ret i1 %ovf
}

; One halfword below the SHY range (byte offset -524290): the address has to
; be formed separately. Other instruction sequences would also be acceptable.
define zeroext i1 @f8(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f8:
; CHECK: agfi %r4, -524290
; CHECK: sh %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %addr = getelementptr i16, i16 *%src, i64 -262145
  %narrow = load i16, i16 *%addr
  %rhs = sext i16 %narrow to i32
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %rhs)
  %diff = extractvalue {i32, i1} %pair, 0
  %ovf = extractvalue {i32, i1} %pair, 1
  store i32 %diff, i32 *%res
  ret i1 %ovf
}

; SH must accept a base register plus an index register.
define zeroext i1 @f9(i64 %src, i64 %index, i32 %a, i32 *%res) {
; CHECK-LABEL: f9:
; CHECK: sh %r4, 4094({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: st %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; Address = %src + %index + 4094, built from integers.
  %base = add i64 %src, %index
  %ea = add i64 %base, 4094
  %addr = inttoptr i64 %ea to i16 *
  %narrow = load i16, i16 *%addr
  %rhs = sext i16 %narrow to i32
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %rhs)
  %diff = extractvalue {i32, i1} %pair, 0
  %ovf = extractvalue {i32, i1} %pair, 1
  store i32 %diff, i32 *%res
  ret i1 %ovf
}

; SHY must accept a base register plus an index register as well.
define zeroext i1 @f10(i64 %src, i64 %index, i32 %a, i32 *%res) {
; CHECK-LABEL: f10:
; CHECK: shy %r4, 4096({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: st %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  ; Address = %src + %index + 4096, built from integers.
  %base = add i64 %src, %index
  %ea = add i64 %base, 4096
  %addr = inttoptr i64 %ea to i16 *
  %narrow = load i16, i16 *%addr
  %rhs = sext i16 %narrow to i32
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %rhs)
  %diff = extractvalue {i32, i1} %pair, 0
  %ovf = extractvalue {i32, i1} %pair, 1
  store i32 %diff, i32 *%res
  ret i1 %ovf
}

; Feed the overflow flag into a conditional branch instead of returning it.
define void @f11(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f11:
; CHECK: sh %r3, 0(%r4)
; CHECK: st %r3, 0(%r5)
; CHECK: jgo foo@PLT
; CHECK: br %r14
  %narrow = load i16, i16 *%src
  %rhs = sext i16 %narrow to i32
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %rhs)
  %diff = extractvalue {i32, i1} %pair, 0
  %ovf = extractvalue {i32, i1} %pair, 1
  store i32 %diff, i32 *%res
  ; @foo is tail-called only when the subtraction overflowed.
  br i1 %ovf, label %invoke, label %done

invoke:
  tail call i32 @foo()
  br label %done

done:
  ret void
}

; Same shape as f11, but @foo is tail-called when there was no overflow.
define void @f12(i32 %dummy, i32 %a, i16 *%src, i32 *%res) {
; CHECK-LABEL: f12:
; CHECK: sh %r3, 0(%r4)
; CHECK: st %r3, 0(%r5)
; CHECK: jgno foo@PLT
; CHECK: br %r14
  %narrow = load i16, i16 *%src
  %rhs = sext i16 %narrow to i32
  %pair = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %rhs)
  %diff = extractvalue {i32, i1} %pair, 0
  %ovf = extractvalue {i32, i1} %pair, 1
  store i32 %diff, i32 *%res
  br i1 %ovf, label %done, label %invoke

invoke:
  tail call i32 @foo()
  br label %done

done:
  ret void
}


declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
