; Test high-part i64->i128 multiplications.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -asm-verbose=0 | FileCheck %s

declare i64 @foo()

; Unsigned widening multiply where only the upper 64 bits of the 128-bit
; product are returned.  A single MLGR is expected, with no other use of
; %r2-%r4 before it.
define i64 @f1(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f1:
; CHECK-NOT: {{%r[234]}}
; CHECK: mlgr %r2, %r4
; CHECK: br %r14
  %lhs.wide = zext i64 %a to i128
  %rhs.wide = zext i64 %b to i128
  %prod = mul i128 %lhs.wide, %rhs.wide
  %prod.hi = lshr i128 %prod, 64
  %hi = trunc i128 %prod.hi to i64
  ret i64 %hi
}

; Signed widening multiply, again returning only the upper 64 bits.
; The expected code is a longer sequence: an unsigned MLGR whose result is
; adjusted by subtracting a correction term built from the sign masks.
define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f2:
; CHECK-DAG: srag [[RES1:%r[0-5]]], %r3, 63
; CHECK-DAG: srag [[RES2:%r[0-5]]], %r4, 63
; CHECK-DAG: ngr [[RES1]], %r4
; CHECK-DAG: ngr [[RES2]], %r3
; CHECK-DAG: agr [[RES2]], [[RES1]]
; CHECK-DAG: mlgr %r2, %r4
; CHECK: sgr %r2, [[RES2]]
; CHECK: br %r14
  %lhs.wide = sext i64 %a to i128
  %rhs.wide = sext i64 %b to i128
  %prod = mul i128 %lhs.wide, %rhs.wide
  %prod.hi = lshr i128 %prod, 64
  %hi = trunc i128 %prod.hi to i64
  ret i64 %hi
}

; Unsigned widening multiply shifted right by 67, so only a portion of the
; upper half is kept; the extra 3 bits should show up as an SRLG.
define i64 @f3(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f3:
; CHECK-NOT: {{%r[234]}}
; CHECK: mlgr %r2, %r4
; CHECK: srlg %r2, %r2, 3
; CHECK: br %r14
  %lhs.wide = zext i64 %a to i128
  %rhs.wide = zext i64 %b to i128
  %prod = mul i128 %lhs.wide, %rhs.wide
  %prod.hi = lshr i128 %prod, 67
  %hi = trunc i128 %prod.hi to i64
  ret i64 %hi
}

; Check zero-extended multiplication in which the result is split into
; high and low halves.
define i64 @f4(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f4:
; CHECK-NOT: {{%r[234]}}
; CHECK: mlgr %r2, %r4
; CHECK: ogr %r2, %r3
; CHECK: br %r14
  %lhs.wide = zext i64 %a to i128
  %rhs.wide = zext i64 %b to i128
  %prod = mul i128 %lhs.wide, %rhs.wide
  %prod.hi = lshr i128 %prod, 64
  %hi = trunc i128 %prod.hi to i64
  %lo = trunc i128 %prod to i64
  %merged = or i64 %hi, %lo
  ret i64 %merged
}

; Unsigned division by the constant 1234; the expected code multiplies
; (MLGR) and shifts (SRLG) instead of dividing.
define i64 @f5(i64 %dummy, i64 %a) {
; CHECK-LABEL: f5:
; CHECK: mlgr %r2,
; CHECK: srlg %r2, %r2,
; CHECK: br %r14
  %quot = udiv i64 %a, 1234
  ret i64 %quot
}

; Multiplier loaded straight from %src: MLG with a zero displacement.
define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f6:
; CHECK-NOT: {{%r[234]}}
; CHECK: mlg %r2, 0(%r4)
; CHECK: br %r14
  %rhs = load i64, i64 *%src
  %lhs.wide = zext i64 %a to i128
  %rhs.wide = zext i64 %rhs to i128
  %prod = mul i128 %lhs.wide, %rhs.wide
  %prod.hi = lshr i128 %prod, 64
  %hi = trunc i128 %prod.hi to i64
  ret i64 %hi
}

; Largest doubleword-aligned positive displacement for MLG
; (element 65535, i.e. byte offset 524280).
define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f7:
; CHECK: mlg %r2, 524280(%r4)
; CHECK: br %r14
  %addr = getelementptr i64, i64 *%src, i64 65535
  %rhs = load i64, i64 *%addr
  %lhs.wide = zext i64 %a to i128
  %rhs.wide = zext i64 %rhs to i128
  %prod = mul i128 %lhs.wide, %rhs.wide
  %prod.hi = lshr i128 %prod, 64
  %hi = trunc i128 %prod.hi to i64
  ret i64 %hi
}

; Check the next doubleword up, which requires separate address logic.
; Other sequences besides this one would be OK.
define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f8:
; CHECK: agfi %r4, 524288
; CHECK: mlg %r2, 0(%r4)
; CHECK: br %r14
  %addr = getelementptr i64, i64 *%src, i64 65536
  %rhs = load i64, i64 *%addr
  %lhs.wide = zext i64 %a to i128
  %rhs.wide = zext i64 %rhs to i128
  %prod = mul i128 %lhs.wide, %rhs.wide
  %prod.hi = lshr i128 %prod, 64
  %hi = trunc i128 %prod.hi to i64
  ret i64 %hi
}

; Negative displacement closest to zero that is doubleword aligned
; (element -1, i.e. byte offset -8).
define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f9:
; CHECK: mlg %r2, -8(%r4)
; CHECK: br %r14
  %addr = getelementptr i64, i64 *%src, i64 -1
  %rhs = load i64, i64 *%addr
  %lhs.wide = zext i64 %a to i128
  %rhs.wide = zext i64 %rhs to i128
  %prod = mul i128 %lhs.wide, %rhs.wide
  %prod.hi = lshr i128 %prod, 64
  %hi = trunc i128 %prod.hi to i64
  ret i64 %hi
}

; Most negative displacement MLG is expected to accept
; (element -65536, i.e. byte offset -524288).
define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f10:
; CHECK: mlg %r2, -524288(%r4)
; CHECK: br %r14
  %addr = getelementptr i64, i64 *%src, i64 -65536
  %rhs = load i64, i64 *%addr
  %lhs.wide = zext i64 %a to i128
  %rhs.wide = zext i64 %rhs to i128
  %prod = mul i128 %lhs.wide, %rhs.wide
  %prod.hi = lshr i128 %prod, 64
  %hi = trunc i128 %prod.hi to i64
  ret i64 %hi
}

; One doubleword below that limit: the address has to be formed
; separately (here with AGFI) before the MLG.
; Other sequences besides this one would be OK.
define i64 @f11(i64 *%dest, i64 %a, i64 *%src) {
; CHECK-LABEL: f11:
; CHECK: agfi %r4, -524296
; CHECK: mlg %r2, 0(%r4)
; CHECK: br %r14
  %addr = getelementptr i64, i64 *%src, i64 -65537
  %rhs = load i64, i64 *%addr
  %lhs.wide = zext i64 %a to i128
  %rhs.wide = zext i64 %rhs to i128
  %prod = mul i128 %lhs.wide, %rhs.wide
  %prod.hi = lshr i128 %prod, 64
  %hi = trunc i128 %prod.hi to i64
  ret i64 %hi
}

; Check that MLG allows an index.
define i64 @f12(i64 *%dest, i64 %a, i64 %src, i64 %index) {
; CHECK-LABEL: f12:
; CHECK: mlg %r2, 524287(%r5,%r4)
; CHECK: br %r14
  %base.idx = add i64 %src, %index
  %ea = add i64 %base.idx, 524287
  %addr = inttoptr i64 %ea to i64 *
  %rhs = load i64, i64 *%addr
  %lhs.wide = zext i64 %a to i128
  %rhs.wide = zext i64 %rhs to i128
  %prod = mul i128 %lhs.wide, %rhs.wide
  %prod.hi = lshr i128 %prod, 64
  %hi = trunc i128 %prod.hi to i64
  ret i64 %hi
}

; Ten values are loaded before the call to @foo and consumed only after
; it, as a chain of high-part multiplies.  The checks expect at least one
; multiply to take its operand from the stack slot at 160(%r15) using MLG
; rather than reloading it for MLGR.
define i64 @f13(i64 *%ptr0) {
; CHECK-LABEL: f13:
; CHECK: brasl %r14, foo@PLT
; CHECK: mlg {{%r[0-9]+}}, 160(%r15)
; CHECK: br %r14
  %p1 = getelementptr i64, i64 *%ptr0, i64 2
  %p2 = getelementptr i64, i64 *%ptr0, i64 4
  %p3 = getelementptr i64, i64 *%ptr0, i64 6
  %p4 = getelementptr i64, i64 *%ptr0, i64 8
  %p5 = getelementptr i64, i64 *%ptr0, i64 10
  %p6 = getelementptr i64, i64 *%ptr0, i64 12
  %p7 = getelementptr i64, i64 *%ptr0, i64 14
  %p8 = getelementptr i64, i64 *%ptr0, i64 16
  %p9 = getelementptr i64, i64 *%ptr0, i64 18

  %v0 = load i64, i64 *%ptr0
  %v1 = load i64, i64 *%p1
  %v2 = load i64, i64 *%p2
  %v3 = load i64, i64 *%p3
  %v4 = load i64, i64 *%p4
  %v5 = load i64, i64 *%p5
  %v6 = load i64, i64 *%p6
  %v7 = load i64, i64 *%p7
  %v8 = load i64, i64 *%p8
  %v9 = load i64, i64 *%p9

  %seed = call i64 @foo()

  ; Each step multiplies the previous high part by the next loaded value.
  %seed.wide = zext i64 %seed to i128
  %v0.wide = zext i64 %v0 to i128
  %full0 = mul i128 %seed.wide, %v0.wide
  %top0 = lshr i128 %full0, 64

  %v1.wide = zext i64 %v1 to i128
  %full1 = mul i128 %top0, %v1.wide
  %top1 = lshr i128 %full1, 64

  %v2.wide = zext i64 %v2 to i128
  %full2 = mul i128 %top1, %v2.wide
  %top2 = lshr i128 %full2, 64

  %v3.wide = zext i64 %v3 to i128
  %full3 = mul i128 %top2, %v3.wide
  %top3 = lshr i128 %full3, 64

  %v4.wide = zext i64 %v4 to i128
  %full4 = mul i128 %top3, %v4.wide
  %top4 = lshr i128 %full4, 64

  %v5.wide = zext i64 %v5 to i128
  %full5 = mul i128 %top4, %v5.wide
  %top5 = lshr i128 %full5, 64

  %v6.wide = zext i64 %v6 to i128
  %full6 = mul i128 %top5, %v6.wide
  %top6 = lshr i128 %full6, 64

  %v7.wide = zext i64 %v7 to i128
  %full7 = mul i128 %top6, %v7.wide
  %top7 = lshr i128 %full7, 64

  %v8.wide = zext i64 %v8 to i128
  %full8 = mul i128 %top7, %v8.wide
  %top8 = lshr i128 %full8, 64

  %v9.wide = zext i64 %v9 to i128
  %full9 = mul i128 %top8, %v9.wide
  %top9 = lshr i128 %full9, 64

  %result = trunc i128 %top9 to i64
  ret i64 %result
}