; Test 32-bit conditional stores that are presented as selects.
;
; In the basic cases below, each function loads a 32-bit value, selects
; between that value and %alt, and stores the result back to the same
; address.  The expected code is a branch around a plain store of %alt;
; the negative match lines reject any other instruction that mentions the
; address register (such as a load of the original value).
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

; External function used by the frame-index test to make an alloca escape.
declare void @foo(i32 *)

; Test the simple case, with the loaded value first.
; The original value is kept when %limit < 420 (unsigned), so the store of
; %alt is skipped with JL.
define void @f1(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; ...and with the loaded value second
; Here %alt is stored when %limit < 420 (unsigned), so the store is skipped
; with the inverse condition, JHE.
define void @f2(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %alt, i32 %orig
  store i32 %res, i32 *%ptr
  ret void
}

; Test cases where the value is explicitly sign-extended to 64 bits, with the
; loaded value first.
; The select is done on i64 values and truncated back to i32 before the
; store; the expected code is still a branch around a 32-bit ST of %alt.
define void @f3(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %ext = sext i32 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f4(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-NOT: %r2
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %ext = sext i32 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; Test cases where the value is explicitly zero-extended to 64 bits, with the
; loaded value first.
; Zero-extending variant: the loaded value is widened with zext, selected as
; i64 and truncated back to i32 before the store.
define void @f5(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %ext = zext i32 %orig to i64
  %res = select i1 %cond, i64 %ext, i64 %alt
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; ...and with the loaded value second
define void @f6(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK-LABEL: f6:
; CHECK-NOT: %r2
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %ext = zext i32 %orig to i64
  %res = select i1 %cond, i64 %alt, i64 %ext
  %trunc = trunc i64 %res to i32
  store i32 %trunc, i32 *%ptr
  ret void
}

; Check the high end of the aligned ST range.
; Element 1023 of an i32 array is at byte offset 1023 * 4 = 4092.
define void @f7(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f7:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: st %r3, 4092(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 1023
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the next word up, which should use STY instead of ST.
; Element 1024 is at byte offset 1024 * 4 = 4096.
define void @f8(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f8:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sty %r3, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 1024
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the high end of the aligned STY range.
; Element 131071 of an i32 array is at byte offset 131071 * 4 = 524284.
define void @f9(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f9:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sty %r3, 524284(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 131071
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element 131072 is at byte offset 131072 * 4 = 524288; the expected code adds
; that offset to the base with AGFI and then stores at displacement 0.
define void @f10(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f10:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, 524288
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 131072
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the low end of the STY range.
; Element -131072 is at byte offset -131072 * 4 = -524288.
define void @f11(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f11:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sty %r3, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 -131072
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element -131073 of an i32 array is at byte offset -131073 * 4 = -524292; the
; expected code adds that offset to the base with AGFI and then stores at
; displacement 0.
define void @f12(i32 *%base, i32 %alt, i32 %limit) {
; CHECK-LABEL: f12:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: agfi %r2, -524292
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 -131073
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check that STY allows an index.
; The address is %base + %index + 4096, formed with integer adds and inttoptr.
define void @f13(i64 %base, i64 %index, i32 %alt, i32 %limit) {
; CHECK-LABEL: f13:
; CHECK-NOT: %r2
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r2
; CHECK: sty %r4, 4096(%r3,%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
  %add1 = add i64 %base, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i32 *
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; Check that volatile loads are not matched.
; The expected code keeps the explicit load and performs the store after the
; branch target, i.e. unconditionally.
define void @f14(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f14:
; CHECK: l {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load volatile i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; ...likewise stores.  In this case we should have a conditional load into %r3.
define void @f15(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK-LABEL: f15:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: l %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store volatile i32 %res, i32 *%ptr
  ret void
}

; Check that atomic loads are not matched.
; The transformation is OK for the "unordered" case tested here, but since we
; don't try to handle atomic operations at all in this context, it seems better
; to assert that than to restrict the test to a stronger ordering.
define void @f16(i32 *%ptr, i32 %alt, i32 %limit) {
; FIXME: should use a normal load instead of CS.
; NOTE(review): the expected output below already uses a plain L rather than
; CS, so this FIXME may be stale -- confirm.
; CHECK-LABEL: f16:
; CHECK: l {{%r[0-5]}}, 0(%r2)
; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
; CHECK: [[LABEL]]:
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load atomic i32 , i32 *%ptr unordered, align 4
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

; ...likewise stores.
define void @f17(i32 *%ptr, i32 %alt, i32 %limit) {
; FIXME: should use a normal store instead of CS.
; NOTE(review): the expected output below already uses a plain ST rather than
; CS, so this FIXME may be stale -- confirm.
; CHECK-LABEL: f17:
; CHECK: jhe [[LABEL:[^ ]*]]
; CHECK: l %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store atomic i32 %res, i32 *%ptr unordered, align 4
  ret void
}

; Try a frame index base.
; The slot is an alloca whose address is passed to @foo before and after the
; conditional store; the store is expected to be addressed relative to %r15,
; with no other mention of %r15 between the first call and the store.
define void @f18(i32 %alt, i32 %limit) {
; CHECK-LABEL: f18:
; CHECK: brasl %r14, foo@PLT
; CHECK-NOT: %r15
; CHECK: jl [[LABEL:[^ ]*]]
; CHECK-NOT: %r15
; CHECK: st {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: [[LABEL]]:
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %ptr = alloca i32
  call void @foo(i32 *%ptr)
  %cond = icmp ult i32 %limit, 420
  %orig = load i32 , i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  call void @foo(i32 *%ptr)
  ret void
}