; Test memcmp using CLC, with i32 results.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

declare signext i32 @memcmp(i8 *%src1, i8 *%src2, i64 %size)

; Zero-length comparisons should be optimized away.
; The expected output simply loads 0 into %r2 and returns.
define i32 @f1(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f1:
; CHECK: lhi %r2, 0
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 0)
  ret i32 %res
}

; Check a case where the result is used as an integer.
; The expected sequence is a 2-byte CLC followed by IPM, SRL by 28 and
; RLL by 31, with the final value ending up in %r2.
define i32 @f2(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f2:
; CHECK: clc 0(2,%r2), 0(%r3)
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: rll %r2, [[REG]], 31
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2)
  ret i32 %res
}

; Check a case where the result is tested for equality.
; The conditional return (BER) must immediately follow the CLC, so no
; integer result is materialized in between.
define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f3:
; CHECK: clc 0(3,%r2), 0(%r3)
; CHECK-NEXT: ber %r14
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3)
  %cmp = icmp eq i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}

; Check a case where the result is tested for inequality.
; The conditional return (BLHR) must immediately follow the CLC.
define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f4:
; CHECK: clc 0(4,%r2), 0(%r3)
; CHECK-NEXT: blhr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 4)
  %cmp = icmp ne i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}

; Check a case where the result is tested via slt.
define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) {
; The conditional return (BLR) must immediately follow the 5-byte CLC.
; CHECK-LABEL: f5:
; CHECK: clc 0(5,%r2), 0(%r3)
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5)
  %cmp = icmp slt i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}

; Check a case where the result is tested for sgt.
; The conditional return (BHR) must immediately follow the CLC.
define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f6:
; CHECK: clc 0(6,%r2), 0(%r3)
; CHECK-NEXT: bhr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6)
  %cmp = icmp sgt i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}

; Check the upper end of the CLC range.  Here the result is used both as
; an integer and for branching.
; A single 256-byte CLC is expected, followed by the IPM/SRL/RLL sequence
; that produces the integer in %r2 and then the conditional return.
define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f7:
; CHECK: clc 0(256,%r2), 0(%r3)
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: rll %r2, [[REG]], 31
; CHECK: blr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 256)
  %cmp = icmp slt i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret i32 %res
}

; 257 bytes needs two CLCs.
; The first CLC covers bytes 0-255; a mismatch there (JLH) skips the second
; CLC, which covers the single remaining byte at offset 256.
define i32 @f8(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f8:
; CHECK: clc 0(256,%r2), 0(%r3)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(1,%r2), 256(%r3)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
  ret i32 %res
}

; Test a comparison of 257 bytes in which the CC result can be used directly.
define void @f9(i8 *%src1, i8 *%src2, i32 *%dest) {
; Two CLCs (256 bytes, then 1 byte) share a join label, and the conditional
; return (BLR) must come immediately after that label.
; CHECK-LABEL: f9:
; CHECK: clc 0(256,%r2), 0(%r3)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(1,%r2), 256(%r3)
; CHECK: [[LABEL]]:
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
  %cmp = icmp slt i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}

; Test the largest size that can use two CLCs.
; 512 bytes are expected to be split into two full 256-byte CLCs.
define i32 @f10(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f10:
; CHECK: clc 0(256,%r2), 0(%r3)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(256,%r2), 256(%r3)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 512)
  ret i32 %res
}

; Test the smallest size that needs 3 CLCs.
; 513 bytes are expected to be split into two 256-byte CLCs plus a 1-byte
; CLC, with both early exits branching to the same label.
define i32 @f11(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f11:
; CHECK: clc 0(256,%r2), 0(%r3)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(256,%r2), 256(%r3)
; CHECK: jlh [[LABEL]]
; CHECK: clc 512(1,%r2), 512(%r3)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 513)
  ret i32 %res
}

; Test the largest size that can use 3 CLCs.
; 768 bytes are expected to be split into three full 256-byte CLCs.
define i32 @f12(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f12:
; CHECK: clc 0(256,%r2), 0(%r3)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(256,%r2), 256(%r3)
; CHECK: jlh [[LABEL]]
; CHECK: clc 512(256,%r2), 512(%r3)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 768)
  ret i32 %res
}

; The next size up uses a loop instead.  We leave the more complicated
; loop tests to memcpy-01.ll, which shares the same form.
define i32 @f13(i8 *%src1, i8 *%src2) {
; Expected shape for 769 bytes: a loop counter of 3 is loaded, each loop
; iteration does a 256-byte CLC and advances both pointers by 256 (in
; either order), and a final 1-byte CLC handles the remainder.  A mismatch
; inside the loop branches straight to the label that precedes the IPM.
; CHECK-LABEL: f13:
; CHECK: lghi [[COUNT:%r[0-5]]], 3
; CHECK: [[LOOP:.L[^:]*]]:
; CHECK: clc 0(256,%r2), 0(%r3)
; CHECK: jlh [[LABEL:\..*]]
; CHECK-DAG: la %r2, 256(%r2)
; CHECK-DAG: la %r3, 256(%r3)
; CHECK: brctg [[COUNT]], [[LOOP]]
; CHECK: clc 0(1,%r2), 0(%r3)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769)
  ret i32 %res
}