; Test memcmp using CLC, with i32 results.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; External memcmp routine exercised by every function in this file.  Each
; caller passes a constant size so that the expected output can compare the
; buffers inline instead of emitting a library call.
declare signext i32 @memcmp(i8 *%src1, i8 *%src2, i64 %size)

; Zero-length comparisons should be optimized away.
;
; With a constant size of 0 the expected output simply loads 0 into %r2 and
; returns.
define i32 @f1(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f1:
; CHECK: lhi %r2, 0
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 0)
  ret i32 %res
}

; Check a case where the result is used as an integer.
;
; The 2-byte comparison is expected to be a single CLC.  Because the caller
; wants an i32 value, the condition code is then copied into %r2 with IPM and
; turned into the returned integer by the SLL/SRA pair.
define i32 @f2(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f2:
; CHECK: clc 0(2,%r3), 0(%r2)
; CHECK: ipm %r2
; CHECK: sll %r2, 2
; CHECK: sra %r2, 30
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2)
  ret i32 %res
}

; Check a case where the result is tested for equality.
;
; Only the equal/not-equal outcome is consumed, so the expected code acts on
; the condition code directly: a conditional return on "equal" must follow
; the CLC immediately, with no conversion to an integer in between.  The
; fall-through path performs the store and returns unconditionally.
define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f3:
; CHECK: clc 0(3,%r3), 0(%r2)
; CHECK-NEXT: ber %r14
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3)
  %cmp = icmp eq i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}

; Check a case where the result is tested for inequality.
;
; Mirror image of f3: the early exit is taken when the buffers differ, so the
; conditional return that must immediately follow the CLC is on
; "low or high" (i.e. not equal).
define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f4:
; CHECK: clc 0(4,%r3), 0(%r2)
; CHECK-NEXT: blhr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 4)
  %cmp = icmp ne i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}

; Check a case where the result is tested via slt.
;
; The early exit is taken when memcmp returns a negative value.  The expected
; conditional return directly after the CLC is on "high".
; NOTE(review): presumably "high" rather than "low" because the CLC is emitted
; with the %r3 buffer as its first operand, i.e. reversed relative to the
; memcmp argument order -- confirm against the SystemZ backend.
define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f5:
; CHECK: clc 0(5,%r3), 0(%r2)
; CHECK-NEXT: bhr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5)
  %cmp = icmp slt i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}

; Check a case where the result is tested via sgt.
;
; Counterpart of f5: the early exit is taken when memcmp returns a positive
; value, and the expected conditional return directly after the CLC is on
; "low".
define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f6:
; CHECK: clc 0(6,%r3), 0(%r2)
; CHECK-NEXT: blr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6)
  %cmp = icmp sgt i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}

; Check the upper end of the CLC range.  Here the result is used both as
; an integer and for branching.
;
; 256 bytes is expected to still fit in a single CLC.  Since %res is returned
; as well as tested, the integer is built first (IPM, SLL, SRA) and the
; conditional return comes after that sequence.
; NOTE(review): the branch here is on "low" whereas f5 expects "high" for the
; same slt test; presumably this branch tests the condition code left by the
; final shift rather than the one set by the CLC -- confirm.
define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f7:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: ipm %r2
; CHECK: sll %r2, 2
; CHECK: sra %r2, 30
; CHECK: blr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 256)
  %cmp = icmp slt i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret i32 %res
}

; 257 bytes needs two CLCs.
;
; Expected shape: a 256-byte CLC, a jump past the second comparison as soon
; as a difference is found, then a 1-byte CLC at offset 256.  Both paths meet
; at the same label, after which the condition code is read with IPM.
define i32 @f8(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f8:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(1,%r3), 256(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
  ret i32 %res
}

; Test a comparison of 257 bytes in which the CC result can be used directly.
;
; Same two-CLC sequence as f8, but the result is only tested with slt, so no
; integer is materialized: the conditional return must be the very next
; instruction after the label where the two comparison paths join.
define void @f9(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f9:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(1,%r3), 256(%r2)
; CHECK: [[LABEL]]:
; CHECK-NEXT: bhr %r14
; CHECK: br %r14
entry:
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
  %cmp = icmp slt i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}

; Test the largest size that can use two CLCs.
;
; 512 bytes = two full 256-byte CLCs, the second at offset 256, with the same
; early-out jump and join label as in f8.
define i32 @f10(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f10:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(256,%r3), 256(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 512)
  ret i32 %res
}

; Test the smallest size that needs 3 CLCs.
;
; 513 bytes = two full 256-byte CLCs followed by a 1-byte CLC at offset 512.
; Both early-out jumps are expected to target the same label, which sits
; just before the IPM.
define i32 @f11(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f11:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(256,%r3), 256(%r2)
; CHECK: jlh [[LABEL]]
; CHECK: clc 512(1,%r3), 512(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 513)
  ret i32 %res
}

; Test the largest size that can use 3 CLCs.
;
; 768 bytes = three full 256-byte CLCs at offsets 0, 256 and 512, sharing one
; early-out label as in f11.
define i32 @f12(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f12:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK: clc 256(256,%r3), 256(%r2)
; CHECK: jlh [[LABEL]]
; CHECK: clc 512(256,%r3), 512(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769)
  ret i32 %res
}

; The next size up uses a loop instead.  We leave the more complicated
; loop tests to memcpy-01.ll, which shares the same form.
;
; 769 bytes = 3 * 256 + 1.  Expected shape: a counter register is loaded with
; 3; each loop iteration compares 256 bytes, leaves the loop on a mismatch,
; advances both pointers by 256 (in either order) and decrements the counter
; with BRCTG.  After the loop a 1-byte CLC handles the remainder, and both
; exits join at the label before the IPM.
define i32 @f13(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f13:
; CHECK: lghi [[COUNT:%r[0-5]]], 3
; CHECK: [[LOOP:.L[^:]*]]:
; CHECK: clc 0(256,%r3), 0(%r2)
; CHECK: jlh [[LABEL:\..*]]
; CHECK-DAG: la %r2, 256(%r2)
; CHECK-DAG: la %r3, 256(%r3)
; CHECK: brctg [[COUNT]], [[LOOP]]
; CHECK: clc 0(1,%r3), 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: br %r14
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769)
  ret i32 %res
}
222