; Test memcpy using MVC.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Five-operand memcpy intrinsics, declared once with an i32 length and once
; with an i64 length.  Operand order: destination, source, byte count,
; alignment, is-volatile.  Every call in this file passes alignment 1 and
; is-volatile false.
declare void @llvm.memcpy.p0i8.p0i8.i32(i8 *nocapture, i8 *nocapture, i32, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8 *nocapture, i8 *nocapture, i64, i32, i1) nounwind

; External function with no visible body.  The frame-based tests pass it
; pointers into their alloca before and after the copy.
declare void @foo(i8 *, i8 *)

; Test a no-op move, i32 version.
; A zero-length copy should produce no code at all: between the function
; label and the return, neither argument register (%r2, %r3) may appear.
define void @f1(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK-NOT: %r3
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 0, i32 1,
                                       i1 false)
  ret void
}

; Test a no-op move, i64 version.
; Same expectation as the i32 form: a zero-length copy emits nothing that
; mentions %r2 or %r3 before the return.
define void @f2(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK-NOT: %r3
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 0, i32 1,
                                       i1 false)
  ret void
}

; Test a 1-byte move, i32 version.
; The smallest non-empty copy becomes a single MVC of length 1 from the
; source pointer (%r3) to the destination pointer (%r2).
define void @f3(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f3:
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 1, i32 1,
                                       i1 false)
  ret void
}

; Test a 1-byte move, i64 version.
; The i64 length form must lower to the same single 1-byte MVC as the i32
; form.
define void @f4(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f4:
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1, i32 1,
                                       i1 false)
  ret void
}

; Test the upper range of a single MVC, i32 version.
; 256 bytes is the largest copy that is still expected to fit in one MVC.
define void @f5(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f5:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 256, i32 1,
                                       i1 false)
  ret void
}

; Test the upper range of a single MVC, i64 version.
; As with the i32 form, a 256-byte copy is expected to be one MVC.
define void @f6(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f6:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 256, i32 1,
                                       i1 false)
  ret void
}

; Test the first case that needs two MVCs.
; 257 bytes = one full 256-byte MVC followed by a 1-byte MVC at offset 256.
define void @f7(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f7:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(1,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 257, i32 1,
                                       i1 false)
  ret void
}

; Test the last-but-one case that needs two MVCs.
; 511 bytes = a 256-byte MVC followed by a 255-byte MVC at offset 256.
define void @f8(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f8:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(255,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 511, i32 1,
                                       i1 false)
  ret void
}

; Test the last case that needs two MVCs.
; 512 bytes = two full 256-byte MVCs, at offsets 0 and 256.
define void @f9(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f9:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 512, i32 1,
                                       i1 false)
  ret void
}

; Test an arbitrary value that uses straight-line code.
; 1279 bytes = four 256-byte MVCs plus a 255-byte tail.  Every offset stays
; small enough to be used directly as an MVC displacement.
define void @f10(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f10:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: mvc 512(256,%r2), 512(%r3)
; CHECK: mvc 768(256,%r2), 768(%r3)
; CHECK: mvc 1024(255,%r2), 1024(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
                                       i1 false)
  ret void
}

; ...and again in cases where not all parts are in range of MVC.
; The same 1279-byte copy, but starting at destbase+4000 and srcbase+3500.
; MVC displacements are limited to 0-4095, so:
;   - the first block uses both offsets directly;
;   - the destination offset reaches 4256 for the second block, so a new
;     destination base is formed with LAY and later blocks are addressed
;     relative to it;
;   - the source offset reaches 4268 for the fourth block, so a new source
;     base is formed the same way.
; The first argument (%r2) is the destination base and the second (%r3) is
; the source base, matching the operand order in the expected MVCs.
define void @f11(i8 *%destbase, i8 *%srcbase) {
; CHECK-LABEL: f11:
; CHECK: mvc 4000(256,%r2), 3500(%r3)
; CHECK: lay [[NEWDEST:%r[1-5]]], 4256(%r2)
; CHECK: mvc 0(256,[[NEWDEST]]), 3756(%r3)
; CHECK: mvc 256(256,[[NEWDEST]]), 4012(%r3)
; CHECK: lay [[NEWSRC:%r[1-5]]], 4268(%r3)
; CHECK: mvc 512(256,[[NEWDEST]]), 0([[NEWSRC]])
; CHECK: mvc 768(255,[[NEWDEST]]), 256([[NEWSRC]])
; CHECK: br %r14
  %dest = getelementptr i8, i8 *%destbase, i64 4000
  %src = getelementptr i8, i8 *%srcbase, i64 3500
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
                                       i1 false)
  ret void
}

; ...and again with a destination frame base that goes out of range.
; Source and destination are both slices of one 6000-byte stack array.  The
; expected displacements place the array at 176(%r15): element 3900 is at
; 4076(%r15) and element 1924 is at 2100(%r15).  Only the first destination
; block fits in an MVC displacement; from 4332 onwards the destination is
; addressed through a LAY-computed base, while every source offset still
; fits and stays relative to %r15.
; The calls to @foo before and after the copy take the addresses of both
; slices, and the expectations require the copy to sit between the two calls.
define void @f12() {
; CHECK-LABEL: f12:
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc 4076(256,%r15), 2100(%r15)
; CHECK: lay [[NEWDEST:%r[1-5]]], 4332(%r15)
; CHECK: mvc 0(256,[[NEWDEST]]), 2356(%r15)
; CHECK: mvc 256(256,[[NEWDEST]]), 2612(%r15)
; CHECK: mvc 512(256,[[NEWDEST]]), 2868(%r15)
; CHECK: mvc 768(255,[[NEWDEST]]), 3124(%r15)
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [6000 x i8]
  %dest = getelementptr [6000 x i8], [6000 x i8] *%arr, i64 0, i64 3900
  %src = getelementptr [6000 x i8], [6000 x i8] *%arr, i64 0, i64 1924
  call void @foo(i8 *%dest, i8 *%src)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
                                       i1 false)
  call void @foo(i8 *%dest, i8 *%src)
  ret void
}

; ...and again with a source frame base that goes out of range.
; Mirror image of the previous test: with the array at 176(%r15), the
; destination (element 24, i.e. 200(%r15)) stays in displacement range for
; the whole copy, but the source (element 3650, i.e. 3826(%r15)) reaches
; 4338 for the third block.  From there on the source is addressed through
; a LAY-computed base.
define void @f13() {
; CHECK-LABEL: f13:
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc 200(256,%r15), 3826(%r15)
; CHECK: mvc 456(256,%r15), 4082(%r15)
; CHECK: lay [[NEWSRC:%r[1-5]]], 4338(%r15)
; CHECK: mvc 712(256,%r15), 0([[NEWSRC]])
; CHECK: mvc 968(256,%r15), 256([[NEWSRC]])
; CHECK: mvc 1224(255,%r15), 512([[NEWSRC]])
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [6000 x i8]
  %dest = getelementptr [6000 x i8], [6000 x i8] *%arr, i64 0, i64 24
  %src = getelementptr [6000 x i8], [6000 x i8] *%arr, i64 0, i64 3650
  call void @foo(i8 *%dest, i8 *%src)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
                                       i1 false)
  call void @foo(i8 *%dest, i8 *%src)
  ret void
}

; Test the last case that is done using straight-line code.
; 1536 bytes = six full 256-byte MVCs.  This is the largest size for which
; an unrolled sequence is expected rather than a loop.
define void @f14(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f14:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: mvc 512(256,%r2), 512(%r3)
; CHECK: mvc 768(256,%r2), 768(%r3)
; CHECK: mvc 1024(256,%r2), 1024(%r3)
; CHECK: mvc 1280(256,%r2), 1280(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1536, i32 1,
                                       i1 false)
  ret void
}

; Test the first case that is done using a loop.
; 1537 bytes = 6 * 256 + 1.  Expected shape:
;   - load the iteration count (6) into a register;
;   - loop body: prefetch the destination 768 bytes ahead, copy one
;     256-byte block, advance both pointers by 256;
;   - BRCTG on the count register branches back to the loop label;
;   - after the loop, a single 1-byte MVC copies the remainder.
define void @f15(i8 *%dest, i8 *%src) {
; CHECK-LABEL: f15:
; CHECK: lghi [[COUNT:%r[0-5]]], 6
; CHECK: [[LABEL:\.L[^:]*]]:
; CHECK: pfd 2, 768(%r2)
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: la %r2, 256(%r2)
; CHECK: la %r3, 256(%r3)
; CHECK: brctg [[COUNT]], [[LABEL]]
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1,
                                       i1 false)
  ret void
}

; ...and again with frame bases, where the base must be loaded into a
; register before the loop.
; Source (element 0) and destination (element 1600) are slices of the same
; 3200-byte stack array, so one register holding the array address
; (160(%r15)) serves both: the source is at 0(BASE) and the destination at
; 1600(BASE).  The prefetch displacement 2368 is the destination offset
; plus 768.  Only BASE is advanced in the loop, and the 1-byte tail reuses
; it afterwards.
; BASE is matched against %r1-%r5 only: it is used as a base register, and
; %r0 in a base field means "no base" on this architecture.
define void @f16() {
; CHECK-LABEL: f16:
; CHECK: brasl %r14, foo@PLT
; CHECK-DAG: lghi [[COUNT:%r[0-5]]], 6
; CHECK-DAG: la [[BASE:%r[1-5]]], 160(%r15)
; CHECK: [[LABEL:\.L[^:]*]]:
; CHECK: pfd 2, 2368([[BASE]])
; CHECK: mvc 1600(256,[[BASE]]), 0([[BASE]])
; CHECK: la [[BASE]], 256([[BASE]])
; CHECK: brctg [[COUNT]], [[LABEL]]
; CHECK: mvc 1600(1,[[BASE]]), 0([[BASE]])
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [3200 x i8]
  %dest = getelementptr [3200 x i8], [3200 x i8] *%arr, i64 0, i64 1600
  %src = getelementptr [3200 x i8], [3200 x i8] *%arr, i64 0, i64 0
  call void @foo(i8 *%dest, i8 *%src)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1,
                                       i1 false)
  call void @foo(i8 *%dest, i8 *%src)
  ret void
}