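; Test that constant-length memcpy calls are implemented with MVC: a single
; instruction, a straight-line sequence, or a loop, depending on the length.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s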
4
; memcpy intrinsics, one taking an i32 length and one taking an i64 length.
declare void @llvm.memcpy.p0i8.p0i8.i32(i8 *nocapture, i8 *nocapture, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8 *nocapture, i8 *nocapture, i64, i1) nounwind
; External function with no body in this file; the stack-frame tests call it
; with the source and destination pointers before and after the copy.
declare void @foo(i8 *, i8 *)
8
; Test a no-op move, i32 version.
; A zero-length copy must not produce any instruction that mentions %r2 or
; %r3 (the registers the nonzero-length tests in this file expect %dest and
; %src to be in) before the return.
define void @f1(i8* %dest, i8* %src) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK-NOT: %r3
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 0, i1 false)
  ret void
}
18
; Test a no-op move, i64 version.
; Same expectation as the i32 form: with a length of zero, nothing that
; mentions %r2 or %r3 may appear before the return.
define void @f2(i8* %dest, i8* %src) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK-NOT: %r3
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 0, i1 false)
  ret void
}
28
; Test a 1-byte move, i32 version.
; The smallest nonzero length: expect one MVC of length 1 from 0(%r3) to
; 0(%r2), followed by the return.
define void @f3(i8* %dest, i8* %src) {
; CHECK-LABEL: f3:
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
  ret void
}
37
; Test a 1-byte move, i64 version.
; The i64-length intrinsic must give the same single 1-byte MVC as the i32
; form.
define void @f4(i8* %dest, i8* %src) {
; CHECK-LABEL: f4:
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1, i1 false)
  ret void
}
46
; Test the upper range of a single MVC, i32 version.
; 256 bytes is the largest copy expected to fit in one MVC; one more byte
; needs a second instruction (see f7).
define void @f5(i8* %dest, i8* %src) {
; CHECK-LABEL: f5:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 256, i1 false)
  ret void
}
55
; Test the upper range of a single MVC, i64 version.
; Same 256-byte boundary as f5, reached through the i64-length intrinsic.
define void @f6(i8* %dest, i8* %src) {
; CHECK-LABEL: f6:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 256, i1 false)
  ret void
}
64
; Test the first case that needs two MVCs.
; 257 = 256 + 1: a full 256-byte MVC at offset 0, then a 1-byte MVC at
; offset 256.
define void @f7(i8* %dest, i8* %src) {
; CHECK-LABEL: f7:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(1,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 257, i1 false)
  ret void
}
74
; Test the last-but-one case that needs two MVCs.
; 511 = 256 + 255: a full 256-byte MVC followed by a 255-byte MVC at
; offset 256.
define void @f8(i8* %dest, i8* %src) {
; CHECK-LABEL: f8:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(255,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 511, i1 false)
  ret void
}
84
; Test the last case that needs two MVCs.
; 512 = 2 * 256: two full-length MVCs at offsets 0 and 256.
define void @f9(i8* %dest, i8* %src) {
; CHECK-LABEL: f9:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 512, i1 false)
  ret void
}
94
; Test an arbitrary value that uses straight-line code.
; 1279 = 4 * 256 + 255: four full 256-byte MVCs at offsets 0, 256, 512 and
; 768, then a 255-byte MVC at offset 1024.  Both pointers arrive directly in
; %r2/%r3, so every displacement is used as-is.
define void @f10(i8* %dest, i8* %src) {
; CHECK-LABEL: f10:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: mvc 512(256,%r2), 512(%r3)
; CHECK: mvc 768(256,%r2), 768(%r3)
; CHECK: mvc 1024(255,%r2), 1024(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1279, i1 false)
  ret void
}
107
; ...and again in cases where not all parts are in range of MVC.
; The same 1279-byte copy as f10, but starting at %destbase + 4000 and
; %srcbase + 3500, so later chunks would need displacements above 4095, the
; largest value an MVC displacement can hold.  Expected sequence:
;   - chunk 1 uses 4000(%r2) and 3500(%r3) directly;
;   - the next destination offset, 4256, is out of range, so LAY forms a new
;     destination base and the remaining destination displacements restart
;     at 0;
;   - source offsets 3756 and 4012 are still in range; 4268 is not, so a
;     second LAY forms a new source base for the last two chunks.
; The first argument is the destination base and the second the source base,
; matching the argument order of the other tests in this file.
define void @f11(i8* %destbase, i8* %srcbase) {
; CHECK-LABEL: f11:
; CHECK: mvc 4000(256,%r2), 3500(%r3)
; CHECK: lay [[NEWDEST:%r[1-5]]], 4256(%r2)
; CHECK: mvc 0(256,[[NEWDEST]]), 3756(%r3)
; CHECK: mvc 256(256,[[NEWDEST]]), 4012(%r3)
; CHECK: lay [[NEWSRC:%r[1-5]]], 4268(%r3)
; CHECK: mvc 512(256,[[NEWDEST]]), 0([[NEWSRC]])
; CHECK: mvc 768(255,[[NEWDEST]]), 256([[NEWSRC]])
; CHECK: br %r14
  %dest = getelementptr i8, i8* %destbase, i64 4000
  %src = getelementptr i8, i8* %srcbase, i64 3500
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1279, i1 false)
  ret void
}
124
; ...and again with a destination frame base that goes out of range.
; Both pointers address the same 6000-byte stack array.  The expected
; displacements place element 0 of the array at 176(%r15): the destination
; (index 3900) starts at 4076(%r15) and the source (index 1924) at
; 2100(%r15).  Only the first destination chunk is addressable from %r15;
; from 4332 onwards a LAY-formed base is required, while all five source
; chunks (2100 through 3124) stay in range of %r15.
; The calls to @foo on either side of the copy are matched so that the MVC
; sequence is known to sit between them.
define void @f12() {
; CHECK-LABEL: f12:
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc 4076(256,%r15), 2100(%r15)
; CHECK: lay [[NEWDEST:%r[1-5]]], 4332(%r15)
; CHECK: mvc 0(256,[[NEWDEST]]), 2356(%r15)
; CHECK: mvc 256(256,[[NEWDEST]]), 2612(%r15)
; CHECK: mvc 512(256,[[NEWDEST]]), 2868(%r15)
; CHECK: mvc 768(255,[[NEWDEST]]), 3124(%r15)
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [6000 x i8]
  %dest = getelementptr [6000 x i8], [6000 x i8] *%arr, i64 0, i64 3900
  %src = getelementptr [6000 x i8], [6000 x i8] *%arr, i64 0, i64 1924
  call void @foo(i8* %dest, i8* %src)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1279, i1 false)
  call void @foo(i8* %dest, i8* %src)
  ret void
}
145
; ...and again with a source frame base that goes out of range.
; Mirror image of f12: the destination (index 24, at 200(%r15)) stays
; addressable from %r15 for all five chunks (200 through 1224), but the
; source (index 3650, at 3826(%r15)) leaves the range after its second chunk
; at 4082, so a LAY-formed base starting at 4338(%r15) is used for the last
; three source chunks.
; The calls to @foo on either side of the copy are matched so that the MVC
; sequence is known to sit between them.
define void @f13() {
; CHECK-LABEL: f13:
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc 200(256,%r15), 3826(%r15)
; CHECK: mvc 456(256,%r15), 4082(%r15)
; CHECK: lay [[NEWSRC:%r[1-5]]], 4338(%r15)
; CHECK: mvc 712(256,%r15), 0([[NEWSRC]])
; CHECK: mvc 968(256,%r15), 256([[NEWSRC]])
; CHECK: mvc 1224(255,%r15), 512([[NEWSRC]])
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [6000 x i8]
  %dest = getelementptr [6000 x i8], [6000 x i8] *%arr, i64 0, i64 24
  %src = getelementptr [6000 x i8], [6000 x i8] *%arr, i64 0, i64 3650
  call void @foo(i8* %dest, i8* %src)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1279, i1 false)
  call void @foo(i8* %dest, i8* %src)
  ret void
}
166
; Test the last case that is done using straight-line code.
; 1536 = 6 * 256: six full-length MVCs at offsets 0 through 1280.  One more
; byte switches to the loop form (see f15).
define void @f14(i8* %dest, i8* %src) {
; CHECK-LABEL: f14:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: mvc 512(256,%r2), 512(%r3)
; CHECK: mvc 768(256,%r2), 768(%r3)
; CHECK: mvc 1024(256,%r2), 1024(%r3)
; CHECK: mvc 1280(256,%r2), 1280(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1536, i1 false)
  ret void
}
180
; Test the first case that is done using a loop.
; 1537 = 6 * 256 + 1.  Expected shape:
;   - load the iteration count 6 into a register;
;   - loop body: PFD on the destination 768 bytes ahead, one 256-byte MVC,
;     then advance both pointers by 256 with LA;
;   - BRCTG decrements the count and branches back to the loop label;
;   - after the loop, a 1-byte MVC copies the remainder.
define void @f15(i8* %dest, i8* %src) {
; CHECK-LABEL: f15:
; CHECK: lghi [[COUNT:%r[0-5]]], 6
; CHECK: [[LABEL:\.L[^:]*]]:
; CHECK: pfd 2, 768(%r2)
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: la %r2, 256(%r2)
; CHECK: la %r3, 256(%r3)
; CHECK: brctg [[COUNT]], [[LABEL]]
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1537, i1 false)
  ret void
}
196
; ...and again with frame bases, where the base must be loaded into a
; register before the loop.
; Source (index 0) and destination (index 1600) are in the same 3200-byte
; stack array, expected at 160(%r15).  A single register is loaded with that
; address and advanced by 256 per iteration; the source is addressed at
; 0(base) and the destination at 1600(base), so the PFD displacement is
; 1600 + 768 = 2368.  The count setup and the base setup may come in either
; order, hence the -DAG form of the first two directives.
; The base capture is restricted to %r1-%r5 because it is used as an address
; base register, where register 0 would mean "no base"; the loop counter has
; no such restriction.
define void @f16() {
; CHECK-LABEL: f16:
; CHECK: brasl %r14, foo@PLT
; CHECK-DAG: lghi [[COUNT:%r[0-5]]], 6
; CHECK-DAG: la [[BASE:%r[1-5]]], 160(%r15)
; CHECK: [[LABEL:\.L[^:]*]]:
; CHECK: pfd 2, 2368([[BASE]])
; CHECK: mvc 1600(256,[[BASE]]), 0([[BASE]])
; CHECK: la [[BASE]], 256([[BASE]])
; CHECK: brctg [[COUNT]], [[LABEL]]
; CHECK: mvc 1600(1,[[BASE]]), 0([[BASE]])
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [3200 x i8]
  %dest = getelementptr [3200 x i8], [3200 x i8] *%arr, i64 0, i64 1600
  %src = getelementptr [3200 x i8], [3200 x i8] *%arr, i64 0, i64 0
  call void @foo(i8* %dest, i8* %src)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 1537, i1 false)
  call void @foo(i8* %dest, i8* %src)
  ret void
}
220