1; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
2; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
3; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
4; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
5
; Fused multiply-add intrinsic called by the test functions below.  Each
; caller passes a negated accumulator as the third operand.
declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
7
; Register form: both multiplicands and the accumulator are passed in
; floating-point registers.  The accumulator is negated before the fma call,
; so the value computed is %f1 * %f2 - %acc.  The z10 run expects MSEBR plus
; a copy into %f0; the z14 run expects a single WFMSSB writing %f0 directly.
define float @f1(float %f1, float %f2, float %acc) {
; CHECK-LABEL: f1:
; CHECK-SCALAR: msebr %f4, %f0, %f2
; CHECK-SCALAR: ler %f0, %f4
; CHECK-VECTOR: wfmssb %f0, %f0, %f2, %f4
; CHECK: br %r14
  %neg = fsub float -0.0, %acc
  %fms = call float @llvm.fma.f32(float %f1, float %f2, float %neg)
  ret float %fms
}
18
; Memory form with a zero displacement: the second multiplicand is loaded
; directly from %ptr, and the load is expected to be folded into MSEB.  The
; result lands in %f2 and is then copied to %f0 (LER on z10, LDR on z14).
define float @f2(float %f1, float *%ptr, float %acc) {
; CHECK-LABEL: f2:
; CHECK: mseb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %mem = load float, float* %ptr
  %neg = fsub float -0.0, %acc
  %fms = call float @llvm.fma.f32(float %f1, float %mem, float %neg)
  ret float %fms
}
30
; Element 1023 of a float array is 4092 bytes past %base.  That offset is
; expected to be encoded directly as the MSEB displacement, with no separate
; address arithmetic.
define float @f3(float %f1, float *%base, float %acc) {
; CHECK-LABEL: f3:
; CHECK: mseb %f2, %f0, 4092(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %addr = getelementptr float, float* %base, i64 1023
  %mem = load float, float* %addr
  %neg = fsub float -0.0, %acc
  %fms = call float @llvm.fma.f32(float %f1, float %mem, float %neg)
  ret float %fms
}
43
; Element 1024 is 4096 bytes past %base, one word beyond the offset used in
; f3.  What matters is that no out-of-range displacement is emitted; the
; AGHI-then-MSEB sequence checked below is just one acceptable way to do it.
define float @f4(float %f1, float *%base, float %acc) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: mseb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %addr = getelementptr float, float* %base, i64 1024
  %mem = load float, float* %addr
  %neg = fsub float -0.0, %acc
  %fms = call float @llvm.fma.f32(float %f1, float %mem, float %neg)
  ret float %fms
}
60
; Element -1 is 4 bytes before %base, i.e. a negative offset.  As in f4, the
; point is that no out-of-range displacement is emitted; adjusting the base
; with AGHI first is one acceptable sequence, and others would be fine too.
define float @f5(float %f1, float *%base, float %acc) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: mseb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %addr = getelementptr float, float* %base, i64 -1
  %mem = load float, float* %addr
  %neg = fsub float -0.0, %acc
  %fms = call float @llvm.fma.f32(float %f1, float %mem, float %neg)
  ret float %fms
}
77
; Variable index: %index is scaled by the element size (shift left by 2) and
; the scaled value is expected to be used as an address register of MSEB
; together with %base, with a zero displacement.
;
; The two address registers may legitimately appear in either order, so the
; operand pattern accepts both, matching the indexed checks in f7 and f8.
define float @f6(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: mseb %f2, %f0, 0({{%r1,%r2|%r2,%r1}})
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %addr = getelementptr float, float* %base, i64 %index
  %mem = load float, float* %addr
  %neg = fsub float -0.0, %acc
  %fms = call float @llvm.fma.f32(float %f1, float %mem, float %neg)
  ret float %fms
}
91
; Variable index plus a constant of 1023 elements.  The constant part (4092
; bytes) is expected to become the MSEB displacement while the scaled index
; and %base supply the two address registers, in either order.
define float @f7(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f7:
; CHECK: sllg %r1, %r3, 2
; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %elt = add i64 %index, 1023
  %addr = getelementptr float, float* %base, i64 %elt
  %mem = load float, float* %addr
  %neg = fsub float -0.0, %acc
  %fms = call float @llvm.fma.f32(float %f1, float %mem, float %neg)
  ret float %fms
}
106
; Variable index plus a constant of 1024 elements (4096 bytes).  Here the
; full address is expected to be formed first with LAY, and MSEB then uses
; the resulting register with a zero displacement.
define float @f8(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f8:
; CHECK: sllg %r1, %r3, 2
; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
; CHECK: mseb %f2, %f0, 0(%r1)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
  %elt = add i64 %index, 1024
  %addr = getelementptr float, float* %base, i64 %elt
  %mem = load float, float* %addr
  %neg = fsub float -0.0, %acc
  %fms = call float @llvm.fma.f32(float %f1, float %mem, float %neg)
  ret float %fms
}
122