; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

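; Tests f64 FMA selection on SI/VI: scalar and vector fma, fabs/fneg
; source modifiers, and inline-constant operands.
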
declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
declare double @llvm.fabs.f64(double) nounwind readnone

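; A scalar f64 fma should select to a single v_fma_f64; the vector
; versions are scalarized into one v_fma_f64 per element.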
; FUNC-LABEL: {{^}}fma_f64:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
   %r0 = load double, double addrspace(1)* %in1
   %r1 = load double, double addrspace(1)* %in2
   %r2 = load double, double addrspace(1)* %in3
   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
   store double %r3, double addrspace(1)* %out
   ret void
}

; FUNC-LABEL: {{^}}fma_v2f64:
; SI: v_fma_f64
; SI: v_fma_f64
define amdgpu_kernel void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
                       <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
   %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
   %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
   %r2 = load <2 x double>, <2 x double> addrspace(1)* %in3
   %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
   store <2 x double> %r3, <2 x double> addrspace(1)* %out
   ret void
}

; FUNC-LABEL: {{^}}fma_v4f64:
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
define amdgpu_kernel void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
                       <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
   %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
   %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
   %r2 = load <4 x double>, <4 x double> addrspace(1)* %in3
   %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
   store <4 x double> %r3, <4 x double> addrspace(1)* %out
   ret void
}

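; fabs of an fma source should fold into the v_fma_f64 |...| source
; modifier rather than being emitted as a separate instruction.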
; FUNC-LABEL: {{^}}fma_f64_abs_src0:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_src0(double addrspace(1)* %out, double addrspace(1)* %in1,
                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
   %r0 = load double, double addrspace(1)* %in1
   %r1 = load double, double addrspace(1)* %in2
   %r2 = load double, double addrspace(1)* %in3
   %fabs = call double @llvm.fabs.f64(double %r0)
   %r3 = tail call double @llvm.fma.f64(double %fabs, double %r1, double %r2)
   store double %r3, double addrspace(1)* %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_src1:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_src1(double addrspace(1)* %out, double addrspace(1)* %in1,
                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
   %r0 = load double, double addrspace(1)* %in1
   %r1 = load double, double addrspace(1)* %in2
   %r2 = load double, double addrspace(1)* %in3
   %fabs = call double @llvm.fabs.f64(double %r1)
   %r3 = tail call double @llvm.fma.f64(double %r0, double %fabs, double %r2)
   store double %r3, double addrspace(1)* %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_src2:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|}}
define amdgpu_kernel void @fma_f64_abs_src2(double addrspace(1)* %out, double addrspace(1)* %in1,
                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
   %r0 = load double, double addrspace(1)* %in1
   %r1 = load double, double addrspace(1)* %in2
   %r2 = load double, double addrspace(1)* %in3
   %fabs = call double @llvm.fabs.f64(double %r2)
   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fabs)
   store double %r3, double addrspace(1)* %out
   ret void
}

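; fneg, written as an fsub from -0.0, should fold into the v_fma_f64 neg
; source modifier.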
; FUNC-LABEL: {{^}}fma_f64_neg_src0:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src0(double addrspace(1)* %out, double addrspace(1)* %in1,
                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
   %r0 = load double, double addrspace(1)* %in1
   %r1 = load double, double addrspace(1)* %in2
   %r2 = load double, double addrspace(1)* %in3
   %fsub = fsub double -0.000000e+00, %r0
   %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
   store double %r3, double addrspace(1)* %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_neg_src1:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src1(double addrspace(1)* %out, double addrspace(1)* %in1,
                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
   %r0 = load double, double addrspace(1)* %in1
   %r1 = load double, double addrspace(1)* %in2
   %r2 = load double, double addrspace(1)* %in3
   %fsub = fsub double -0.000000e+00, %r1
   %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
   store double %r3, double addrspace(1)* %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_neg_src2:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src2(double addrspace(1)* %out, double addrspace(1)* %in1,
                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
   %r0 = load double, double addrspace(1)* %in1
   %r1 = load double, double addrspace(1)* %in2
   %r2 = load double, double addrspace(1)* %in3
   %fsub = fsub double -0.000000e+00, %r2
   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
   store double %r3, double addrspace(1)* %out
   ret void
}

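; A combined fneg(fabs(x)) source should fold into the -| | source
; modifier.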
; FUNC-LABEL: {{^}}fma_f64_abs_neg_src0:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_neg_src0(double addrspace(1)* %out, double addrspace(1)* %in1,
                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
   %r0 = load double, double addrspace(1)* %in1
   %r1 = load double, double addrspace(1)* %in2
   %r2 = load double, double addrspace(1)* %in3
   %fabs = call double @llvm.fabs.f64(double %r0)
   %fsub = fsub double -0.000000e+00, %fabs
   %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
   store double %r3, double addrspace(1)* %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_neg_src1:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_neg_src1(double addrspace(1)* %out, double addrspace(1)* %in1,
                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
   %r0 = load double, double addrspace(1)* %in1
   %r1 = load double, double addrspace(1)* %in2
   %r2 = load double, double addrspace(1)* %in3
   %fabs = call double @llvm.fabs.f64(double %r1)
   %fsub = fsub double -0.000000e+00, %fabs
   %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
   store double %r3, double addrspace(1)* %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_neg_src2:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
define amdgpu_kernel void @fma_f64_abs_neg_src2(double addrspace(1)* %out, double addrspace(1)* %in1,
                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
   %r0 = load double, double addrspace(1)* %in1
   %r1 = load double, double addrspace(1)* %in2
   %r2 = load double, double addrspace(1)* %in3
   %fabs = call double @llvm.fabs.f64(double %r2)
   %fsub = fsub double -0.000000e+00, %fabs
   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
   store double %r3, double addrspace(1)* %out
   ret void
}

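; 2.0 is an inline constant on this target, so it should be encoded
; directly as a v_fma_f64 operand; a constant in src0 is commuted into
; src1, as the checks below show.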
; FUNC-LABEL: {{^}}fma_f64_lit_src0:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_lit_src0(double addrspace(1)* %out,
                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
   %r1 = load double, double addrspace(1)* %in2
   %r2 = load double, double addrspace(1)* %in3
   %r3 = tail call double @llvm.fma.f64(double +2.0, double %r1, double %r2)
   store double %r3, double addrspace(1)* %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_lit_src1:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_lit_src1(double addrspace(1)* %out, double addrspace(1)* %in1,
                     double addrspace(1)* %in3) {
   %r0 = load double, double addrspace(1)* %in1
   %r2 = load double, double addrspace(1)* %in3
   %r3 = tail call double @llvm.fma.f64(double %r0, double +2.0, double %r2)
   store double %r3, double addrspace(1)* %out
   ret void
}

; FUNC-LABEL: {{^}}fma_f64_lit_src2:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0}}
define amdgpu_kernel void @fma_f64_lit_src2(double addrspace(1)* %out, double addrspace(1)* %in1,
                     double addrspace(1)* %in2) {
   %r0 = load double, double addrspace(1)* %in1
   %r1 = load double, double addrspace(1)* %in2
   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double +2.0)
   store double %r3, double addrspace(1)* %out
   ret void
}