1; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
2; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
3
; Vector fadd on <4 x float>: loads %a and %b, adds them, stores the sum to %c.
; The FileCheck directives expect an ld.w / ld.w / fadd.w / st.w sequence.
define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: add_v4f32:

  %lhs = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %sum = fadd <4 x float> %lhs, %rhs
  ; CHECK-DAG: fadd.w [[SUM:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x float> %sum, <4 x float>* %c
  ; CHECK-DAG: st.w [[SUM]], 0($4)

  ret void
  ; CHECK: .size add_v4f32
}
19
; Vector fadd on <2 x double>: loads %a and %b, adds them, stores the sum to %c.
; The FileCheck directives expect an ld.d / ld.d / fadd.d / st.d sequence.
define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: add_v2f64:

  %lhs = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %sum = fadd <2 x double> %lhs, %rhs
  ; CHECK-DAG: fadd.d [[SUM:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x double> %sum, <2 x double>* %c
  ; CHECK-DAG: st.d [[SUM]], 0($4)

  ret void
  ; CHECK: .size add_v2f64
}
35
; Vector fsub on <4 x float>: computes %a - %b and stores the result to %c.
; The FileCheck directives expect an ld.w / ld.w / fsub.w / st.w sequence.
define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: sub_v4f32:

  %lhs = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %diff = fsub <4 x float> %lhs, %rhs
  ; CHECK-DAG: fsub.w [[DIFF:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x float> %diff, <4 x float>* %c
  ; CHECK-DAG: st.w [[DIFF]], 0($4)

  ret void
  ; CHECK: .size sub_v4f32
}
51
; Vector fsub on <2 x double>: computes %a - %b and stores the result to %c.
; The FileCheck directives expect an ld.d / ld.d / fsub.d / st.d sequence.
define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: sub_v2f64:

  %lhs = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %diff = fsub <2 x double> %lhs, %rhs
  ; CHECK-DAG: fsub.d [[DIFF:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x double> %diff, <2 x double>* %c
  ; CHECK-DAG: st.d [[DIFF]], 0($4)

  ret void
  ; CHECK: .size sub_v2f64
}
67
; Vector fmul on <4 x float>: multiplies %a by %b and stores the product to %c.
; The FileCheck directives expect an ld.w / ld.w / fmul.w / st.w sequence.
define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: mul_v4f32:

  %lhs = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %prod = fmul <4 x float> %lhs, %rhs
  ; CHECK-DAG: fmul.w [[PROD:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x float> %prod, <4 x float>* %c
  ; CHECK-DAG: st.w [[PROD]], 0($4)

  ret void
  ; CHECK: .size mul_v4f32
}
83
; Vector fmul on <2 x double>: multiplies %a by %b and stores the product to %c.
; The FileCheck directives expect an ld.d / ld.d / fmul.d / st.d sequence.
define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: mul_v2f64:

  %lhs = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %prod = fmul <2 x double> %lhs, %rhs
  ; CHECK-DAG: fmul.d [[PROD:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x double> %prod, <2 x double>* %c
  ; CHECK-DAG: st.d [[PROD]], 0($4)

  ret void
  ; CHECK: .size mul_v2f64
}
99
; llvm.fma on <4 x float>: loads %a, %b and %c, calls the fma intrinsic on them
; and stores the result to %d. The FileCheck directives expect one fmadd.w
; whose destination is the register loaded from %a, which is then stored.
; NOTE(review): the intrinsic is called as fma(%a, %b, %c) while the expected
; fmadd.w writes into the %a register - confirm this operand order against the
; MSA fmadd definition.
define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                       <4 x float>* %c) nounwind {
  ; CHECK: fma_v4f32:

  %va = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[WA:\$w[0-9]+]], 0($5)
  %vb = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[WB:\$w[0-9]+]], 0($6)
  %vc = load <4 x float>, <4 x float>* %c
  ; CHECK-DAG: ld.w [[WC:\$w[0-9]+]], 0($7)
  %fused = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc)
  ; CHECK-DAG: fmadd.w [[WA]], [[WB]], [[WC]]
  store <4 x float> %fused, <4 x float>* %d
  ; CHECK-DAG: st.w [[WA]], 0($4)

  ret void
  ; CHECK: .size fma_v4f32
}
119
; llvm.fma on <2 x double>: loads %a, %b and %c, calls the fma intrinsic on
; them and stores the result to %d. The FileCheck directives expect one fmadd.d
; whose destination is the register loaded from %a, which is then stored.
; NOTE(review): the intrinsic is called as fma(%a, %b, %c) while the expected
; fmadd.d writes into the %a register - confirm this operand order against the
; MSA fmadd definition.
define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                       <2 x double>* %c) nounwind {
  ; CHECK: fma_v2f64:

  %va = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[WA:\$w[0-9]+]], 0($5)
  %vb = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[WB:\$w[0-9]+]], 0($6)
  %vc = load <2 x double>, <2 x double>* %c
  ; CHECK-DAG: ld.d [[WC:\$w[0-9]+]], 0($7)
  %fused = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc)
  ; CHECK-DAG: fmadd.d [[WA]], [[WB]], [[WC]]
  store <2 x double> %fused, <2 x double>* %d
  ; CHECK-DAG: st.d [[WA]], 0($4)

  ret void
  ; CHECK: .size fma_v2f64
}
139
; Multiply-subtract on <4 x float>: computes %a - (%b * %c) with a separate
; fmul and fsub and stores the result to %d. The FileCheck directives expect
; the pair to appear as one fmsub.w that writes into the register loaded
; from %a.
define void @fmsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                       <4 x float>* %c) nounwind {
  ; CHECK: fmsub_v4f32:

  %va = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[WA:\$w[0-9]+]], 0($5)
  %vb = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[WB:\$w[0-9]+]], 0($6)
  %vc = load <4 x float>, <4 x float>* %c
  ; CHECK-DAG: ld.w [[WC:\$w[0-9]+]], 0($7)
  %prod = fmul <4 x float> %vb, %vc
  %diff = fsub <4 x float> %va, %prod
  ; CHECK-DAG: fmsub.w [[WA]], [[WB]], [[WC]]
  store <4 x float> %diff, <4 x float>* %d
  ; CHECK-DAG: st.w [[WA]], 0($4)

  ret void
  ; CHECK: .size fmsub_v4f32
}
159
; Multiply-subtract on <2 x double>: computes %a - (%b * %c) with a separate
; fmul and fsub and stores the result to %d. The FileCheck directives expect
; the pair to appear as one fmsub.d that writes into the register loaded
; from %a.
define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                       <2 x double>* %c) nounwind {
  ; CHECK: fmsub_v2f64:

  %va = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[WA:\$w[0-9]+]], 0($5)
  %vb = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[WB:\$w[0-9]+]], 0($6)
  %vc = load <2 x double>, <2 x double>* %c
  ; CHECK-DAG: ld.d [[WC:\$w[0-9]+]], 0($7)
  %prod = fmul <2 x double> %vb, %vc
  %diff = fsub <2 x double> %va, %prod
  ; CHECK-DAG: fmsub.d [[WA]], [[WB]], [[WC]]
  store <2 x double> %diff, <2 x double>* %d
  ; CHECK-DAG: st.d [[WA]], 0($4)

  ret void
  ; CHECK: .size fmsub_v2f64
}
179
; Vector fdiv on <4 x float>: divides %a by %b and stores the quotient to %c.
; The FileCheck directives expect an ld.w / ld.w / fdiv.w / st.w sequence.
define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: fdiv_v4f32:

  %num = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[NUM:\$w[0-9]+]], 0($5)
  %den = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[DEN:\$w[0-9]+]], 0($6)
  %quot = fdiv <4 x float> %num, %den
  ; CHECK-DAG: fdiv.w [[QUOT:\$w[0-9]+]], [[NUM]], [[DEN]]
  store <4 x float> %quot, <4 x float>* %c
  ; CHECK-DAG: st.w [[QUOT]], 0($4)

  ret void
  ; CHECK: .size fdiv_v4f32
}
195
; Vector fdiv on <2 x double>: divides %a by %b and stores the quotient to %c.
; The FileCheck directives expect an ld.d / ld.d / fdiv.d / st.d sequence.
define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: fdiv_v2f64:

  %num = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[NUM:\$w[0-9]+]], 0($5)
  %den = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[DEN:\$w[0-9]+]], 0($6)
  %quot = fdiv <2 x double> %num, %den
  ; CHECK-DAG: fdiv.d [[QUOT:\$w[0-9]+]], [[NUM]], [[DEN]]
  store <2 x double> %quot, <2 x double>* %c
  ; CHECK-DAG: st.d [[QUOT]], 0($4)

  ret void
  ; CHECK: .size fdiv_v2f64
}
211
; llvm.fabs on <4 x float>: loads %a, takes the absolute value, stores to %c.
; The FileCheck directives expect fmax_a.w with the loaded register as both
; source operands.
define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fabs_v4f32:

  %src = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[SRC:\$w[0-9]+]], 0($5)
  %abs = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %src)
  ; CHECK-DAG: fmax_a.w [[ABS:\$w[0-9]+]], [[SRC]], [[SRC]]
  store <4 x float> %abs, <4 x float>* %c
  ; CHECK-DAG: st.w [[ABS]], 0($4)

  ret void
  ; CHECK: .size fabs_v4f32
}
225
; llvm.fabs on <2 x double>: loads %a, takes the absolute value, stores to %c.
; The FileCheck directives expect fmax_a.d with the loaded register as both
; source operands.
define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fabs_v2f64:

  %src = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[SRC:\$w[0-9]+]], 0($5)
  %abs = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %src)
  ; CHECK-DAG: fmax_a.d [[ABS:\$w[0-9]+]], [[SRC]], [[SRC]]
  store <2 x double> %abs, <2 x double>* %c
  ; CHECK-DAG: st.d [[ABS]], 0($4)

  ret void
  ; CHECK: .size fabs_v2f64
}
239
; llvm.exp2 on <4 x float>: loads %a, calls the exp2 intrinsic, stores to %c.
; The FileCheck directives expect the chain ldi.w (immediate 1) -> ffint_u.w ->
; fexp2.w, with the fexp2.w result being the value stored.
;
; Each step binds its own FileCheck variable so that fexp2.w is checked against
; the ffint_u.w result. Binding one name twice and checking fexp2.w against the
; ldi.w register would only hold when the register allocator reuses a single
; register for the whole chain.
define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32
}
255
; llvm.exp2 on <2 x double>: loads %a, calls the exp2 intrinsic, stores to %c.
; The FileCheck directives expect the chain ldi.d (immediate 1) -> ffint_u.d ->
; fexp2.d, with the fexp2.d result being the value stored.
;
; Each step binds its own FileCheck variable so that fexp2.d is checked against
; the ffint_u.d result. Binding one name twice and checking fexp2.d against the
; ldi.d register would only hold when the register allocator reuses a single
; register for the whole chain.
define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.d [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64
}
271
; llvm.exp2 on <4 x float> followed by an fmul with a splat of 2.0; the product
; is stored to %c. The FileCheck directives expect the chain ldi.w -> ffint_u.w
; -> fexp2.w, with the fexp2.w result being the value stored.
define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32_2:

  %x = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[X:\$w[0-9]+]], 0($5)
  %pow = tail call <4 x float> @llvm.exp2.v4f32(<4 x float> %x)
  %scaled = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %pow
  ; CHECK-DAG: ldi.w [[IMM:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[BASE:\$w[0-9]+]], [[IMM]]
  ; CHECK-DAG: fexp2.w [[OUT:\$w[0-9]+]], [[BASE]], [[X]]
  store <4 x float> %scaled, <4 x float>* %c
  ; CHECK-DAG: st.w [[OUT]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32_2
}
288
; llvm.exp2 on <2 x double> followed by an fmul with a splat of 2.0; the
; product is stored to %c. The FileCheck directives expect the chain ldi.d ->
; ffint_u.d -> fexp2.d, with the fexp2.d result being the value stored.
define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64_2:

  %x = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[X:\$w[0-9]+]], 0($5)
  %pow = tail call <2 x double> @llvm.exp2.v2f64(<2 x double> %x)
  %scaled = fmul <2 x double> <double 2.0, double 2.0>, %pow
  ; CHECK-DAG: ldi.d [[IMM:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[BASE:\$w[0-9]+]], [[IMM]]
  ; CHECK-DAG: fexp2.d [[OUT:\$w[0-9]+]], [[BASE]], [[X]]
  store <2 x double> %scaled, <2 x double>* %c
  ; CHECK-DAG: st.d [[OUT]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64_2
}
305
; llvm.sqrt on <4 x float>: loads %a, takes the square root, stores to %c.
; The FileCheck directives expect an ld.w / fsqrt.w / st.w sequence.
define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fsqrt_v4f32:

  %src = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[SRC:\$w[0-9]+]], 0($5)
  %root = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %src)
  ; CHECK-DAG: fsqrt.w [[ROOT:\$w[0-9]+]], [[SRC]]
  store <4 x float> %root, <4 x float>* %c
  ; CHECK-DAG: st.w [[ROOT]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v4f32
}
319
; llvm.sqrt on <2 x double>: loads %a, takes the square root, stores to %c.
; The FileCheck directives expect an ld.d / fsqrt.d / st.d sequence.
define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fsqrt_v2f64:

  %src = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[SRC:\$w[0-9]+]], 0($5)
  %root = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %src)
  ; CHECK-DAG: fsqrt.d [[ROOT:\$w[0-9]+]], [[SRC]]
  store <2 x double> %root, <2 x double>* %c
  ; CHECK-DAG: st.d [[ROOT]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v2f64
}
333
; uitofp from <4 x i32> to <4 x float>: loads %a, converts, stores to %c.
; The FileCheck directives expect an ld.w / ffint_u.w / st.w sequence.
define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_u_v4f32:

  %ints = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[INTS:\$w[0-9]+]], 0($5)
  %fps = uitofp <4 x i32> %ints to <4 x float>
  ; CHECK-DAG: ffint_u.w [[FPS:\$w[0-9]+]], [[INTS]]
  store <4 x float> %fps, <4 x float>* %c
  ; CHECK-DAG: st.w [[FPS]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v4f32
}
347
; uitofp from <2 x i64> to <2 x double>: loads %a, converts, stores to %c.
; The FileCheck directives expect an ld.d / ffint_u.d / st.d sequence.
define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_u_v2f64:

  %ints = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[INTS:\$w[0-9]+]], 0($5)
  %fps = uitofp <2 x i64> %ints to <2 x double>
  ; CHECK-DAG: ffint_u.d [[FPS:\$w[0-9]+]], [[INTS]]
  store <2 x double> %fps, <2 x double>* %c
  ; CHECK-DAG: st.d [[FPS]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v2f64
}
361
; sitofp from <4 x i32> to <4 x float>: loads %a, converts, stores to %c.
; The FileCheck directives expect an ld.w / ffint_s.w / st.w sequence.
define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_s_v4f32:

  %ints = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[INTS:\$w[0-9]+]], 0($5)
  %fps = sitofp <4 x i32> %ints to <4 x float>
  ; CHECK-DAG: ffint_s.w [[FPS:\$w[0-9]+]], [[INTS]]
  store <4 x float> %fps, <4 x float>* %c
  ; CHECK-DAG: st.w [[FPS]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v4f32
}
375
; sitofp from <2 x i64> to <2 x double>: loads %a, converts, stores to %c.
; The FileCheck directives expect an ld.d / ffint_s.d / st.d sequence.
define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_s_v2f64:

  %ints = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[INTS:\$w[0-9]+]], 0($5)
  %fps = sitofp <2 x i64> %ints to <2 x double>
  ; CHECK-DAG: ffint_s.d [[FPS:\$w[0-9]+]], [[INTS]]
  store <2 x double> %fps, <2 x double>* %c
  ; CHECK-DAG: st.d [[FPS]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v2f64
}
389
; fptoui from <4 x float> to <4 x i32>: loads %a, converts, stores to %c.
; The FileCheck directives expect an ld.w / ftrunc_u.w / st.w sequence.
define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_u_v4f32:

  %fps = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[FPS:\$w[0-9]+]], 0($5)
  %ints = fptoui <4 x float> %fps to <4 x i32>
  ; CHECK-DAG: ftrunc_u.w [[INTS:\$w[0-9]+]], [[FPS]]
  store <4 x i32> %ints, <4 x i32>* %c
  ; CHECK-DAG: st.w [[INTS]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v4f32
}
403
; fptoui from <2 x double> to <2 x i64>: loads %a, converts, stores to %c.
; The FileCheck directives expect an ld.d / ftrunc_u.d / st.d sequence.
define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_u_v2f64:

  %fps = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[FPS:\$w[0-9]+]], 0($5)
  %ints = fptoui <2 x double> %fps to <2 x i64>
  ; CHECK-DAG: ftrunc_u.d [[INTS:\$w[0-9]+]], [[FPS]]
  store <2 x i64> %ints, <2 x i64>* %c
  ; CHECK-DAG: st.d [[INTS]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v2f64
}
417
; fptosi from <4 x float> to <4 x i32>: loads %a, converts, stores to %c.
; The FileCheck directives expect an ld.w / ftrunc_s.w / st.w sequence.
define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_s_v4f32:

  %fps = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[FPS:\$w[0-9]+]], 0($5)
  %ints = fptosi <4 x float> %fps to <4 x i32>
  ; CHECK-DAG: ftrunc_s.w [[INTS:\$w[0-9]+]], [[FPS]]
  store <4 x i32> %ints, <4 x i32>* %c
  ; CHECK-DAG: st.w [[INTS]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v4f32
}
431
; fptosi from <2 x double> to <2 x i64>: loads %a, converts, stores to %c.
; The FileCheck directives expect an ld.d / ftrunc_s.d / st.d sequence.
define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_s_v2f64:

  %fps = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[FPS:\$w[0-9]+]], 0($5)
  %ints = fptosi <2 x double> %fps to <2 x i64>
  ; CHECK-DAG: ftrunc_s.d [[INTS:\$w[0-9]+]], [[FPS]]
  store <2 x i64> %ints, <2 x i64>* %c
  ; CHECK-DAG: st.d [[INTS]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v2f64
}
445
; Declarations of the LLVM intrinsics called by the functions in this file,
; in <4 x float> / <2 x double> pairs.
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
declare <4 x float> @llvm.exp2.v4f32(<4 x float>)
declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
456