1; RUN: llc -march=mips -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s
2; RUN: llc -march=mipsel -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s
3
; Vector fadd on <4 x float>: *%c = *%a + *%b.
; Expects both operands to be loaded with ld.w, combined with fadd.w, and the
; result written back with st.w.
define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: add_v4f32:

  %lhs = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %sum = fadd <4 x float> %lhs, %rhs
  ; CHECK-DAG: fadd.w [[SUM:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x float> %sum, <4 x float>* %c
  ; CHECK-DAG: st.w [[SUM]], 0($4)

  ret void
  ; CHECK: .size add_v4f32
}
19
; Vector fadd on <2 x double>: *%c = *%a + *%b.
; Expects both operands to be loaded with ld.d, combined with fadd.d, and the
; result written back with st.d.
define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: add_v2f64:

  %lhs = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %sum = fadd <2 x double> %lhs, %rhs
  ; CHECK-DAG: fadd.d [[SUM:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x double> %sum, <2 x double>* %c
  ; CHECK-DAG: st.d [[SUM]], 0($4)

  ret void
  ; CHECK: .size add_v2f64
}
35
; Vector fsub on <4 x float>: *%c = *%a - *%b.
; Expects the operands to be loaded with ld.w and subtracted with fsub.w in
; the same operand order as the IR.
define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: sub_v4f32:

  %lhs = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %diff = fsub <4 x float> %lhs, %rhs
  ; CHECK-DAG: fsub.w [[DIFF:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x float> %diff, <4 x float>* %c
  ; CHECK-DAG: st.w [[DIFF]], 0($4)

  ret void
  ; CHECK: .size sub_v4f32
}
51
; Vector fsub on <2 x double>: *%c = *%a - *%b.
; Expects the operands to be loaded with ld.d and subtracted with fsub.d in
; the same operand order as the IR.
define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: sub_v2f64:

  %lhs = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %diff = fsub <2 x double> %lhs, %rhs
  ; CHECK-DAG: fsub.d [[DIFF:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x double> %diff, <2 x double>* %c
  ; CHECK-DAG: st.d [[DIFF]], 0($4)

  ret void
  ; CHECK: .size sub_v2f64
}
67
; Vector fmul on <4 x float>: *%c = *%a * *%b.
; Expects both operands to be loaded with ld.w, multiplied with fmul.w, and
; the product written back with st.w.
define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: mul_v4f32:

  %lhs = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[RHS:\$w[0-9]+]], 0($6)
  %prod = fmul <4 x float> %lhs, %rhs
  ; CHECK-DAG: fmul.w [[PROD:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <4 x float> %prod, <4 x float>* %c
  ; CHECK-DAG: st.w [[PROD]], 0($4)

  ret void
  ; CHECK: .size mul_v4f32
}
83
; Vector fmul on <2 x double>: *%c = *%a * *%b.
; Expects both operands to be loaded with ld.d, multiplied with fmul.d, and
; the product written back with st.d.
define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: mul_v2f64:

  %lhs = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[LHS:\$w[0-9]+]], 0($5)
  %rhs = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[RHS:\$w[0-9]+]], 0($6)
  %prod = fmul <2 x double> %lhs, %rhs
  ; CHECK-DAG: fmul.d [[PROD:\$w[0-9]+]], [[LHS]], [[RHS]]
  store <2 x double> %prod, <2 x double>* %c
  ; CHECK-DAG: st.d [[PROD]], 0($4)

  ret void
  ; CHECK: .size mul_v2f64
}
99
; llvm.fma on <4 x float> with operands *%a, *%b and *%c; the result is stored
; to *%d. Expects an fmadd.w that overwrites the register loaded from %a, and
; that same register to be stored with st.w.
define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                       <4 x float>* %c) nounwind {
  ; CHECK: fma_v4f32:

  %va = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[VA:\$w[0-9]+]], 0($5)
  %vb = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[VB:\$w[0-9]+]], 0($6)
  %vc = load <4 x float>, <4 x float>* %c
  ; CHECK-DAG: ld.w [[VC:\$w[0-9]+]], 0($7)
  %fused = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %va,
                                                 <4 x float> %vb,
                                                 <4 x float> %vc)
  ; CHECK-DAG: fmadd.w [[VA]], [[VB]], [[VC]]
  store <4 x float> %fused, <4 x float>* %d
  ; CHECK-DAG: st.w [[VA]], 0($4)

  ret void
  ; CHECK: .size fma_v4f32
}
119
; llvm.fma on <2 x double> with operands *%a, *%b and *%c; the result is
; stored to *%d. Expects an fmadd.d that overwrites the register loaded from
; %a, and that same register to be stored with st.d.
define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                       <2 x double>* %c) nounwind {
  ; CHECK: fma_v2f64:

  %va = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[VA:\$w[0-9]+]], 0($5)
  %vb = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[VB:\$w[0-9]+]], 0($6)
  %vc = load <2 x double>, <2 x double>* %c
  ; CHECK-DAG: ld.d [[VC:\$w[0-9]+]], 0($7)
  %fused = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %va,
                                                  <2 x double> %vb,
                                                  <2 x double> %vc)
  ; CHECK-DAG: fmadd.d [[VA]], [[VB]], [[VC]]
  store <2 x double> %fused, <2 x double>* %d
  ; CHECK-DAG: st.d [[VA]], 0($4)

  ret void
  ; CHECK: .size fma_v2f64
}
139
; fmul feeding an fsub on <4 x float>: *%d = *%a - (*%b * *%c).
; Expects separate fmul.w and fsub.w instructions, both writing the register
; that was loaded from %c.
; NOTE(review): the symbol is spelled "fmlu" (the v2f64 sibling uses "fmul");
; the spelling is kept here because the label and .size patterns match it.
define void @fmlu_fsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                       <4 x float>* %c) nounwind {
  ; CHECK: fmlu_fsub_v4f32:

  %vb = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[VB:\$w[0-9]+]], 0($6)
  %vc = load <4 x float>, <4 x float>* %c
  ; CHECK-DAG: ld.w [[VC:\$w[0-9]+]], 0($7)
  %prod = fmul <4 x float> %vb, %vc
  ; CHECK-DAG: fmul.w [[VC]], [[VB]], [[VC]]
  %va = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[VA:\$w[0-9]+]], 0($5)
  %diff = fsub <4 x float> %va, %prod
  ; CHECK-DAG: fsub.w [[VC]], [[VA]], [[VC]]
  store <4 x float> %diff, <4 x float>* %d
  ; CHECK-DAG: st.w [[VC]], 0($4)

  ret void
  ; CHECK: .size fmlu_fsub_v4f32
}
160
; fmul feeding an fsub on <2 x double>: *%d = *%a - (*%b * *%c).
; Expects separate fmul.d and fsub.d instructions, both writing the register
; that was loaded from %c.
;
; Each load pattern sits directly under the IR load it describes: %a, %b and
; %c are matched at 0($5), 0($6) and 0($7), the same pairing used by the other
; four-pointer tests in this file.
define void @fmul_fsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                       <2 x double>* %c) nounwind {
  ; CHECK: fmul_fsub_v2f64:

  %vb = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[VB:\$w[0-9]+]], 0($6)
  %vc = load <2 x double>, <2 x double>* %c
  ; CHECK-DAG: ld.d [[VC:\$w[0-9]+]], 0($7)
  %prod = fmul <2 x double> %vb, %vc
  ; CHECK-DAG: fmul.d [[VC]], [[VB]], [[VC]]
  %va = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[VA:\$w[0-9]+]], 0($5)
  %diff = fsub <2 x double> %va, %prod
  ; CHECK-DAG: fsub.d [[VC]], [[VA]], [[VC]]
  store <2 x double> %diff, <2 x double>* %d
  ; CHECK-DAG: st.d [[VC]], 0($4)

  ret void
  ; CHECK: .size fmul_fsub_v2f64
}
181
; Vector fdiv on <4 x float>: *%c = *%a / *%b.
; Expects the operands to be loaded with ld.w and divided with fdiv.w in the
; same operand order as the IR.
define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: fdiv_v4f32:

  %num = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[NUM:\$w[0-9]+]], 0($5)
  %den = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[DEN:\$w[0-9]+]], 0($6)
  %quot = fdiv <4 x float> %num, %den
  ; CHECK-DAG: fdiv.w [[QUOT:\$w[0-9]+]], [[NUM]], [[DEN]]
  store <4 x float> %quot, <4 x float>* %c
  ; CHECK-DAG: st.w [[QUOT]], 0($4)

  ret void
  ; CHECK: .size fdiv_v4f32
}
197
; Vector fdiv on <2 x double>: *%c = *%a / *%b.
; Expects the operands to be loaded with ld.d and divided with fdiv.d in the
; same operand order as the IR.
define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: fdiv_v2f64:

  %num = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[NUM:\$w[0-9]+]], 0($5)
  %den = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[DEN:\$w[0-9]+]], 0($6)
  %quot = fdiv <2 x double> %num, %den
  ; CHECK-DAG: fdiv.d [[QUOT:\$w[0-9]+]], [[NUM]], [[DEN]]
  store <2 x double> %quot, <2 x double>* %c
  ; CHECK-DAG: st.d [[QUOT]], 0($4)

  ret void
  ; CHECK: .size fdiv_v2f64
}
213
; llvm.fabs on <4 x float>: *%c = fabs(*%a).
; Expects an fmax_a.w that takes the loaded vector as both source operands.
define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fabs_v4f32:

  %src = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[SRC:\$w[0-9]+]], 0($5)
  %abs = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %src)
  ; CHECK-DAG: fmax_a.w [[ABS:\$w[0-9]+]], [[SRC]], [[SRC]]
  store <4 x float> %abs, <4 x float>* %c
  ; CHECK-DAG: st.w [[ABS]], 0($4)

  ret void
  ; CHECK: .size fabs_v4f32
}
227
; llvm.fabs on <2 x double>: *%c = fabs(*%a).
; Expects an fmax_a.d that takes the loaded vector as both source operands.
define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fabs_v2f64:

  %src = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[SRC:\$w[0-9]+]], 0($5)
  %abs = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %src)
  ; CHECK-DAG: fmax_a.d [[ABS:\$w[0-9]+]], [[SRC]], [[SRC]]
  store <2 x double> %abs, <2 x double>* %c
  ; CHECK-DAG: st.d [[ABS]], 0($4)

  ret void
  ; CHECK: .size fabs_v2f64
}
241
; llvm.exp2 on <4 x float>: *%c = exp2(*%a).
; Expects a splat of integer 1 (ldi.w), converted to floating point
; (ffint_u.w), then fed to fexp2.w together with the loaded vector.
define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
  ; fexp2.w must consume the converted value (R4), not the raw integer splat
  ; (R3). Its result gets its own variable so R4 is not redefined, which keeps
  ; the pattern valid for any register assignment.
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32
}
257
; llvm.exp2 on <2 x double>: *%c = exp2(*%a).
; Expects a splat of integer 1 (ldi.d), converted to floating point
; (ffint_u.d), then fed to fexp2.d together with the loaded vector.
define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R4:\$w[0-9]+]], [[R3]]
  ; fexp2.d must consume the converted value (R4), not the raw integer splat
  ; (R3). Its result gets its own variable so R4 is not redefined, which keeps
  ; the pattern valid for any register assignment.
  ; CHECK-DAG: fexp2.d [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64
}
273
; 2.0 * llvm.exp2 on <4 x float>: *%c = 2.0 * exp2(*%a).
; Expects the multiplication to be folded into fexp2.w, so the first source
; operand of fexp2.w must be a splat of 2.0 rather than the 1.0 used by the
; plain exp2 test.
define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32_2:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  %3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
  ; 16384 is 0x4000, so lui produces 0x40000000, the IEEE-754 single-precision
  ; encoding of 2.0; fill.w then splats it across the vector.
  ; NOTE(review): confirm the exact materialization sequence against llc output.
  ; CHECK-DAG: lui [[R3:\$[0-9]+]], 16384
  ; CHECK-DAG: fill.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32_2
}
290
; 2.0 * llvm.exp2 on <2 x double>: *%c = 2.0 * exp2(*%a).
; Expects the multiplication to be folded into fexp2.d, so the first source
; operand of fexp2.d must be a splat of 2.0 rather than the 1.0 used by the
; plain exp2 test.
define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64_2:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  %3 = fmul <2 x double> <double 2.0, double 2.0>, %2
  ; 1 << 62 is 0x4000000000000000, the IEEE-754 double-precision encoding of
  ; 2.0, so the constant is built as an integer splat of 1 shifted left by 62.
  ; NOTE(review): confirm the exact materialization sequence against llc output.
  ; CHECK-DAG: ldi.d [[R2:\$w[0-9]+]], 1
  ; CHECK-DAG: slli.d [[R3:\$w[0-9]+]], [[R2]], 62
  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64_2
}
307
; llvm.sqrt on <4 x float>: *%c = sqrt(*%a).
; Expects the loaded vector to be passed to fsqrt.w and the result stored.
define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fsqrt_v4f32:

  %src = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[SRC:\$w[0-9]+]], 0($5)
  %root = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %src)
  ; CHECK-DAG: fsqrt.w [[ROOT:\$w[0-9]+]], [[SRC]]
  store <4 x float> %root, <4 x float>* %c
  ; CHECK-DAG: st.w [[ROOT]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v4f32
}
321
; llvm.sqrt on <2 x double>: *%c = sqrt(*%a).
; Expects the loaded vector to be passed to fsqrt.d and the result stored.
define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fsqrt_v2f64:

  %src = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[SRC:\$w[0-9]+]], 0($5)
  %root = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %src)
  ; CHECK-DAG: fsqrt.d [[ROOT:\$w[0-9]+]], [[SRC]]
  store <2 x double> %root, <2 x double>* %c
  ; CHECK-DAG: st.d [[ROOT]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v2f64
}
335
; uitofp from <4 x i32> to <4 x float>: *%c = (float)(unsigned)*%a.
; Expects the loaded integer vector to be converted with ffint_u.w.
define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_u_v4f32:

  %ints = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[INTS:\$w[0-9]+]], 0($5)
  %flts = uitofp <4 x i32> %ints to <4 x float>
  ; CHECK-DAG: ffint_u.w [[FLTS:\$w[0-9]+]], [[INTS]]
  store <4 x float> %flts, <4 x float>* %c
  ; CHECK-DAG: st.w [[FLTS]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v4f32
}
349
; uitofp from <2 x i64> to <2 x double>: *%c = (double)(unsigned)*%a.
; Expects the loaded integer vector to be converted with ffint_u.d.
define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_u_v2f64:

  %ints = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[INTS:\$w[0-9]+]], 0($5)
  %flts = uitofp <2 x i64> %ints to <2 x double>
  ; CHECK-DAG: ffint_u.d [[FLTS:\$w[0-9]+]], [[INTS]]
  store <2 x double> %flts, <2 x double>* %c
  ; CHECK-DAG: st.d [[FLTS]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v2f64
}
363
; sitofp from <4 x i32> to <4 x float>: *%c = (float)(signed)*%a.
; Expects the loaded integer vector to be converted with ffint_s.w.
define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_s_v4f32:

  %ints = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[INTS:\$w[0-9]+]], 0($5)
  %flts = sitofp <4 x i32> %ints to <4 x float>
  ; CHECK-DAG: ffint_s.w [[FLTS:\$w[0-9]+]], [[INTS]]
  store <4 x float> %flts, <4 x float>* %c
  ; CHECK-DAG: st.w [[FLTS]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v4f32
}
377
; sitofp from <2 x i64> to <2 x double>: *%c = (double)(signed)*%a.
; Expects the loaded integer vector to be converted with ffint_s.d.
define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_s_v2f64:

  %ints = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[INTS:\$w[0-9]+]], 0($5)
  %flts = sitofp <2 x i64> %ints to <2 x double>
  ; CHECK-DAG: ffint_s.d [[FLTS:\$w[0-9]+]], [[INTS]]
  store <2 x double> %flts, <2 x double>* %c
  ; CHECK-DAG: st.d [[FLTS]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v2f64
}
391
; fptoui from <4 x float> to <4 x i32>: *%c = (unsigned)*%a.
; Expects the loaded float vector to be converted with ftrunc_u.w.
define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_u_v4f32:

  %flts = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[FLTS:\$w[0-9]+]], 0($5)
  %ints = fptoui <4 x float> %flts to <4 x i32>
  ; CHECK-DAG: ftrunc_u.w [[INTS:\$w[0-9]+]], [[FLTS]]
  store <4 x i32> %ints, <4 x i32>* %c
  ; CHECK-DAG: st.w [[INTS]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v4f32
}
405
; fptoui from <2 x double> to <2 x i64>: *%c = (unsigned)*%a.
; Expects the loaded float vector to be converted with ftrunc_u.d.
define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_u_v2f64:

  %flts = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[FLTS:\$w[0-9]+]], 0($5)
  %ints = fptoui <2 x double> %flts to <2 x i64>
  ; CHECK-DAG: ftrunc_u.d [[INTS:\$w[0-9]+]], [[FLTS]]
  store <2 x i64> %ints, <2 x i64>* %c
  ; CHECK-DAG: st.d [[INTS]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v2f64
}
419
; fptosi from <4 x float> to <4 x i32>: *%c = (signed)*%a.
; Expects the loaded float vector to be converted with ftrunc_s.w.
define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_s_v4f32:

  %flts = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[FLTS:\$w[0-9]+]], 0($5)
  %ints = fptosi <4 x float> %flts to <4 x i32>
  ; CHECK-DAG: ftrunc_s.w [[INTS:\$w[0-9]+]], [[FLTS]]
  store <4 x i32> %ints, <4 x i32>* %c
  ; CHECK-DAG: st.w [[INTS]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v4f32
}
433
; fptosi from <2 x double> to <2 x i64>: *%c = (signed)*%a.
; Expects the loaded float vector to be converted with ftrunc_s.d.
define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_s_v2f64:

  %flts = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[FLTS:\$w[0-9]+]], 0($5)
  %ints = fptosi <2 x double> %flts to <2 x i64>
  ; CHECK-DAG: ftrunc_s.d [[INTS:\$w[0-9]+]], [[FLTS]]
  store <2 x i64> %ints, <2 x i64>* %c
  ; CHECK-DAG: st.d [[INTS]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v2f64
}
447
448declare <4 x float>  @llvm.fabs.v4f32(<4 x float>  %Val)
449declare <2 x double> @llvm.fabs.v2f64(<2 x double> %Val)
450declare <4 x float>  @llvm.exp2.v4f32(<4 x float>  %val)
451declare <2 x double> @llvm.exp2.v2f64(<2 x double> %val)
452declare <4 x float>  @llvm.fma.v4f32(<4 x float>  %a, <4 x float>  %b,
453                                     <4 x float>  %c)
454declare <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b,
455                                     <2 x double> %c)
456declare <4 x float>  @llvm.sqrt.v4f32(<4 x float>  %Val)
457declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val)
458