1; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
2; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
3
; Element-wise add of two <16 x i8> vectors loaded from %a and %b.
; Expects ld.b from 0($5) and 0($6), addv.b on the two loaded registers, and
; st.b of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: add_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = add <16 x i8> %1, %2
  ; CHECK-DAG: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v16i8
}
19
; Element-wise add of two <8 x i16> vectors loaded from %a and %b.
; Expects ld.h from 0($5) and 0($6), addv.h on the two loaded registers, and
; st.h of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: add_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = add <8 x i16> %1, %2
  ; CHECK-DAG: addv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v8i16
}
35
; Element-wise add of two <4 x i32> vectors loaded from %a and %b.
; Expects ld.w from 0($5) and 0($6), addv.w on the two loaded registers, and
; st.w of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: add_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = add <4 x i32> %1, %2
  ; CHECK-DAG: addv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4i32
}
51
; Element-wise add of two <2 x i64> vectors loaded from %a and %b.
; Expects ld.d from 0($5) and 0($6), addv.d on the two loaded registers, and
; st.d of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: add_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = add <2 x i64> %1, %2
  ; CHECK-DAG: addv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2i64
}
67
; Add of a splat constant 1 to a <16 x i8> vector loaded from %a.
; Expects ld.b from 0($5), addvi.b with immediate 1, and st.b of the result
; to 0($4). The label match keeps these expectations confined to this
; function's assembly.
define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: add_v16i8_i:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = add <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: addvi.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v16i8_i
}
82
; Add of a splat constant 1 to a <8 x i16> vector loaded from %a.
; Expects ld.h from 0($5), addvi.h with immediate 1, and st.h of the result
; to 0($4). The label match keeps these expectations confined to this
; function's assembly.
define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: add_v8i16_i:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = add <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
                          i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: addvi.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v8i16_i
}
97
; Add of a splat constant 1 to a <4 x i32> vector loaded from %a.
; Expects ld.w from 0($5), addvi.w with immediate 1, and st.w of the result
; to 0($4). The label match keeps these expectations confined to this
; function's assembly.
define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: add_v4i32_i:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = add <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: addvi.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4i32_i
}
111
; Add of a splat constant 1 to a <2 x i64> vector loaded from %a.
; Expects ld.d from 0($5), addvi.d with immediate 1, and st.d of the result
; to 0($4). The label match keeps these expectations confined to this
; function's assembly.
define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: add_v2i64_i:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = add <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: addvi.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2i64_i
}
125
; Element-wise subtract (%a value minus %b value) of two <16 x i8> vectors.
; Expects ld.b from 0($5) and 0($6), subv.b on the two loaded registers, and
; st.b of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: sub_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = sub <16 x i8> %1, %2
  ; CHECK-DAG: subv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v16i8
}
141
; Element-wise subtract (%a value minus %b value) of two <8 x i16> vectors.
; Expects ld.h from 0($5) and 0($6), subv.h on the two loaded registers, and
; st.h of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: sub_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = sub <8 x i16> %1, %2
  ; CHECK-DAG: subv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v8i16
}
157
; Element-wise subtract (%a value minus %b value) of two <4 x i32> vectors.
; Expects ld.w from 0($5) and 0($6), subv.w on the two loaded registers, and
; st.w of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: sub_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = sub <4 x i32> %1, %2
  ; CHECK-DAG: subv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4i32
}
173
; Element-wise subtract (%a value minus %b value) of two <2 x i64> vectors.
; Expects ld.d from 0($5) and 0($6), subv.d on the two loaded registers, and
; st.d of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: sub_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = sub <2 x i64> %1, %2
  ; CHECK-DAG: subv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2i64
}
189
; Subtract of a splat constant 1 from a <16 x i8> vector loaded from %a.
; Expects ld.b from 0($5), subvi.b with immediate 1, and st.b of the result
; to 0($4). The label match keeps these expectations confined to this
; function's assembly.
define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: sub_v16i8_i:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = sub <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: subvi.b [[R3:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v16i8_i
}
204
; Subtract of a splat constant 1 from a <8 x i16> vector loaded from %a.
; Expects ld.h from 0($5), subvi.h with immediate 1, and st.h of the result
; to 0($4). The label match keeps these expectations confined to this
; function's assembly.
define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: sub_v8i16_i:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = sub <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
                          i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: subvi.h [[R3:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v8i16_i
}
219
; Subtract of a splat constant 1 from a <4 x i32> vector loaded from %a.
; Expects ld.w from 0($5), subvi.w with immediate 1, and st.w of the result
; to 0($4). The label match keeps these expectations confined to this
; function's assembly.
define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: sub_v4i32_i:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = sub <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: subvi.w [[R3:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4i32_i
}
233
; Subtract of a splat constant 1 from a <2 x i64> vector loaded from %a.
; Expects ld.d from 0($5), subvi.d with immediate 1, and st.d of the result
; to 0($4). The label match keeps these expectations confined to this
; function's assembly.
define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: sub_v2i64_i:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = sub <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: subvi.d [[R3:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2i64_i
}
247
; Element-wise multiply of two <16 x i8> vectors loaded from %a and %b.
; Expects ld.b from 0($5) and 0($6), mulv.b on the two loaded registers, and
; st.b of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: mul_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = mul <16 x i8> %1, %2
  ; CHECK-DAG: mulv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v16i8
}
263
; Element-wise multiply of two <8 x i16> vectors loaded from %a and %b.
; Expects ld.h from 0($5) and 0($6), mulv.h on the two loaded registers, and
; st.h of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: mul_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = mul <8 x i16> %1, %2
  ; CHECK-DAG: mulv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v8i16
}
279
; Element-wise multiply of two <4 x i32> vectors loaded from %a and %b.
; Expects ld.w from 0($5) and 0($6), mulv.w on the two loaded registers, and
; st.w of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: mul_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = mul <4 x i32> %1, %2
  ; CHECK-DAG: mulv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v4i32
}
295
; Element-wise multiply of two <2 x i64> vectors loaded from %a and %b.
; Expects ld.d from 0($5) and 0($6), mulv.d on the two loaded registers, and
; st.d of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @mul_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: mul_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = mul <2 x i64> %1, %2
  ; CHECK-DAG: mulv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v2i64
}
311
; Multiply-add on <16 x i8>: (value of %b * value of %c) + value of %a,
; stored through %d.
; Expects ld.b from 0($5), 0($6) and 0($7), a maddv.b whose destination is the
; register holding the %a value, and st.b of that same register to 0($4).
; The label match keeps these expectations confined to this function's
; assembly.
define void @maddv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
                         <16 x i8>* %c) nounwind {
  ; CHECK-LABEL: maddv_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = load <16 x i8>, <16 x i8>* %c
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <16 x i8> %2, %3
  %5 = add <16 x i8> %4, %1
  ; CHECK-DAG: maddv.b [[R1]], [[R2]], [[R3]]
  store <16 x i8> %5, <16 x i8>* %d
  ; CHECK-DAG: st.b [[R1]], 0($4)

  ret void
  ; CHECK: .size maddv_v16i8
}
331
; Multiply-add on <8 x i16>: (value of %b * value of %c) + value of %a,
; stored through %d.
; Expects ld.h from 0($5), 0($6) and 0($7), a maddv.h whose destination is the
; register holding the %a value, and st.h of that same register to 0($4).
; The label match keeps these expectations confined to this function's
; assembly.
define void @maddv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
                         <8 x i16>* %c) nounwind {
  ; CHECK-LABEL: maddv_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = load <8 x i16>, <8 x i16>* %c
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <8 x i16> %2, %3
  %5 = add <8 x i16> %4, %1
  ; CHECK-DAG: maddv.h [[R1]], [[R2]], [[R3]]
  store <8 x i16> %5, <8 x i16>* %d
  ; CHECK-DAG: st.h [[R1]], 0($4)

  ret void
  ; CHECK: .size maddv_v8i16
}
351
; Multiply-add on <4 x i32>: (value of %b * value of %c) + value of %a,
; stored through %d.
; Expects ld.w from 0($5), 0($6) and 0($7), a maddv.w whose destination is the
; register holding the %a value, and st.w of that same register to 0($4).
; The label match keeps these expectations confined to this function's
; assembly.
define void @maddv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
                         <4 x i32>* %c) nounwind {
  ; CHECK-LABEL: maddv_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x i32>, <4 x i32>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <4 x i32> %2, %3
  %5 = add <4 x i32> %4, %1
  ; CHECK-DAG: maddv.w [[R1]], [[R2]], [[R3]]
  store <4 x i32> %5, <4 x i32>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size maddv_v4i32
}
371
; Multiply-add on <2 x i64>: (value of %b * value of %c) + value of %a,
; stored through %d.
; Expects ld.d from 0($5), 0($6) and 0($7), a maddv.d whose destination is the
; register holding the %a value, and st.d of that same register to 0($4).
; The label match keeps these expectations confined to this function's
; assembly.
define void @maddv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
                         <2 x i64>* %c) nounwind {
  ; CHECK-LABEL: maddv_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x i64>, <2 x i64>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <2 x i64> %2, %3
  %5 = add <2 x i64> %4, %1
  ; CHECK-DAG: maddv.d [[R1]], [[R2]], [[R3]]
  store <2 x i64> %5, <2 x i64>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size maddv_v2i64
}
391
; Multiply-subtract on <16 x i8>: value of %a - (value of %b * value of %c),
; stored through %d.
; Expects ld.b from 0($5), 0($6) and 0($7), a msubv.b whose destination is the
; register holding the %a value, and st.b of that same register to 0($4).
; The label match keeps these expectations confined to this function's
; assembly.
define void @msubv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
                         <16 x i8>* %c) nounwind {
  ; CHECK-LABEL: msubv_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = load <16 x i8>, <16 x i8>* %c
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <16 x i8> %2, %3
  %5 = sub <16 x i8> %1, %4
  ; CHECK-DAG: msubv.b [[R1]], [[R2]], [[R3]]
  store <16 x i8> %5, <16 x i8>* %d
  ; CHECK-DAG: st.b [[R1]], 0($4)

  ret void
  ; CHECK: .size msubv_v16i8
}
411
; Multiply-subtract on <8 x i16>: value of %a - (value of %b * value of %c),
; stored through %d.
; Expects ld.h from 0($5), 0($6) and 0($7), a msubv.h whose destination is the
; register holding the %a value, and st.h of that same register to 0($4).
; The label match keeps these expectations confined to this function's
; assembly.
define void @msubv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
                         <8 x i16>* %c) nounwind {
  ; CHECK-LABEL: msubv_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = load <8 x i16>, <8 x i16>* %c
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <8 x i16> %2, %3
  %5 = sub <8 x i16> %1, %4
  ; CHECK-DAG: msubv.h [[R1]], [[R2]], [[R3]]
  store <8 x i16> %5, <8 x i16>* %d
  ; CHECK-DAG: st.h [[R1]], 0($4)

  ret void
  ; CHECK: .size msubv_v8i16
}
431
; Multiply-subtract on <4 x i32>: value of %a - (value of %b * value of %c),
; stored through %d.
; Expects ld.w from 0($5), 0($6) and 0($7), a msubv.w whose destination is the
; register holding the %a value, and st.w of that same register to 0($4).
; The label match keeps these expectations confined to this function's
; assembly.
define void @msubv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
                         <4 x i32>* %c) nounwind {
  ; CHECK-LABEL: msubv_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x i32>, <4 x i32>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <4 x i32> %2, %3
  %5 = sub <4 x i32> %1, %4
  ; CHECK-DAG: msubv.w [[R1]], [[R2]], [[R3]]
  store <4 x i32> %5, <4 x i32>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size msubv_v4i32
}
451
; Multiply-subtract on <2 x i64>: value of %a - (value of %b * value of %c),
; stored through %d.
; Expects ld.d from 0($5), 0($6) and 0($7), a msubv.d whose destination is the
; register holding the %a value, and st.d of that same register to 0($4).
; The label match keeps these expectations confined to this function's
; assembly.
define void @msubv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
                         <2 x i64>* %c) nounwind {
  ; CHECK-LABEL: msubv_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x i64>, <2 x i64>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = mul <2 x i64> %2, %3
  %5 = sub <2 x i64> %1, %4
  ; CHECK-DAG: msubv.d [[R1]], [[R2]], [[R3]]
  store <2 x i64> %5, <2 x i64>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size msubv_v2i64
}
471
; Element-wise signed divide (sdiv) of the <16 x i8> value at %a by that at %b.
; Expects ld.b from 0($5) and 0($6), div_s.b on the two loaded registers, and
; st.b of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: div_s_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = sdiv <16 x i8> %1, %2
  ; CHECK-DAG: div_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size div_s_v16i8
}
487
; Element-wise signed divide (sdiv) of the <8 x i16> value at %a by that at %b.
; Expects ld.h from 0($5) and 0($6), div_s.h on the two loaded registers, and
; st.h of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: div_s_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = sdiv <8 x i16> %1, %2
  ; CHECK-DAG: div_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size div_s_v8i16
}
503
; Element-wise signed divide (sdiv) of the <4 x i32> value at %a by that at %b.
; Expects ld.w from 0($5) and 0($6), div_s.w on the two loaded registers, and
; st.w of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: div_s_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = sdiv <4 x i32> %1, %2
  ; CHECK-DAG: div_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size div_s_v4i32
}
519
; Element-wise signed divide (sdiv) of the <2 x i64> value at %a by that at %b.
; Expects ld.d from 0($5) and 0($6), div_s.d on the two loaded registers, and
; st.d of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: div_s_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = sdiv <2 x i64> %1, %2
  ; CHECK-DAG: div_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size div_s_v2i64
}
535
; Element-wise unsigned divide (udiv) of the <16 x i8> value at %a by that
; at %b.
; Expects ld.b from 0($5) and 0($6), div_u.b on the two loaded registers, and
; st.b of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: div_u_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = udiv <16 x i8> %1, %2
  ; CHECK-DAG: div_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size div_u_v16i8
}
551
; Element-wise unsigned divide (udiv) of the <8 x i16> value at %a by that
; at %b.
; Expects ld.h from 0($5) and 0($6), div_u.h on the two loaded registers, and
; st.h of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: div_u_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = udiv <8 x i16> %1, %2
  ; CHECK-DAG: div_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size div_u_v8i16
}
567
; Element-wise unsigned divide (udiv) of the <4 x i32> value at %a by that
; at %b.
; Expects ld.w from 0($5) and 0($6), div_u.w on the two loaded registers, and
; st.w of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: div_u_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = udiv <4 x i32> %1, %2
  ; CHECK-DAG: div_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size div_u_v4i32
}
583
; Element-wise unsigned divide (udiv) of the <2 x i64> value at %a by that
; at %b.
; Expects ld.d from 0($5) and 0($6), div_u.d on the two loaded registers, and
; st.d of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: div_u_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = udiv <2 x i64> %1, %2
  ; CHECK-DAG: div_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size div_u_v2i64
}
599
; Element-wise signed remainder (srem) of the <16 x i8> value at %a by that
; at %b.
; Expects ld.b from 0($5) and 0($6), mod_s.b on the two loaded registers, and
; st.b of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @mod_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: mod_s_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = srem <16 x i8> %1, %2
  ; CHECK-DAG: mod_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_s_v16i8
}
615
; Element-wise signed remainder (srem) of the <8 x i16> value at %a by that
; at %b.
; Expects ld.h from 0($5) and 0($6), mod_s.h on the two loaded registers, and
; st.h of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @mod_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: mod_s_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = srem <8 x i16> %1, %2
  ; CHECK-DAG: mod_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_s_v8i16
}
631
; Element-wise signed remainder (srem) of the <4 x i32> value at %a by that
; at %b.
; Expects ld.w from 0($5) and 0($6), mod_s.w on the two loaded registers, and
; st.w of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @mod_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: mod_s_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = srem <4 x i32> %1, %2
  ; CHECK-DAG: mod_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_s_v4i32
}
647
; Element-wise signed remainder (srem) of the <2 x i64> value at %a by that
; at %b.
; Expects ld.d from 0($5) and 0($6), mod_s.d on the two loaded registers, and
; st.d of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @mod_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: mod_s_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = srem <2 x i64> %1, %2
  ; CHECK-DAG: mod_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_s_v2i64
}
663
; Element-wise unsigned remainder (urem) of the <16 x i8> value at %a by that
; at %b.
; Expects ld.b from 0($5) and 0($6), mod_u.b on the two loaded registers, and
; st.b of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @mod_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: mod_u_v16i8:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = urem <16 x i8> %1, %2
  ; CHECK-DAG: mod_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_u_v16i8
}
679
; Element-wise unsigned remainder (urem) of the <8 x i16> value at %a by that
; at %b.
; Expects ld.h from 0($5) and 0($6), mod_u.h on the two loaded registers, and
; st.h of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @mod_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: mod_u_v8i16:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = urem <8 x i16> %1, %2
  ; CHECK-DAG: mod_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_u_v8i16
}
695
; Element-wise unsigned remainder (urem) of the <4 x i32> value at %a by that
; at %b.
; Expects ld.w from 0($5) and 0($6), mod_u.w on the two loaded registers, and
; st.w of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @mod_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: mod_u_v4i32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = urem <4 x i32> %1, %2
  ; CHECK-DAG: mod_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_u_v4i32
}
711
; Element-wise unsigned remainder (urem) of the <2 x i64> value at %a by that
; at %b.
; Expects ld.d from 0($5) and 0($6), mod_u.d on the two loaded registers, and
; st.d of the result to 0($4). The label match keeps these expectations
; confined to this function's assembly.
define void @mod_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: mod_u_v2i64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = urem <2 x i64> %1, %2
  ; CHECK-DAG: mod_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mod_u_v2i64
}
727