1; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
2; There are no MMX operations here, so we use XMM or i64.
3
; Plain <8 x i8> add on two doubles reinterpreted as byte vectors. No MMX
; intrinsic is involved, so the add is checked on XMM registers.
; CHECK-LABEL: ti8:
define void @ti8(double %a, double %b) nounwind {
entry:
        %tmp1 = bitcast double %a to <8 x i8>
        %tmp2 = bitcast double %b to <8 x i8>
        %tmp3 = add <8 x i8> %tmp1, %tmp2
; CHECK:  paddb %xmm1, %xmm0
        ; The store keeps the result live so the add is not removed as dead.
        store <8 x i8> %tmp3, <8 x i8>* null
        ret void
}

; Plain <4 x i16> add on two doubles reinterpreted as word vectors. No MMX
; intrinsic is involved, so the add is checked on XMM registers.
; CHECK-LABEL: ti16:
define void @ti16(double %a, double %b) nounwind {
entry:
        %tmp1 = bitcast double %a to <4 x i16>
        %tmp2 = bitcast double %b to <4 x i16>
        %tmp3 = add <4 x i16> %tmp1, %tmp2
; CHECK:  paddw %xmm1, %xmm0
        ; The store keeps the result live so the add is not removed as dead.
        store <4 x i16> %tmp3, <4 x i16>* null
        ret void
}

; Plain <2 x i32> add on two doubles reinterpreted as dword vectors. No MMX
; intrinsic is involved, so the add is checked on XMM registers.
; CHECK-LABEL: ti32:
define void @ti32(double %a, double %b) nounwind {
entry:
        %tmp1 = bitcast double %a to <2 x i32>
        %tmp2 = bitcast double %b to <2 x i32>
        %tmp3 = add <2 x i32> %tmp1, %tmp2
; CHECK:  paddd %xmm1, %xmm0
        ; The store keeps the result live so the add is not removed as dead.
        store <2 x i32> %tmp3, <2 x i32>* null
        ret void
}

; Plain <1 x i64> add on two doubles reinterpreted as single-element vectors.
; No MMX intrinsic is involved; the add is checked as a 64-bit integer addq.
; CHECK-LABEL: ti64:
define void @ti64(double %a, double %b) nounwind {
entry:
        %tmp1 = bitcast double %a to <1 x i64>
        %tmp2 = bitcast double %b to <1 x i64>
        %tmp3 = add <1 x i64> %tmp1, %tmp2
; CHECK:  addq
        ; The store keeps the result live so the add is not removed as dead.
        store <1 x i64> %tmp3, <1 x i64>* null
        ret void
}

; MMX intrinsic calls get us MMX instructions.
; Both double arguments are bitcast to x86_mmx (checked as movdq2q) and then
; added with the llvm.x86.mmx.padd.b intrinsic, checked as paddb on MMX
; registers.
; CHECK-LABEL: ti8a:
define void @ti8a(double %a, double %b) nounwind {
entry:
        %tmp1 = bitcast double %a to x86_mmx
; CHECK: movdq2q
        %tmp2 = bitcast double %b to x86_mmx
; CHECK: movdq2q
        %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %tmp1, x86_mmx %tmp2)
; CHECK: paddb %mm{{[0-7]}}, %mm{{[0-7]}}
        store x86_mmx %tmp3, x86_mmx* null
        ret void
}

; Both double arguments are bitcast to x86_mmx (checked as movdq2q) and then
; added with the llvm.x86.mmx.padd.w intrinsic, checked as paddw on MMX
; registers.
; CHECK-LABEL: ti16a:
define void @ti16a(double %a, double %b) nounwind {
entry:
        %tmp1 = bitcast double %a to x86_mmx
; CHECK: movdq2q
        %tmp2 = bitcast double %b to x86_mmx
; CHECK: movdq2q
        %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %tmp1, x86_mmx %tmp2)
; CHECK: paddw %mm{{[0-7]}}, %mm{{[0-7]}}
        store x86_mmx %tmp3, x86_mmx* null
        ret void
}

; Both double arguments are bitcast to x86_mmx (checked as movdq2q) and then
; added with the llvm.x86.mmx.padd.d intrinsic, checked as paddd on MMX
; registers.
; CHECK-LABEL: ti32a:
define void @ti32a(double %a, double %b) nounwind {
entry:
        %tmp1 = bitcast double %a to x86_mmx
; CHECK: movdq2q
        %tmp2 = bitcast double %b to x86_mmx
; CHECK: movdq2q
        %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %tmp1, x86_mmx %tmp2)
; CHECK: paddd %mm{{[0-7]}}, %mm{{[0-7]}}
        store x86_mmx %tmp3, x86_mmx* null
        ret void
}

; Both double arguments are bitcast to x86_mmx (checked as movdq2q) and then
; added with the llvm.x86.mmx.padd.q intrinsic, checked as paddq on MMX
; registers.
; CHECK-LABEL: ti64a:
define void @ti64a(double %a, double %b) nounwind {
entry:
        %tmp1 = bitcast double %a to x86_mmx
; CHECK: movdq2q
        %tmp2 = bitcast double %b to x86_mmx
; CHECK: movdq2q
        %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %tmp1, x86_mmx %tmp2)
; CHECK: paddq %mm{{[0-7]}}, %mm{{[0-7]}}
        store x86_mmx %tmp3, x86_mmx* null
        ret void
}

; MMX packed-add intrinsics called by the ti8a, ti16a, ti32a and ti64a
; functions above; each takes two x86_mmx operands and returns an x86_mmx.
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
