1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=core2 -mattr=+sse2 | FileCheck %s
3
4; The entire body of @test1 is folded into a single paddd instruction, as the
5; assertions below verify. Before the widening legalization, it was lowered to
6; the sequence pshufd+paddq+pshufd.
7
8define double @test1(double %A) { ; integer add of a constant vector to a double reinterpreted as <2 x i32>
9; CHECK-LABEL: test1:
10; CHECK:       # %bb.0:
11; CHECK-NEXT:    paddd {{.*}}(%rip), %xmm0
12; CHECK-NEXT:    retq
13  %1 = bitcast double %A to <2 x i32> ; view the bits of %A as two i32 lanes
14  %add = add <2 x i32> %1, <i32 3, i32 5> ; lane-wise integer add of the constants 3 and 5
15  %2 = bitcast <2 x i32> %add to double ; reinterpret the sum as a double again
16  ret double %2
17}
18
19define double @test2(double %A, double %B) { ; integer add of two doubles, both reinterpreted as <2 x i32>
20; CHECK-LABEL: test2:
21; CHECK:       # %bb.0:
22; CHECK-NEXT:    paddd %xmm1, %xmm0
23; CHECK-NEXT:    retq
24  %1 = bitcast double %A to <2 x i32> ; view the bits of %A as two i32 lanes
25  %2 = bitcast double %B to <2 x i32> ; view the bits of %B as two i32 lanes
26  %add = add <2 x i32> %1, %2 ; lane-wise integer add of the two operands
27  %3 = bitcast <2 x i32> %add to double ; reinterpret the sum as a double again
28  ret double %3
29}
30
31define i64 @test3(i64 %A) { ; floating-point add of a constant vector to an i64 reinterpreted as <2 x float>
32; CHECK-LABEL: test3:
33; CHECK:       # %bb.0:
34; CHECK-NEXT:    movq %rdi, %xmm0
35; CHECK-NEXT:    addps {{.*}}(%rip), %xmm0
36; CHECK-NEXT:    movq %xmm0, %rax
37; CHECK-NEXT:    retq
38  %1 = bitcast i64 %A to <2 x float> ; view the bits of %A as two float lanes
39  %add = fadd <2 x float> %1, <float 3.0, float 5.0> ; lane-wise float add of the constants 3.0 and 5.0
40  %2 = bitcast <2 x float> %add to i64 ; reinterpret the sum as an i64 again
41  ret i64 %2
42}
43
44; The <2 x i32> add in @test4 is lowered to a single paddd instruction (plus the
45; movq copies to and from %xmm0). This relies on the widening legalization.
46
47define i64 @test4(i64 %A) { ; integer add of a constant vector to an i64 reinterpreted as <2 x i32>
48; CHECK-LABEL: test4:
49; CHECK:       # %bb.0:
50; CHECK-NEXT:    movq %rdi, %xmm0
51; CHECK-NEXT:    paddd {{.*}}(%rip), %xmm0
52; CHECK-NEXT:    movq %xmm0, %rax
53; CHECK-NEXT:    retq
54  %1 = bitcast i64 %A to <2 x i32> ; view the bits of %A as two i32 lanes
55  %add = add <2 x i32> %1, <i32 3, i32 5> ; lane-wise integer add of the constants 3 and 5
56  %2 = bitcast <2 x i32> %add to i64 ; reinterpret the sum as an i64 again
57  ret i64 %2
58}
59
60define double @test5(double %A) { ; floating-point add of a constant vector to a double reinterpreted as <2 x float>
61; CHECK-LABEL: test5:
62; CHECK:       # %bb.0:
63; CHECK-NEXT:    addps {{.*}}(%rip), %xmm0
64; CHECK-NEXT:    retq
65  %1 = bitcast double %A to <2 x float> ; view the bits of %A as two float lanes
66  %add = fadd <2 x float> %1, <float 3.0, float 5.0> ; lane-wise float add of the constants 3.0 and 5.0
67  %2 = bitcast <2 x float> %add to double ; reinterpret the sum as a double again
68  ret double %2
69}
70
71; The entire body of @test6 is folded into a single paddw instruction. This
72; relies on the widening legalization.
73
74define double @test6(double %A) { ; integer add of a constant vector to a double reinterpreted as <4 x i16>
75; CHECK-LABEL: test6:
76; CHECK:       # %bb.0:
77; CHECK-NEXT:    paddw {{.*}}(%rip), %xmm0
78; CHECK-NEXT:    retq
79  %1 = bitcast double %A to <4 x i16> ; view the bits of %A as four i16 lanes
80  %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6> ; lane-wise integer add of the constants 3..6
81  %2 = bitcast <4 x i16> %add to double ; reinterpret the sum as a double again
82  ret double %2
83}
84
85define double @test7(double %A, double %B) { ; integer add of two doubles, both reinterpreted as <4 x i16>
86; CHECK-LABEL: test7:
87; CHECK:       # %bb.0:
88; CHECK-NEXT:    paddw %xmm1, %xmm0
89; CHECK-NEXT:    retq
90  %1 = bitcast double %A to <4 x i16> ; view the bits of %A as four i16 lanes
91  %2 = bitcast double %B to <4 x i16> ; view the bits of %B as four i16 lanes
92  %add = add <4 x i16> %1, %2 ; lane-wise integer add of the two operands
93  %3 = bitcast <4 x i16> %add to double ; reinterpret the sum as a double again
94  ret double %3
95}
96
97; The entire body of @test8 is folded into a single paddb instruction, as the
98; assertions below verify. Before the widening legalization, it was lowered to
99; the sequence pshufd+paddw+pshufd.
100
101define double @test8(double %A) { ; integer add of a constant vector to a double reinterpreted as <8 x i8>
102; CHECK-LABEL: test8:
103; CHECK:       # %bb.0:
104; CHECK-NEXT:    paddb {{.*}}(%rip), %xmm0
105; CHECK-NEXT:    retq
106  %1 = bitcast double %A to <8 x i8> ; view the bits of %A as eight i8 lanes
107  %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10> ; lane-wise integer add of the constants 3..10
108  %2 = bitcast <8 x i8> %add to double ; reinterpret the sum as a double again
109  ret double %2
110}
111
112define double @test9(double %A, double %B) { ; integer add of two doubles, both reinterpreted as <8 x i8>
113; CHECK-LABEL: test9:
114; CHECK:       # %bb.0:
115; CHECK-NEXT:    paddb %xmm1, %xmm0
116; CHECK-NEXT:    retq
117  %1 = bitcast double %A to <8 x i8> ; view the bits of %A as eight i8 lanes
118  %2 = bitcast double %B to <8 x i8> ; view the bits of %B as eight i8 lanes
119  %add = add <8 x i8> %1, %2 ; lane-wise integer add of the two operands
120  %3 = bitcast <8 x i8> %add to double ; reinterpret the sum as a double again
121  ret double %3
122}
123