; Cost-model test for x86-64 vector arithmetic. The same module is analysed
; four times, once per feature level (ssse3, sse4.2, avx, avx2). Every run is
; matched against the shared label checks plus the prefix that is specific to
; its feature level.
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Integer vector add. Pins the reported cost for 128-bit (<4 x i32>,
; <2 x i64>), 256-bit (<8 x i32>, <4 x i64>) and 512-bit (<8 x i64>) operands
; under each feature level. All operands are undef and %arg is unused; the
; function exists only so the cost model has instructions to report on.
; CHECK-LABEL: 'add'
define i32 @add(i32 %arg) {
  ; 128-bit i32 add: expected cost 1 at every feature level.
  ; SSSE3: cost of 1 {{.*}} %A = add
  ; SSE42: cost of 1 {{.*}} %A = add
  ; AVX: cost of 1 {{.*}} %A = add
  ; AVX2: cost of 1 {{.*}} %A = add
  %A = add <4 x i32> undef, undef
  ; 256-bit i32 add: 2 on the SSE levels, 4 on avx, 1 on avx2.
  ; SSSE3: cost of 2 {{.*}} %B = add
  ; SSE42: cost of 2 {{.*}} %B = add
  ; AVX: cost of 4 {{.*}} %B = add
  ; AVX2: cost of 1 {{.*}} %B = add
  %B = add <8 x i32> undef, undef
  ; 128-bit i64 add: expected cost 1 at every feature level.
  ; SSSE3: cost of 1 {{.*}} %C = add
  ; SSE42: cost of 1 {{.*}} %C = add
  ; AVX: cost of 1 {{.*}} %C = add
  ; AVX2: cost of 1 {{.*}} %C = add
  %C = add <2 x i64> undef, undef
  ; 256-bit i64 add: same pattern as the 256-bit i32 case.
  ; SSSE3: cost of 2 {{.*}} %D = add
  ; SSE42: cost of 2 {{.*}} %D = add
  ; AVX: cost of 4 {{.*}} %D = add
  ; AVX2: cost of 1 {{.*}} %D = add
  %D = add <4 x i64> undef, undef
  ; 512-bit i64 add: each expectation is double the 256-bit i64 value.
  ; SSSE3: cost of 4 {{.*}} %E = add
  ; SSE42: cost of 4 {{.*}} %E = add
  ; AVX: cost of 8 {{.*}} %E = add
  ; AVX2: cost of 2 {{.*}} %E = add
  %E = add <8 x i64> undef, undef
  ret i32 undef
}

; Integer vector xor. Pins the reported cost for 128-bit and 256-bit operands
; with i32 and i64 elements. Unlike add, the 256-bit cases are expected to
; cost 1 on avx as well as on avx2. All operands are undef; %arg is unused.
; CHECK-LABEL: 'xor'
define i32 @xor(i32 %arg) {
  ; 128-bit i32 xor: expected cost 1 at every feature level.
  ; SSSE3: cost of 1 {{.*}} %A = xor
  ; SSE42: cost of 1 {{.*}} %A = xor
  ; AVX: cost of 1 {{.*}} %A = xor
  ; AVX2: cost of 1 {{.*}} %A = xor
  %A = xor <4 x i32> undef, undef
  ; 256-bit i32 xor: 2 on the SSE levels, 1 on avx and avx2.
  ; SSSE3: cost of 2 {{.*}} %B = xor
  ; SSE42: cost of 2 {{.*}} %B = xor
  ; AVX: cost of 1 {{.*}} %B = xor
  ; AVX2: cost of 1 {{.*}} %B = xor
  %B = xor <8 x i32> undef, undef
  ; 128-bit i64 xor: expected cost 1 at every feature level.
  ; SSSE3: cost of 1 {{.*}} %C = xor
  ; SSE42: cost of 1 {{.*}} %C = xor
  ; AVX: cost of 1 {{.*}} %C = xor
  ; AVX2: cost of 1 {{.*}} %C = xor
  %C = xor <2 x i64> undef, undef
  ; 256-bit i64 xor: 2 on the SSE levels, 1 on avx and avx2.
  ; SSSE3: cost of 2 {{.*}} %D = xor
  ; SSE42: cost of 2 {{.*}} %D = xor
  ; AVX: cost of 1 {{.*}} %D = xor
  ; AVX2: cost of 1 {{.*}} %D = xor
  %D = xor <4 x i64> undef, undef
  ret i32 undef
}

; Integer vector multiply. Pins the reported cost for <2 x i32>, <4 x i32>,
; <2 x i64> and <4 x i64> operands under each feature level. All operands are
; undef.
; CHECK-LABEL: 'mul'
define void @mul() {
  ; A <2 x i32> gets expanded to a <2 x i64> vector.
  ; A <2 x i64> vector multiply is implemented using
  ; 3 PMULUDQ, 2 PADDQ and 4 shifts (3 + 2 + 4 = 9 operations).
  ; SSSE3: cost of 9 {{.*}} %A0 = mul
  ; SSE42: cost of 9 {{.*}} %A0 = mul
  ; AVX: cost of 9 {{.*}} %A0 = mul
  ; AVX2: cost of 9 {{.*}} %A0 = mul
  %A0 = mul <2 x i32> undef, undef
  ; <4 x i32> multiply: 6 on ssse3, 1 from sse4.2 upwards.
  ; SSSE3: cost of 6 {{.*}} %A1 = mul
  ; SSE42: cost of 1 {{.*}} %A1 = mul
  ; AVX: cost of 1 {{.*}} %A1 = mul
  ; AVX2: cost of 1 {{.*}} %A1 = mul
  %A1 = mul <4 x i32> undef, undef
  ; <2 x i64> multiply: the 9-operation sequence described above.
  ; SSSE3: cost of 9 {{.*}} %A2 = mul
  ; SSE42: cost of 9 {{.*}} %A2 = mul
  ; AVX: cost of 9 {{.*}} %A2 = mul
  ; AVX2: cost of 9 {{.*}} %A2 = mul
  %A2 = mul <2 x i64> undef, undef
  ; <4 x i64> multiply: twice the <2 x i64> cost except on avx2, where the
  ; expectation stays at 9.
  ; SSSE3: cost of 18 {{.*}} %A3 = mul
  ; SSE42: cost of 18 {{.*}} %A3 = mul
  ; AVX: cost of 18 {{.*}} %A3 = mul
  ; AVX2: cost of 9 {{.*}} %A3 = mul
  %A3 = mul <4 x i64> undef, undef
  ret void
}

; Floating-point vector multiply. Pins the reported cost for <4 x float> and
; <8 x float> operands under each feature level. All operands are undef;
; %arg is unused.
; CHECK-LABEL: 'fmul'
define i32 @fmul(i32 %arg) {
  ; 128-bit float multiply: expected cost 2 at every feature level.
  ; SSSE3: cost of 2 {{.*}} %A = fmul
  ; SSE42: cost of 2 {{.*}} %A = fmul
  ; AVX: cost of 2 {{.*}} %A = fmul
  ; AVX2: cost of 2 {{.*}} %A = fmul
  %A = fmul <4 x float> undef, undef
  ; 256-bit float multiply: 4 on the SSE levels, 2 on avx and avx2.
  ; SSSE3: cost of 4 {{.*}} %B = fmul
  ; SSE42: cost of 4 {{.*}} %B = fmul
  ; AVX: cost of 2 {{.*}} %B = fmul
  ; AVX2: cost of 2 {{.*}} %B = fmul
  %B = fmul <8 x float> undef, undef
  ret i32 undef
}

; Vector shifts (shl, lshr, ashr) where both the value and the shift amount
; are undef vectors. Each opcode is checked on <4 x i32>, <2 x i64>,
; <8 x i32> and <4 x i64>. On avx2 every case is expected to cost 1 except
; ashr on i64 elements, which is expected to cost 4. On the other three
; feature levels the 256-bit expectation is always twice the 128-bit one.
; CHECK-LABEL: 'shift'
define void @shift() {
  ; --- shl ---
  ; SSSE3: cost of 10 {{.*}} %A0 = shl
  ; SSE42: cost of 10 {{.*}} %A0 = shl
  ; AVX: cost of 10 {{.*}} %A0 = shl
  ; AVX2: cost of 1 {{.*}} %A0 = shl
  %A0 = shl <4 x i32> undef, undef
  ; SSSE3: cost of 4 {{.*}} %A1 = shl
  ; SSE42: cost of 4 {{.*}} %A1 = shl
  ; AVX: cost of 4 {{.*}} %A1 = shl
  ; AVX2: cost of 1 {{.*}} %A1 = shl
  %A1 = shl <2 x i64> undef, undef
  ; SSSE3: cost of 20 {{.*}} %A2 = shl
  ; SSE42: cost of 20 {{.*}} %A2 = shl
  ; AVX: cost of 20 {{.*}} %A2 = shl
  ; AVX2: cost of 1 {{.*}} %A2 = shl
  %A2 = shl <8 x i32> undef, undef
  ; SSSE3: cost of 8 {{.*}} %A3 = shl
  ; SSE42: cost of 8 {{.*}} %A3 = shl
  ; AVX: cost of 8 {{.*}} %A3 = shl
  ; AVX2: cost of 1 {{.*}} %A3 = shl
  %A3 = shl <4 x i64> undef, undef

  ; --- lshr ---
  ; SSSE3: cost of 16 {{.*}} %B0 = lshr
  ; SSE42: cost of 16 {{.*}} %B0 = lshr
  ; AVX: cost of 16 {{.*}} %B0 = lshr
  ; AVX2: cost of 1 {{.*}} %B0 = lshr
  %B0 = lshr <4 x i32> undef, undef
  ; SSSE3: cost of 4 {{.*}} %B1 = lshr
  ; SSE42: cost of 4 {{.*}} %B1 = lshr
  ; AVX: cost of 4 {{.*}} %B1 = lshr
  ; AVX2: cost of 1 {{.*}} %B1 = lshr
  %B1 = lshr <2 x i64> undef, undef
  ; SSSE3: cost of 32 {{.*}} %B2 = lshr
  ; SSE42: cost of 32 {{.*}} %B2 = lshr
  ; AVX: cost of 32 {{.*}} %B2 = lshr
  ; AVX2: cost of 1 {{.*}} %B2 = lshr
  %B2 = lshr <8 x i32> undef, undef
  ; SSSE3: cost of 8 {{.*}} %B3 = lshr
  ; SSE42: cost of 8 {{.*}} %B3 = lshr
  ; AVX: cost of 8 {{.*}} %B3 = lshr
  ; AVX2: cost of 1 {{.*}} %B3 = lshr
  %B3 = lshr <4 x i64> undef, undef

  ; --- ashr ---
  ; SSSE3: cost of 16 {{.*}} %C0 = ashr
  ; SSE42: cost of 16 {{.*}} %C0 = ashr
  ; AVX: cost of 16 {{.*}} %C0 = ashr
  ; AVX2: cost of 1 {{.*}} %C0 = ashr
  %C0 = ashr <4 x i32> undef, undef
  ; i64 arithmetic shift right: the only avx2 expectations above 1.
  ; SSSE3: cost of 12 {{.*}} %C1 = ashr
  ; SSE42: cost of 12 {{.*}} %C1 = ashr
  ; AVX: cost of 12 {{.*}} %C1 = ashr
  ; AVX2: cost of 4 {{.*}} %C1 = ashr
  %C1 = ashr <2 x i64> undef, undef
  ; SSSE3: cost of 32 {{.*}} %C2 = ashr
  ; SSE42: cost of 32 {{.*}} %C2 = ashr
  ; AVX: cost of 32 {{.*}} %C2 = ashr
  ; AVX2: cost of 1 {{.*}} %C2 = ashr
  %C2 = ashr <8 x i32> undef, undef
  ; SSSE3: cost of 24 {{.*}} %C3 = ashr
  ; SSE42: cost of 24 {{.*}} %C3 = ashr
  ; AVX: cost of 24 {{.*}} %C3 = ashr
  ; AVX2: cost of 4 {{.*}} %C3 = ashr
  %C3 = ashr <4 x i64> undef, undef

  ret void
}
