; Cost-model test for cast instructions on x86-64. opt is invoked three times
; with "-cost-model -analyze" (-mcpu=knl, core-avx2 and corei7-avx); the output
; of each invocation is verified with the shared CHECK prefix plus one
; CPU-specific prefix (-AVX512, -AVX2 or -AVX respectively).
1; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
2; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
3; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX
4
5target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
6target triple = "x86_64-apple-macosx10.8.0"
7
; Cost checks for zext/sext/trunc between i1 and i32 on <4 x ...> vectors,
; <8 x ...> vectors and scalars. Every pattern in this function uses the shared
; prefix, so the same expectations apply to all three opt invocations.
; The %arg parameter is unused; all cast operands are undef.
8define i32 @add(i32 %arg) {
9; CHECK-LABEL: for function 'add'
10  ; -- Same size registers --
11  ;CHECK: cost of 1 {{.*}} zext
12  %A = zext <4 x i1> undef to <4 x i32>
13  ;CHECK: cost of 2 {{.*}} sext
14  %B = sext <4 x i1> undef to <4 x i32>
15  ;CHECK: cost of 0 {{.*}} trunc
16  %C = trunc <4 x i32> undef to <4 x i1>
17
18  ; -- Different size registers --
  ; Negative patterns: between the surrounding positive matches, no output
  ; line may read "cost of 1 ... zext", "cost of 2 ... sext" or
  ; "cost of 2 ... trunc". The exact costs of the 8-wide casts are not pinned.
19  ;CHECK-NOT: cost of 1 {{.*}} zext
20  %D = zext <8 x i1> undef to <8 x i32>
21  ;CHECK-NOT: cost of 2 {{.*}} sext
22  %E = sext <8 x i1> undef to <8 x i32>
23  ;CHECK-NOT: cost of 2 {{.*}} trunc
24  %F = trunc <8 x i32> undef to <8 x i1>
25
26  ; -- scalars --
27
28  ;CHECK: cost of 1 {{.*}} zext
29  %G = zext i1 undef to i32
30  ;CHECK: cost of 0 {{.*}} trunc
31  %H = trunc i32 undef to i1
32
33  ;CHECK: cost of 0 {{.*}} ret
34  ret i32 undef
35}
36
; Cost checks for integer vector zext/sext/trunc. Expected costs for the
; core-avx2 and corei7-avx runs are listed side by side before each cast.
; Casts whose source or result type is 512 bits wide (%D1-%D5, %G, %G1, %G2)
; are checked for the knl run; those patterns include the value name so they
; cannot bind to an earlier cast of the same opcode.
37define i32 @zext_sext(<8 x i1> %in) {
38; CHECK-AVX2-LABEL: for function 'zext_sext'
39; CHECK-AVX-LABEL: for function 'zext_sext'
40  ;CHECK-AVX2: cost of 3 {{.*}} zext
41  ;CHECK-AVX: cost of 4 {{.*}} zext
42  %Z = zext <8 x i1> %in to <8 x i32>
43  ;CHECK-AVX2: cost of 3 {{.*}} sext
44  ;CHECK-AVX: cost of 7 {{.*}} sext
45  %S = sext <8 x i1> %in to <8 x i32>
46
47  ;CHECK-AVX2: cost of 1 {{.*}} zext
48  ;CHECK-AVX: cost of 4 {{.*}} zext
49  %A1 = zext <16 x i8> undef to <16 x i16>
50  ;CHECK-AVX2: cost of 1 {{.*}} sext
51  ;CHECK-AVX: cost of 4 {{.*}} sext
52  %A2 = sext <16 x i8> undef to <16 x i16>
53  ;CHECK-AVX2: cost of 1 {{.*}} sext
54  ;CHECK-AVX: cost of 4 {{.*}} sext
55  %A = sext <8 x i16> undef to <8 x i32>
56  ;CHECK-AVX2: cost of 1 {{.*}} zext
57  ;CHECK-AVX: cost of 4 {{.*}} zext
58  %B = zext <8 x i16> undef to <8 x i32>
59  ;CHECK-AVX2: cost of 1 {{.*}} sext
60  ;CHECK-AVX: cost of 4 {{.*}} sext
61  %C = sext <4 x i32> undef to <4 x i64>
62
63  ;CHECK-AVX2: cost of 3 {{.*}} zext
64  ;CHECK-AVX: cost of 4 {{.*}} zext
65  %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
66  ;CHECK-AVX2: cost of 3 {{.*}} sext
67  ;CHECK-AVX: cost of 7 {{.*}} sext
68  %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
69  ;CHECK-AVX2: cost of 3 {{.*}} zext
70  ;CHECK-AVX: cost of 3 {{.*}} zext
71  %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
72  ;CHECK-AVX2: cost of 3 {{.*}} sext
73  ;CHECK-AVX: cost of 6 {{.*}} sext
74  %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
75
76  ;CHECK-AVX2: cost of 3 {{.*}} zext
77  ;CHECK-AVX: cost of 4 {{.*}} zext
78  %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
79  ;CHECK-AVX2: cost of 3 {{.*}} sext
80  ;CHECK-AVX: cost of 6 {{.*}} sext
81  %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
82
83  ;CHECK-AVX2: cost of 1 {{.*}} zext
84  ;CHECK-AVX: cost of 4 {{.*}} zext
85  %D = zext <4 x i32> undef to <4 x i64>
86
87  ;CHECK-AVX512: cost of 1 {{.*}} %D1 = zext
88  %D1 = zext <8 x i32> undef to <8 x i64>
89
90  ;CHECK-AVX512: cost of 1 {{.*}} %D2 = sext
91  %D2 = sext <8 x i32> undef to <8 x i64>
92
93  ;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
94  %D3 = zext <16 x i16> undef to <16 x i32>
95  ;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
96  %D4 = zext <16 x i8> undef to <16 x i32>
97  ;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
98  %D5 = zext <16 x i1> undef to <16 x i32>
99
100  ;CHECK-AVX2: cost of 2 {{.*}} trunc
101  ;CHECK-AVX: cost of 4 {{.*}} trunc
102  %E = trunc <4 x i64> undef to <4 x i32>
103  ;CHECK-AVX2: cost of 2 {{.*}} trunc
104  ;CHECK-AVX: cost of 5 {{.*}} trunc
105  %F = trunc <8 x i32> undef to <8 x i16>
106  ;CHECK-AVX2: cost of 4 {{.*}} trunc
107  ;CHECK-AVX: cost of 4 {{.*}} trunc
108  %F1 = trunc <16 x i16> undef to <16 x i8>
109  ;CHECK-AVX2: cost of 2 {{.*}} trunc
110  ;CHECK-AVX: cost of 4 {{.*}} trunc
111  %F2 = trunc <8 x i32> undef to <8 x i8>
112  ;CHECK-AVX2: cost of 2 {{.*}} trunc
113  ;CHECK-AVX: cost of 4 {{.*}} trunc
114  %F3 = trunc <4 x i64> undef to <4 x i8>
115
  ; The knl pattern below must spell its prefix with a hyphen, exactly as it is
  ; passed on the command line; an underscore spelling is not a recognized
  ; prefix and FileCheck would silently skip the line.
116  ;CHECK-AVX2: cost of 4 {{.*}} trunc
117  ;CHECK-AVX: cost of 9 {{.*}} trunc
118  ;CHECK-AVX512: cost of 1 {{.*}} %G = trunc
119  %G = trunc <8 x i64> undef to <8 x i32>
120
121  ;CHECK-AVX512: cost of 1 {{.*}} %G1 = trunc
122  %G1 = trunc <16 x i32> undef to <16 x i16>
123
124  ;CHECK-AVX512: cost of 1 {{.*}} %G2 = trunc
125  %G2 = trunc <16 x i32> undef to <16 x i8>
126  ret i32 undef
127}
128
; Cost checks for zext/sext of an <8 x i1> argument to <8 x i32>. Expected
; costs are listed only for the core-avx2 and corei7-avx runs; no pattern in
; this function applies to the knl run.
129define i32 @masks8(<8 x i1> %in) {
130; CHECK-AVX2-LABEL: for function 'masks8'
131; CHECK-AVX-LABEL: for function 'masks8'
132
133  ;CHECK-AVX2: cost of 3 {{.*}} zext
134  ;CHECK-AVX: cost of 4 {{.*}} zext
135  %Z = zext <8 x i1> %in to <8 x i32>
136  ;CHECK-AVX2: cost of 3 {{.*}} sext
137  ;CHECK-AVX: cost of 7 {{.*}} sext
138  %S = sext <8 x i1> %in to <8 x i32>
139  ret i32 undef
140}
141
; Cost checks for zext/sext of a <4 x i1> argument to <4 x i64>. Expected
; costs are listed only for the core-avx2 and corei7-avx runs; no pattern in
; this function applies to the knl run.
142define i32 @masks4(<4 x i1> %in) {
143; CHECK-AVX2-LABEL: for function 'masks4'
144; CHECK-AVX-LABEL: for function 'masks4'
145
146  ;CHECK-AVX2: cost of 3 {{.*}} zext
147  ;CHECK-AVX: cost of 4 {{.*}} zext
148  %Z = zext <4 x i1> %in to <4 x i64>
149  ;CHECK-AVX2: cost of 3 {{.*}} sext
150  ;CHECK-AVX: cost of 6 {{.*}} sext
151  %S = sext <4 x i1> %in to <4 x i64>
152  ret i32 undef
153}
154
; Cost checks for sitofp from 4-element i1/i8/i16/i32 vectors to <4 x float>
; and <4 x double>. All patterns use the shared prefix, so the same cost is
; expected from every opt invocation in this file.
155define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
156; CHECK-LABEL: for function 'sitofp4'
  ; Source element type i1.
157  ; CHECK: cost of 3 {{.*}} sitofp
158  %A1 = sitofp <4 x i1> %a to <4 x float>
159  ; CHECK: cost of 3 {{.*}} sitofp
160  %A2 = sitofp <4 x i1> %a to <4 x double>
161
  ; Source element type i8.
162  ; CHECK: cost of 3 {{.*}} sitofp
163  %B1 = sitofp <4 x i8> %b to <4 x float>
164  ; CHECK: cost of 3 {{.*}} sitofp
165  %B2 = sitofp <4 x i8> %b to <4 x double>
166
  ; Source element type i16.
167  ; CHECK: cost of 3 {{.*}} sitofp
168  %C1 = sitofp <4 x i16> %c to <4 x float>
169  ; CHECK: cost of 3 {{.*}} sitofp
170  %C2 = sitofp <4 x i16> %c to <4 x double>
171
  ; Source element type i32.
172  ; CHECK: cost of 1 {{.*}} sitofp
173  %D1 = sitofp <4 x i32> %d to <4 x float>
174  ; CHECK: cost of 1 {{.*}} sitofp
175  %D2 = sitofp <4 x i32> %d to <4 x double>
176  ret void
177}
178
; Cost checks for sitofp from 8-element i1/i8/i16/i32 vectors to <8 x float>.
; All patterns use the shared prefix, so the same cost is expected from every
; opt invocation in this file.
179define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
180; CHECK-LABEL: for function 'sitofp8'
181  ; CHECK: cost of 8 {{.*}} sitofp
182  %A1 = sitofp <8 x i1> %a to <8 x float>
183
184  ; CHECK: cost of 8 {{.*}} sitofp
185  %B1 = sitofp <8 x i8> %b to <8 x float>
186
187  ; CHECK: cost of 5 {{.*}} sitofp
188  %C1 = sitofp <8 x i16> %c to <8 x float>
189
190  ; CHECK: cost of 1 {{.*}} sitofp
191  %D1 = sitofp <8 x i32> %d to <8 x float>
192  ret void
193}
194
; Cost checks for uitofp from 4-element i1/i8/i16/i32 vectors to <4 x float>
; and <4 x double>. The i1/i8/i16 sources are checked with the shared prefix;
; the i32 source is checked only for the core-avx2 run.
195define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
196; CHECK-LABEL: for function 'uitofp4'
197  ; CHECK: cost of 7 {{.*}} uitofp
198  %A1 = uitofp <4 x i1> %a to <4 x float>
199  ; CHECK: cost of 7 {{.*}} uitofp
200  %A2 = uitofp <4 x i1> %a to <4 x double>
201
202  ; CHECK: cost of 2 {{.*}} uitofp
203  %B1 = uitofp <4 x i8> %b to <4 x float>
204  ; CHECK: cost of 2 {{.*}} uitofp
205  %B2 = uitofp <4 x i8> %b to <4 x double>
206
207  ; CHECK: cost of 2 {{.*}} uitofp
208  %C1 = uitofp <4 x i16> %c to <4 x float>
209  ; CHECK: cost of 2 {{.*}} uitofp
210  %C2 = uitofp <4 x i16> %c to <4 x double>
211
  ; From here on only the core-avx2 run has an expected cost.
212  ; CHECK-AVX2: cost of 6 {{.*}} uitofp
213  %D1 = uitofp <4 x i32> %d to <4 x float>
214  ; CHECK-AVX2: cost of 6 {{.*}} uitofp
215  %D2 = uitofp <4 x i32> %d to <4 x double>
216  ret void
217}
218
; Cost checks for uitofp from 8-element i1/i8/i16/i32 vectors to <8 x float>.
; The function label uses the shared prefix; the individual costs are
; CPU-specific, and not every cast has an expectation for every run.
219define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
220; CHECK-LABEL: for function 'uitofp8'
  ; i1 source: expectation for the core-avx2 run only.
221  ; CHECK-AVX2: cost of 6 {{.*}} uitofp
222  %A1 = uitofp <8 x i1> %a to <8 x float>
223
  ; i8 and i16 sources: expectations for the core-avx2 and knl runs.
224  ; CHECK-AVX2: cost of 5 {{.*}} uitofp
225  ; CHECK-AVX512: cost of 2 {{.*}} uitofp
226  %B1 = uitofp <8 x i8> %b to <8 x float>
227
228  ; CHECK-AVX2: cost of 5 {{.*}} uitofp
229  ; CHECK-AVX512: cost of 2 {{.*}} uitofp
230  %C1 = uitofp <8 x i16> %c to <8 x float>
231
  ; i32 source: expectations for all three runs.
232  ; CHECK-AVX2: cost of 8 {{.*}} uitofp
233  ; CHECK-AVX512: cost of 1 {{.*}} uitofp
234  ; CHECK-AVX: cost of 9 {{.*}} uitofp
235  %D1 = uitofp <8 x i32> %d to <8 x float>
236  ret void
237}
238
; Cost checks for floating-point vector conversions (fpext float->double and
; fptrunc double->float). Every cast has an expectation for the knl run; %A3
; and %A4 also have one for the core-avx2 run. The %b parameter is unused.
239define void @fp_conv(<8 x float> %a, <16 x float>%b, <4 x float> %c) {
240;CHECK-LABEL: for function 'fp_conv'
241  ; CHECK-AVX512: cost of 1 {{.*}} fpext
242  %A1 = fpext <8 x float> %a to <8 x double>
243
244  ; CHECK-AVX512: cost of 1 {{.*}} fpext
245  %A2 = fpext <4 x float> %c to <4 x double>
246
  ; %A3 repeats the %A1 cast; its patterns are anchored on the value name.
247  ; CHECK-AVX2:   cost of 3 {{.*}} %A3 = fpext
248  ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
249  %A3 = fpext <8 x float> %a to <8 x double>
250
251  ; CHECK-AVX2:   cost of 3 {{.*}} %A4 = fptrunc
252  ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
253  %A4 = fptrunc <8 x double> undef to <8 x float>
254
255  ; CHECK-AVX512: cost of 1 {{.*}} %A5 = fptrunc
256  %A5 = fptrunc <4 x double> undef to <4 x float>
257  ret void
258}
259