; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX1
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2

; Verify the cost of vector population count instructions.

; Declarations of the llvm.ctpop intrinsic for the 128-bit vector types.
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)

; Declarations of the llvm.ctpop intrinsic for the 256-bit vector types.
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)

; Population count of a <2 x i64> vector; a cost of 2 is expected under every check prefix.
define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v2i64':
; SSE: Found an estimated cost of 2 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  ret <2 x i64> %ctpop
}

; Population count of a <4 x i64> vector; the expected cost is 4 under the SSE prefix
; and 2 under the AVX and XOP prefixes.
define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i64':
; SSE: Found an estimated cost of 4 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  ret <4 x i64> %ctpop
}

; Population count of a <4 x i32> vector; a cost of 2 is expected under every check prefix.
define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i32':
; SSE: Found an estimated cost of 2 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
  ret <4 x i32> %ctpop
}

; Population count of an <8 x i32> vector; the expected cost is 4 under the SSE prefix
; and 2 under the AVX and XOP prefixes.
define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i32':
; SSE: Found an estimated cost of 4 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  ret <8 x i32> %ctpop
}

; Population count of an <8 x i16> vector; a cost of 2 is expected under every check prefix.
define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i16':
; SSE: Found an estimated cost of 2 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
  ret <8 x i16> %ctpop
}

; Population count of a <16 x i16> vector; the expected cost is 4 under the SSE prefix
; and 2 under the AVX and XOP prefixes.
define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i16':
; SSE: Found an estimated cost of 4 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
  ret <16 x i16> %ctpop
}

; Population count of a <16 x i8> vector; a cost of 2 is expected under every check prefix.
define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i8':
; SSE: Found an estimated cost of 2 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
  ret <16 x i8> %ctpop
}

; Population count of a <32 x i8> vector; the expected cost is 4 under the SSE prefix
; and 2 under the AVX and XOP prefixes.
define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v32i8':
; SSE: Found an estimated cost of 4 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
  ret <32 x i8> %ctpop
}

; Verify the cost of vector leading zero count instructions.

; Declarations of the llvm.ctlz intrinsic for the 128-bit vector types.
; Each takes the vector operand plus an i1 flag operand.
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)

; Declarations of the llvm.ctlz intrinsic for the 256-bit vector types.
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)

; Leading-zero count of a <2 x i64> vector with the i1 flag operand set to 0;
; a cost of 6 is expected under every check prefix.
define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64':
; SSE: Found an estimated cost of 6 for instruction:   %ctlz
; AVX: Found an estimated cost of 6 for instruction:   %ctlz
; XOP: Found an estimated cost of 6 for instruction:   %ctlz
  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0)
  ret <2 x i64> %ctlz
}

; Leading-zero count of a <2 x i64> vector with the i1 flag operand set to 1;
; a cost of 6 is expected under every check prefix.
define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64u':
; SSE: Found an estimated cost of 6 for instruction:   %ctlz
; AVX: Found an estimated cost of 6 for instruction:   %ctlz
; XOP: Found an estimated cost of 6 for instruction:   %ctlz
  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1)
  ret <2 x i64> %ctlz
}

; Leading-zero count of a <4 x i64> vector with the i1 flag operand set to 0;
; a cost of 12 is expected under every check prefix.
define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64':
; SSE: Found an estimated cost of 12 for instruction:   %ctlz
; AVX: Found an estimated cost of 12 for instruction:   %ctlz
; XOP: Found an estimated cost of 12 for instruction:   %ctlz
  %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0)
  ret <4 x i64> %ctlz
}

; Leading-zero count of a <4 x i64> vector with the i1 flag operand set to 1;
; a cost of 12 is expected under every check prefix.
define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64u':
; SSE: Found an estimated cost of 12 for instruction:   %ctlz
; AVX: Found an estimated cost of 12 for instruction:   %ctlz
; XOP: Found an estimated cost of 12 for instruction:   %ctlz
  %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1)
  ret <4 x i64> %ctlz
}

; Leading-zero count of a <4 x i32> vector with the i1 flag operand set to 0;
; a cost of 12 is expected under every check prefix.
define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32':
; SSE: Found an estimated cost of 12 for instruction:   %ctlz
; AVX: Found an estimated cost of 12 for instruction:   %ctlz
; XOP: Found an estimated cost of 12 for instruction:   %ctlz
  %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0)
  ret <4 x i32> %ctlz
}

; Leading-zero count of a <4 x i32> vector with the i1 flag operand set to 1;
; a cost of 12 is expected under every check prefix.
define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32u':
; SSE: Found an estimated cost of 12 for instruction:   %ctlz
; AVX: Found an estimated cost of 12 for instruction:   %ctlz
; XOP: Found an estimated cost of 12 for instruction:   %ctlz
  %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1)
  ret <4 x i32> %ctlz
}

; Leading-zero count of an <8 x i32> vector with the i1 flag operand set to 0;
; a cost of 24 is expected under every check prefix.
define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32':
; SSE: Found an estimated cost of 24 for instruction:   %ctlz
; AVX: Found an estimated cost of 24 for instruction:   %ctlz
; XOP: Found an estimated cost of 24 for instruction:   %ctlz
  %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0)
  ret <8 x i32> %ctlz
}

; Leading-zero count of an <8 x i32> vector with the i1 flag operand set to 1;
; a cost of 24 is expected under every check prefix.
define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32u':
; SSE: Found an estimated cost of 24 for instruction:   %ctlz
; AVX: Found an estimated cost of 24 for instruction:   %ctlz
; XOP: Found an estimated cost of 24 for instruction:   %ctlz
  %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1)
  ret <8 x i32> %ctlz
}

; Leading-zero count of an <8 x i16> vector with the i1 flag operand set to 0;
; a cost of 24 is expected under every check prefix.
define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16':
; SSE: Found an estimated cost of 24 for instruction:   %ctlz
; AVX: Found an estimated cost of 24 for instruction:   %ctlz
; XOP: Found an estimated cost of 24 for instruction:   %ctlz
  %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0)
  ret <8 x i16> %ctlz
}

; Leading-zero count of an <8 x i16> vector with the i1 flag operand set to 1;
; a cost of 24 is expected under every check prefix.
define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16u':
; SSE: Found an estimated cost of 24 for instruction:   %ctlz
; AVX: Found an estimated cost of 24 for instruction:   %ctlz
; XOP: Found an estimated cost of 24 for instruction:   %ctlz
  %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1)
  ret <8 x i16> %ctlz
}

; Leading-zero count of a <16 x i16> vector with the i1 flag operand set to 0;
; a cost of 48 is expected under every check prefix.
define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16':
; SSE: Found an estimated cost of 48 for instruction:   %ctlz
; AVX: Found an estimated cost of 48 for instruction:   %ctlz
; XOP: Found an estimated cost of 48 for instruction:   %ctlz
  %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0)
  ret <16 x i16> %ctlz
}

; Leading-zero count of a <16 x i16> vector with the i1 flag operand set to 1;
; a cost of 48 is expected under every check prefix.
define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16u':
; SSE: Found an estimated cost of 48 for instruction:   %ctlz
; AVX: Found an estimated cost of 48 for instruction:   %ctlz
; XOP: Found an estimated cost of 48 for instruction:   %ctlz
  %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1)
  ret <16 x i16> %ctlz
}

; Leading-zero count of a <16 x i8> vector with the i1 flag operand set to 0;
; a cost of 48 is expected under every check prefix.
define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8':
; SSE: Found an estimated cost of 48 for instruction:   %ctlz
; AVX: Found an estimated cost of 48 for instruction:   %ctlz
; XOP: Found an estimated cost of 48 for instruction:   %ctlz
  %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0)
  ret <16 x i8> %ctlz
}

; Leading-zero count of a <16 x i8> vector with the i1 flag operand set to 1;
; a cost of 48 is expected under every check prefix.
define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8u':
; SSE: Found an estimated cost of 48 for instruction:   %ctlz
; AVX: Found an estimated cost of 48 for instruction:   %ctlz
; XOP: Found an estimated cost of 48 for instruction:   %ctlz
  %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1)
  ret <16 x i8> %ctlz
}

; Leading-zero count of a <32 x i8> vector with the i1 flag operand set to 0;
; a cost of 96 is expected under every check prefix.
define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8':
; SSE: Found an estimated cost of 96 for instruction:   %ctlz
; AVX: Found an estimated cost of 96 for instruction:   %ctlz
; XOP: Found an estimated cost of 96 for instruction:   %ctlz
  %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0)
  ret <32 x i8> %ctlz
}

; Leading-zero count of a <32 x i8> vector with the i1 flag operand set to 1;
; a cost of 96 is expected under every check prefix.
define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8u':
; SSE: Found an estimated cost of 96 for instruction:   %ctlz
; AVX: Found an estimated cost of 96 for instruction:   %ctlz
; XOP: Found an estimated cost of 96 for instruction:   %ctlz
  %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1)
  ret <32 x i8> %ctlz
}

; Verify the cost of vector trailing zero count instructions.

; Declarations of the llvm.cttz intrinsic for the 128-bit vector types.
; Each takes the vector operand plus an i1 flag operand.
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)

; Declarations of the llvm.cttz intrinsic for the 256-bit vector types.
declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)

; Trailing-zero count of a <2 x i64> vector with the i1 flag operand set to 0;
; a cost of 6 is expected under every check prefix.
define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64':
; SSE: Found an estimated cost of 6 for instruction:   %cttz
; AVX: Found an estimated cost of 6 for instruction:   %cttz
; XOP: Found an estimated cost of 6 for instruction:   %cttz
  %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
  ret <2 x i64> %cttz
}

; Trailing-zero count of a <2 x i64> vector with the i1 flag operand set to 1;
; a cost of 6 is expected under every check prefix.
define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64u':
; SSE: Found an estimated cost of 6 for instruction:   %cttz
; AVX: Found an estimated cost of 6 for instruction:   %cttz
; XOP: Found an estimated cost of 6 for instruction:   %cttz
  %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
  ret <2 x i64> %cttz
}

; Trailing-zero count of a <4 x i64> vector with the i1 flag operand set to 0;
; a cost of 12 is expected under every check prefix.
define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64':
; SSE: Found an estimated cost of 12 for instruction:   %cttz
; AVX: Found an estimated cost of 12 for instruction:   %cttz
; XOP: Found an estimated cost of 12 for instruction:   %cttz
  %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
  ret <4 x i64> %cttz
}

; Trailing-zero count of a <4 x i64> vector with the i1 flag operand set to 1;
; a cost of 12 is expected under every check prefix.
define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64u':
; SSE: Found an estimated cost of 12 for instruction:   %cttz
; AVX: Found an estimated cost of 12 for instruction:   %cttz
; XOP: Found an estimated cost of 12 for instruction:   %cttz
  %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
  ret <4 x i64> %cttz
}

; Trailing-zero count of a <4 x i32> vector with the i1 flag operand set to 0;
; a cost of 12 is expected under every check prefix.
define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32':
; SSE: Found an estimated cost of 12 for instruction:   %cttz
; AVX: Found an estimated cost of 12 for instruction:   %cttz
; XOP: Found an estimated cost of 12 for instruction:   %cttz
  %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
  ret <4 x i32> %cttz
}

; Trailing-zero count of a <4 x i32> vector with the i1 flag operand set to 1;
; a cost of 12 is expected under every check prefix.
define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32u':
; SSE: Found an estimated cost of 12 for instruction:   %cttz
; AVX: Found an estimated cost of 12 for instruction:   %cttz
; XOP: Found an estimated cost of 12 for instruction:   %cttz
  %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
  ret <4 x i32> %cttz
}

; Trailing-zero count of an <8 x i32> vector with the i1 flag operand set to 0;
; a cost of 24 is expected under every check prefix.
define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32':
; SSE: Found an estimated cost of 24 for instruction:   %cttz
; AVX: Found an estimated cost of 24 for instruction:   %cttz
; XOP: Found an estimated cost of 24 for instruction:   %cttz
  %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
  ret <8 x i32> %cttz
}

; Trailing-zero count of an <8 x i32> vector with the i1 flag operand set to 1;
; a cost of 24 is expected under every check prefix.
define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32u':
; SSE: Found an estimated cost of 24 for instruction:   %cttz
; AVX: Found an estimated cost of 24 for instruction:   %cttz
; XOP: Found an estimated cost of 24 for instruction:   %cttz
  %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
  ret <8 x i32> %cttz
}

; Trailing-zero count of an <8 x i16> vector with the i1 flag operand set to 0;
; a cost of 24 is expected under every check prefix.
define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16':
; SSE: Found an estimated cost of 24 for instruction:   %cttz
; AVX: Found an estimated cost of 24 for instruction:   %cttz
; XOP: Found an estimated cost of 24 for instruction:   %cttz
  %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
  ret <8 x i16> %cttz
}

; Trailing-zero count of an <8 x i16> vector with the i1 flag operand set to 1;
; a cost of 24 is expected under every check prefix.
define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16u':
; SSE: Found an estimated cost of 24 for instruction:   %cttz
; AVX: Found an estimated cost of 24 for instruction:   %cttz
; XOP: Found an estimated cost of 24 for instruction:   %cttz
  %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
  ret <8 x i16> %cttz
}

; Trailing-zero count of a <16 x i16> vector with the i1 flag operand set to 0;
; a cost of 48 is expected under every check prefix.
define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16':
; SSE: Found an estimated cost of 48 for instruction:   %cttz
; AVX: Found an estimated cost of 48 for instruction:   %cttz
; XOP: Found an estimated cost of 48 for instruction:   %cttz
  %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
  ret <16 x i16> %cttz
}

; Trailing-zero count of a <16 x i16> vector with the i1 flag operand set to 1;
; a cost of 48 is expected under every check prefix.
define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16u':
; SSE: Found an estimated cost of 48 for instruction:   %cttz
; AVX: Found an estimated cost of 48 for instruction:   %cttz
; XOP: Found an estimated cost of 48 for instruction:   %cttz
  %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
  ret <16 x i16> %cttz
}

; Trailing-zero count of a <16 x i8> vector with the i1 flag operand set to 0;
; a cost of 48 is expected under every check prefix.
define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8':
; SSE: Found an estimated cost of 48 for instruction:   %cttz
; AVX: Found an estimated cost of 48 for instruction:   %cttz
; XOP: Found an estimated cost of 48 for instruction:   %cttz
  %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
  ret <16 x i8> %cttz
}

; Trailing-zero count of a <16 x i8> vector with the i1 flag operand set to 1;
; a cost of 48 is expected under every check prefix.
define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8u':
; SSE: Found an estimated cost of 48 for instruction:   %cttz
; AVX: Found an estimated cost of 48 for instruction:   %cttz
; XOP: Found an estimated cost of 48 for instruction:   %cttz
  %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
  ret <16 x i8> %cttz
}

; Trailing-zero count of a <32 x i8> vector with the i1 flag operand set to 0;
; a cost of 96 is expected under every check prefix.
define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8':
; SSE: Found an estimated cost of 96 for instruction:   %cttz
; AVX: Found an estimated cost of 96 for instruction:   %cttz
; XOP: Found an estimated cost of 96 for instruction:   %cttz
  %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
  ret <32 x i8> %cttz
}

; Trailing-zero count of a <32 x i8> vector with the i1 flag operand set to 1;
; a cost of 96 is expected under every check prefix.
define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8u':
; SSE: Found an estimated cost of 96 for instruction:   %cttz
; AVX: Found an estimated cost of 96 for instruction:   %cttz
; XOP: Found an estimated cost of 96 for instruction:   %cttz
  %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
  ret <32 x i8> %cttz
}