1; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
2; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
3; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
4; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
5; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
6; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
7
8; Verify the cost model's estimated cost of vector logical shift right (lshr) instructions.
9
10;
11; Variable Shifts
12;
13
14define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
15; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
16; SSE2: Found an estimated cost of 4 for instruction:   %shift
17; SSE41: Found an estimated cost of 4 for instruction:   %shift
18; AVX: Found an estimated cost of 4 for instruction:   %shift
19; AVX2: Found an estimated cost of 1 for instruction:   %shift
20; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
21; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
22  %shift = lshr <2 x i64> %a, %b
23  ret <2 x i64> %shift
24}
25
; Variable per-lane shift on <4 x i64>: each element of %a is logically shifted
; right by the corresponding element of %b. Only the lshr named %shift is
; checked; the two XOP runs carry separate expectations here.
26define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
27; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
28; SSE2: Found an estimated cost of 8 for instruction:   %shift
29; SSE41: Found an estimated cost of 8 for instruction:   %shift
30; AVX: Found an estimated cost of 8 for instruction:   %shift
31; AVX2: Found an estimated cost of 1 for instruction:   %shift
32; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
33; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
34  %shift = lshr <4 x i64> %a, %b
35  ret <4 x i64> %shift
36}
37
; Variable per-lane shift on <4 x i32>: each element of %a is logically shifted
; right by the corresponding element of %b. Only the lshr named %shift is
; checked; the two XOP runs carry separate expectations here.
38define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
39; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
40; SSE2: Found an estimated cost of 16 for instruction:   %shift
41; SSE41: Found an estimated cost of 16 for instruction:   %shift
42; AVX: Found an estimated cost of 16 for instruction:   %shift
43; AVX2: Found an estimated cost of 1 for instruction:   %shift
44; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
45; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
46  %shift = lshr <4 x i32> %a, %b
47  ret <4 x i32> %shift
48}
49
; Variable per-lane shift on <8 x i32>: each element of %a is logically shifted
; right by the corresponding element of %b. Only the lshr named %shift is
; checked; the two XOP runs carry separate expectations here.
50define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
51; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
52; SSE2: Found an estimated cost of 32 for instruction:   %shift
53; SSE41: Found an estimated cost of 32 for instruction:   %shift
54; AVX: Found an estimated cost of 32 for instruction:   %shift
55; AVX2: Found an estimated cost of 1 for instruction:   %shift
56; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
57; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
58  %shift = lshr <8 x i32> %a, %b
59  ret <8 x i32> %shift
60}
61
; Variable per-lane shift on <8 x i16>: each element of %a is logically shifted
; right by the corresponding element of %b. Only the lshr named %shift is
; checked; both XOP runs share one expectation through their common prefix.
62define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
63; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
64; SSE2: Found an estimated cost of 32 for instruction:   %shift
65; SSE41: Found an estimated cost of 32 for instruction:   %shift
66; AVX: Found an estimated cost of 32 for instruction:   %shift
67; AVX2: Found an estimated cost of 32 for instruction:   %shift
68; XOP: Found an estimated cost of 2 for instruction:   %shift
69  %shift = lshr <8 x i16> %a, %b
70  ret <8 x i16> %shift
71}
72
; Variable per-lane shift on <16 x i16>: each element of %a is logically shifted
; right by the corresponding element of %b. Only the lshr named %shift is
; checked; both XOP runs share one expectation through their common prefix.
73define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
74; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
75; SSE2: Found an estimated cost of 64 for instruction:   %shift
76; SSE41: Found an estimated cost of 64 for instruction:   %shift
77; AVX: Found an estimated cost of 64 for instruction:   %shift
78; AVX2: Found an estimated cost of 10 for instruction:   %shift
79; XOP: Found an estimated cost of 4 for instruction:   %shift
80  %shift = lshr <16 x i16> %a, %b
81  ret <16 x i16> %shift
82}
83
; Variable per-lane shift on <16 x i8>: each element of %a is logically shifted
; right by the corresponding element of %b. Only the lshr named %shift is
; checked; both XOP runs share one expectation through their common prefix.
84define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
85; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
86; SSE2: Found an estimated cost of 26 for instruction:   %shift
87; SSE41: Found an estimated cost of 26 for instruction:   %shift
88; AVX: Found an estimated cost of 26 for instruction:   %shift
89; AVX2: Found an estimated cost of 26 for instruction:   %shift
90; XOP: Found an estimated cost of 2 for instruction:   %shift
91  %shift = lshr <16 x i8> %a, %b
92  ret <16 x i8> %shift
93}
94
; Variable per-lane shift on <32 x i8>: each element of %a is logically shifted
; right by the corresponding element of %b. Only the lshr named %shift is
; checked; both XOP runs share one expectation through their common prefix.
95define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
96; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
97; SSE2: Found an estimated cost of 52 for instruction:   %shift
98; SSE41: Found an estimated cost of 52 for instruction:   %shift
99; AVX: Found an estimated cost of 52 for instruction:   %shift
100; AVX2: Found an estimated cost of 11 for instruction:   %shift
101; XOP: Found an estimated cost of 4 for instruction:   %shift
102  %shift = lshr <32 x i8> %a, %b
103  ret <32 x i8> %shift
104}
105
106;
107; Uniform Variable Shifts
108;
109
; Uniform variable shift on <2 x i64>: every lane of %a is shifted right by the
; same run-time amount, element 0 of %b. The expectations name only %shift (the
; lshr), not the %splat shuffle, and equal those of var_shift_v2i64.
110define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
111; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
112; SSE2: Found an estimated cost of 4 for instruction:   %shift
113; SSE41: Found an estimated cost of 4 for instruction:   %shift
114; AVX: Found an estimated cost of 4 for instruction:   %shift
115; AVX2: Found an estimated cost of 1 for instruction:   %shift
116; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
117; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  ; An all-zero shuffle mask broadcasts element 0 of %b into every lane.
118  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
119  %shift = lshr <2 x i64> %a, %splat
120  ret <2 x i64> %shift
121}
122
; Uniform variable shift on <4 x i64>: every lane of %a is shifted right by the
; same run-time amount, element 0 of %b. The expectations name only %shift (the
; lshr), not the %splat shuffle, and equal those of var_shift_v4i64.
123define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
124; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
125; SSE2: Found an estimated cost of 8 for instruction:   %shift
126; SSE41: Found an estimated cost of 8 for instruction:   %shift
127; AVX: Found an estimated cost of 8 for instruction:   %shift
128; AVX2: Found an estimated cost of 1 for instruction:   %shift
129; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
130; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  ; An all-zero shuffle mask broadcasts element 0 of %b into every lane.
131  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
132  %shift = lshr <4 x i64> %a, %splat
133  ret <4 x i64> %shift
134}
135
; Uniform variable shift on <4 x i32>: every lane of %a is shifted right by the
; same run-time amount, element 0 of %b. The expectations name only %shift (the
; lshr), not the %splat shuffle, and equal those of var_shift_v4i32.
136define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
137; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
138; SSE2: Found an estimated cost of 16 for instruction:   %shift
139; SSE41: Found an estimated cost of 16 for instruction:   %shift
140; AVX: Found an estimated cost of 16 for instruction:   %shift
141; AVX2: Found an estimated cost of 1 for instruction:   %shift
142; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
143; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  ; An all-zero shuffle mask broadcasts element 0 of %b into every lane.
144  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
145  %shift = lshr <4 x i32> %a, %splat
146  ret <4 x i32> %shift
147}
148
; Uniform variable shift on <8 x i32>: every lane of %a is shifted right by the
; same run-time amount, element 0 of %b. The expectations name only %shift (the
; lshr), not the %splat shuffle, and equal those of var_shift_v8i32.
149define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
150; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
151; SSE2: Found an estimated cost of 32 for instruction:   %shift
152; SSE41: Found an estimated cost of 32 for instruction:   %shift
153; AVX: Found an estimated cost of 32 for instruction:   %shift
154; AVX2: Found an estimated cost of 1 for instruction:   %shift
155; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
156; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  ; An all-zero shuffle mask broadcasts element 0 of %b into every lane.
157  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
158  %shift = lshr <8 x i32> %a, %splat
159  ret <8 x i32> %shift
160}
161
; Uniform variable shift on <8 x i16>: every lane of %a is shifted right by the
; same run-time amount, element 0 of %b. The expectations name only %shift (the
; lshr), not the %splat shuffle, and equal those of var_shift_v8i16.
162define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
163; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
164; SSE2: Found an estimated cost of 32 for instruction:   %shift
165; SSE41: Found an estimated cost of 32 for instruction:   %shift
166; AVX: Found an estimated cost of 32 for instruction:   %shift
167; AVX2: Found an estimated cost of 32 for instruction:   %shift
168; XOP: Found an estimated cost of 2 for instruction:   %shift
  ; An all-zero shuffle mask broadcasts element 0 of %b into every lane.
169  %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
170  %shift = lshr <8 x i16> %a, %splat
171  ret <8 x i16> %shift
172}
173
; Uniform variable shift on <16 x i16>: every lane of %a is shifted right by the
; same run-time amount, element 0 of %b. The expectations name only %shift (the
; lshr), not the %splat shuffle, and equal those of var_shift_v16i16.
174define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
175; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
176; SSE2: Found an estimated cost of 64 for instruction:   %shift
177; SSE41: Found an estimated cost of 64 for instruction:   %shift
178; AVX: Found an estimated cost of 64 for instruction:   %shift
179; AVX2: Found an estimated cost of 10 for instruction:   %shift
180; XOP: Found an estimated cost of 4 for instruction:   %shift
  ; An all-zero shuffle mask broadcasts element 0 of %b into every lane.
181  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
182  %shift = lshr <16 x i16> %a, %splat
183  ret <16 x i16> %shift
184}
185
; Uniform variable shift on <16 x i8>: every lane of %a is shifted right by the
; same run-time amount, element 0 of %b. The expectations name only %shift (the
; lshr), not the %splat shuffle, and equal those of var_shift_v16i8.
186define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
187; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
188; SSE2: Found an estimated cost of 26 for instruction:   %shift
189; SSE41: Found an estimated cost of 26 for instruction:   %shift
190; AVX: Found an estimated cost of 26 for instruction:   %shift
191; AVX2: Found an estimated cost of 26 for instruction:   %shift
192; XOP: Found an estimated cost of 2 for instruction:   %shift
  ; An all-zero shuffle mask broadcasts element 0 of %b into every lane.
193  %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
194  %shift = lshr <16 x i8> %a, %splat
195  ret <16 x i8> %shift
196}
197
; Uniform variable shift on <32 x i8>: every lane of %a is shifted right by the
; same run-time amount, element 0 of %b. The expectations name only %shift (the
; lshr), not the %splat shuffle, and equal those of var_shift_v32i8.
198define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
199; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
200; SSE2: Found an estimated cost of 52 for instruction:   %shift
201; SSE41: Found an estimated cost of 52 for instruction:   %shift
202; AVX: Found an estimated cost of 52 for instruction:   %shift
203; AVX2: Found an estimated cost of 11 for instruction:   %shift
204; XOP: Found an estimated cost of 4 for instruction:   %shift
  ; An all-zero shuffle mask broadcasts element 0 of %b into every lane.
205  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
206  %shift = lshr <32 x i8> %a, %splat
207  ret <32 x i8> %shift
208}
209
210;
211; Constant Shifts
212;
213
; Non-uniform constant shift on <2 x i64>: the lanes of %a are shifted right by
; the distinct compile-time amounts 1 and 7. The expected costs equal those of
; var_shift_v2i64.
214define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
215; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
216; SSE2: Found an estimated cost of 4 for instruction:   %shift
217; SSE41: Found an estimated cost of 4 for instruction:   %shift
218; AVX: Found an estimated cost of 4 for instruction:   %shift
219; AVX2: Found an estimated cost of 1 for instruction:   %shift
220; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
221; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
222  %shift = lshr <2 x i64> %a, <i64 1, i64 7>
223  ret <2 x i64> %shift
224}
225
; Non-uniform constant shift on <4 x i64>: the lanes of %a are shifted right by
; the distinct compile-time amounts 1, 7, 15 and 31. The expected costs equal
; those of var_shift_v4i64.
226define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
227; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
228; SSE2: Found an estimated cost of 8 for instruction:   %shift
229; SSE41: Found an estimated cost of 8 for instruction:   %shift
230; AVX: Found an estimated cost of 8 for instruction:   %shift
231; AVX2: Found an estimated cost of 1 for instruction:   %shift
232; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
233; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
234  %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
235  ret <4 x i64> %shift
236}
237
; Non-uniform constant shift on <4 x i32>: the lanes of %a are shifted right by
; the distinct compile-time amounts 4, 5, 6 and 7. The expected costs equal
; those of var_shift_v4i32.
238define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
239; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
240; SSE2: Found an estimated cost of 16 for instruction:   %shift
241; SSE41: Found an estimated cost of 16 for instruction:   %shift
242; AVX: Found an estimated cost of 16 for instruction:   %shift
243; AVX2: Found an estimated cost of 1 for instruction:   %shift
244; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
245; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
246  %shift = lshr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
247  ret <4 x i32> %shift
248}
249
; Non-uniform constant shift on <8 x i32>: the lanes of %a are shifted right by
; the compile-time amounts 4..7 followed by 0..3. The expected costs equal
; those of var_shift_v8i32.
250define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
251; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
252; SSE2: Found an estimated cost of 32 for instruction:   %shift
253; SSE41: Found an estimated cost of 32 for instruction:   %shift
254; AVX: Found an estimated cost of 32 for instruction:   %shift
255; AVX2: Found an estimated cost of 1 for instruction:   %shift
256; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
257; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
258  %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
259  ret <8 x i32> %shift
260}
261
; Non-uniform constant shift on <8 x i16>: lane N of %a is shifted right by the
; compile-time amount N (0..7). The expected costs equal those of
; var_shift_v8i16.
262define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
263; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
264; SSE2: Found an estimated cost of 32 for instruction:   %shift
265; SSE41: Found an estimated cost of 32 for instruction:   %shift
266; AVX: Found an estimated cost of 32 for instruction:   %shift
267; AVX2: Found an estimated cost of 32 for instruction:   %shift
268; XOP: Found an estimated cost of 2 for instruction:   %shift
269  %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
270  ret <8 x i16> %shift
271}
272
; Non-uniform constant shift on <16 x i16>: the compile-time amounts 0..7 are
; applied to the first eight lanes of %a and repeated for the last eight. The
; expected costs equal those of var_shift_v16i16.
273define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
274; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
275; SSE2: Found an estimated cost of 64 for instruction:   %shift
276; SSE41: Found an estimated cost of 64 for instruction:   %shift
277; AVX: Found an estimated cost of 64 for instruction:   %shift
278; AVX2: Found an estimated cost of 10 for instruction:   %shift
279; XOP: Found an estimated cost of 4 for instruction:   %shift
280  %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
281  ret <16 x i16> %shift
282}
283
; Non-uniform constant shift on <16 x i8>: the compile-time amounts ascend 0..7
; over the first eight lanes of %a and descend 7..0 over the last eight. The
; expected costs equal those of var_shift_v16i8.
284define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
285; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
286; SSE2: Found an estimated cost of 26 for instruction:   %shift
287; SSE41: Found an estimated cost of 26 for instruction:   %shift
288; AVX: Found an estimated cost of 26 for instruction:   %shift
289; AVX2: Found an estimated cost of 26 for instruction:   %shift
290; XOP: Found an estimated cost of 2 for instruction:   %shift
291  %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
292  ret <16 x i8> %shift
293}
294
; Non-uniform constant shift on <32 x i8>: the 16-lane amount pattern 0..7,7..0
; is applied to the first half of %a and repeated for the second half. The
; expected costs equal those of var_shift_v32i8.
295define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
296; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
297; SSE2: Found an estimated cost of 52 for instruction:   %shift
298; SSE41: Found an estimated cost of 52 for instruction:   %shift
299; AVX: Found an estimated cost of 52 for instruction:   %shift
300; AVX2: Found an estimated cost of 11 for instruction:   %shift
301; XOP: Found an estimated cost of 4 for instruction:   %shift
302  %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
303  ret <32 x i8> %shift
304}
305
306;
307; Uniform Constant Shifts
308;
309
; Uniform constant shift on <2 x i64>: every lane of %a is shifted right by the
; same compile-time amount, 7. The two XOP runs carry separate expectations
; here.
310define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
311; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
312; SSE2: Found an estimated cost of 1 for instruction:   %shift
313; SSE41: Found an estimated cost of 1 for instruction:   %shift
314; AVX: Found an estimated cost of 1 for instruction:   %shift
315; AVX2: Found an estimated cost of 1 for instruction:   %shift
316; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
317; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
318  %shift = lshr <2 x i64> %a, <i64 7, i64 7>
319  ret <2 x i64> %shift
320}
321
; Uniform constant shift on <4 x i64>: every lane of %a is shifted right by the
; same compile-time amount, 7. The two XOP runs carry separate expectations
; here.
322define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
323; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
324; SSE2: Found an estimated cost of 2 for instruction:   %shift
325; SSE41: Found an estimated cost of 2 for instruction:   %shift
326; AVX: Found an estimated cost of 2 for instruction:   %shift
327; AVX2: Found an estimated cost of 1 for instruction:   %shift
328; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
329; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
330  %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
331  ret <4 x i64> %shift
332}
333
; Uniform constant shift on <4 x i32>: every lane of %a is shifted right by the
; same compile-time amount, 5. The two XOP runs carry separate expectations
; here.
334define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
335; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
336; SSE2: Found an estimated cost of 1 for instruction:   %shift
337; SSE41: Found an estimated cost of 1 for instruction:   %shift
338; AVX: Found an estimated cost of 1 for instruction:   %shift
339; AVX2: Found an estimated cost of 1 for instruction:   %shift
340; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
341; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
342  %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
343  ret <4 x i32> %shift
344}
345
; Uniform constant shift on <8 x i32>: every lane of %a is shifted right by the
; same compile-time amount, 5. The two XOP runs carry separate expectations
; here.
346define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
347; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
348; SSE2: Found an estimated cost of 2 for instruction:   %shift
349; SSE41: Found an estimated cost of 2 for instruction:   %shift
350; AVX: Found an estimated cost of 2 for instruction:   %shift
351; AVX2: Found an estimated cost of 1 for instruction:   %shift
352; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
353; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
354  %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
355  ret <8 x i32> %shift
356}
357
; Uniform constant shift on <8 x i16>: every lane of %a is shifted right by the
; same compile-time amount, 3. Both XOP runs share one expectation through
; their common prefix.
358define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
359; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
360; SSE2: Found an estimated cost of 1 for instruction:   %shift
361; SSE41: Found an estimated cost of 1 for instruction:   %shift
362; AVX: Found an estimated cost of 1 for instruction:   %shift
363; AVX2: Found an estimated cost of 1 for instruction:   %shift
364; XOP: Found an estimated cost of 2 for instruction:   %shift
365  %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
366  ret <8 x i16> %shift
367}
368
; Uniform constant shift on <16 x i16>: every lane of %a is shifted right by the
; same compile-time amount, 3. Both XOP runs share one expectation through
; their common prefix.
; NOTE(review): the expectation for the core-avx2 run (10) is higher than for
; the pre-AVX2 runs (2) and matches the variable-shift case; presumably this
; pins the cost model's current output rather than an ideal value -- confirm.
369define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
370; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
371; SSE2: Found an estimated cost of 2 for instruction:   %shift
372; SSE41: Found an estimated cost of 2 for instruction:   %shift
373; AVX: Found an estimated cost of 2 for instruction:   %shift
374; AVX2: Found an estimated cost of 10 for instruction:   %shift
375; XOP: Found an estimated cost of 4 for instruction:   %shift
376  %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
377  ret <16 x i16> %shift
378}
379
; Uniform constant shift on <16 x i8>: every lane of %a is shifted right by the
; same compile-time amount, 3. Both XOP runs share one expectation through
; their common prefix.
380define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
381; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
382; SSE2: Found an estimated cost of 1 for instruction:   %shift
383; SSE41: Found an estimated cost of 1 for instruction:   %shift
384; AVX: Found an estimated cost of 1 for instruction:   %shift
385; AVX2: Found an estimated cost of 1 for instruction:   %shift
386; XOP: Found an estimated cost of 2 for instruction:   %shift
387  %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
388  ret <16 x i8> %shift
389}
390
; Uniform constant shift on <32 x i8>: every lane of %a is shifted right by the
; same compile-time amount, 3. Both XOP runs share one expectation through
; their common prefix.
; NOTE(review): the expectation for the core-avx2 run (11) is higher than for
; the pre-AVX2 runs (2) and matches the variable-shift case; presumably this
; pins the cost model's current output rather than an ideal value -- confirm.
391define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
392; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
393; SSE2: Found an estimated cost of 2 for instruction:   %shift
394; SSE41: Found an estimated cost of 2 for instruction:   %shift
395; AVX: Found an estimated cost of 2 for instruction:   %shift
396; AVX2: Found an estimated cost of 11 for instruction:   %shift
397; XOP: Found an estimated cost of 4 for instruction:   %shift
398  %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
399  ret <32 x i8> %shift
400}
401