; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2

; Verify the cost of vector arithmetic shift right instructions.

;
; Variable Shifts
;

; Variable arithmetic shift right on <2 x i64>: each lane of %a is shifted by
; the corresponding lane of %b. The per-target expectations below pin the cost
; reported for %shift.
define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
; SSE2: Found an estimated cost of 12 for instruction:   %shift
; SSE41: Found an estimated cost of 12 for instruction:   %shift
; AVX: Found an estimated cost of 12 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <2 x i64> %a, %b
  ret <2 x i64> %shift
}
; Variable arithmetic shift right on <4 x i64>: each lane of %a is shifted by
; the corresponding lane of %b. The per-target expectations below pin the cost
; reported for %shift.
define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
; SSE2: Found an estimated cost of 24 for instruction:   %shift
; SSE41: Found an estimated cost of 24 for instruction:   %shift
; AVX: Found an estimated cost of 24 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <4 x i64> %a, %b
  ret <4 x i64> %shift
}
; Variable arithmetic shift right on <4 x i32>: each lane of %a is shifted by
; the corresponding lane of %b. The per-target expectations below pin the cost
; reported for %shift; the two XOP runs are checked with separate values here.
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction:   %shift
; SSE41: Found an estimated cost of 16 for instruction:   %shift
; AVX: Found an estimated cost of 16 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %shift = ashr <4 x i32> %a, %b
  ret <4 x i32> %shift
}
; Variable arithmetic shift right on <8 x i32>: each lane of %a is shifted by
; the corresponding lane of %b. The per-target expectations below pin the cost
; reported for %shift; the two XOP runs are checked with separate values here.
define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction:   %shift
; SSE41: Found an estimated cost of 32 for instruction:   %shift
; AVX: Found an estimated cost of 32 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %shift = ashr <8 x i32> %a, %b
  ret <8 x i32> %shift
}
; Variable arithmetic shift right on <8 x i16>: each lane of %a is shifted by
; the corresponding lane of %b. The per-target expectations below pin the cost
; reported for %shift.
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction:   %shift
; SSE41: Found an estimated cost of 32 for instruction:   %shift
; AVX: Found an estimated cost of 32 for instruction:   %shift
; AVX2: Found an estimated cost of 32 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <8 x i16> %a, %b
  ret <8 x i16> %shift
}
; Variable arithmetic shift right on <16 x i16>: each lane of %a is shifted by
; the corresponding lane of %b. The per-target expectations below pin the cost
; reported for %shift.
define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction:   %shift
; SSE41: Found an estimated cost of 64 for instruction:   %shift
; AVX: Found an estimated cost of 64 for instruction:   %shift
; AVX2: Found an estimated cost of 10 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <16 x i16> %a, %b
  ret <16 x i16> %shift
}
; Variable arithmetic shift right on <16 x i8>: each lane of %a is shifted by
; the corresponding lane of %b. The per-target expectations below pin the cost
; reported for %shift.
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction:   %shift
; SSE41: Found an estimated cost of 54 for instruction:   %shift
; AVX: Found an estimated cost of 54 for instruction:   %shift
; AVX2: Found an estimated cost of 54 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <16 x i8> %a, %b
  ret <16 x i8> %shift
}
; Variable arithmetic shift right on <32 x i8>: each lane of %a is shifted by
; the corresponding lane of %b. The per-target expectations below pin the cost
; reported for %shift.
define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction:   %shift
; SSE41: Found an estimated cost of 108 for instruction:   %shift
; AVX: Found an estimated cost of 108 for instruction:   %shift
; AVX2: Found an estimated cost of 24 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <32 x i8> %a, %b
  ret <32 x i8> %shift
}

;
; Uniform Variable Shifts
;

; Uniform variable arithmetic shift right on <2 x i64>: lane 0 of %b is
; broadcast to every lane (shufflevector with an all-zero mask) and used as the
; shift amount. The expectations below cover only the cost of %shift; the cost
; of %splat is not checked.
define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
; SSE2: Found an estimated cost of 12 for instruction:   %shift
; SSE41: Found an estimated cost of 12 for instruction:   %shift
; AVX: Found an estimated cost of 12 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
  %shift = ashr <2 x i64> %a, %splat
  ret <2 x i64> %shift
}
; Uniform variable arithmetic shift right on <4 x i64>: lane 0 of %b is
; broadcast to every lane (shufflevector with an all-zero mask) and used as the
; shift amount. The expectations below cover only the cost of %shift; the cost
; of %splat is not checked.
define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
; SSE2: Found an estimated cost of 24 for instruction:   %shift
; SSE41: Found an estimated cost of 24 for instruction:   %shift
; AVX: Found an estimated cost of 24 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  %shift = ashr <4 x i64> %a, %splat
  ret <4 x i64> %shift
}
; Uniform variable arithmetic shift right on <4 x i32>: lane 0 of %b is
; broadcast to every lane (shufflevector with an all-zero mask) and used as the
; shift amount. The expectations below cover only the cost of %shift; the cost
; of %splat is not checked. The two XOP runs are checked with separate values.
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction:   %shift
; SSE41: Found an estimated cost of 16 for instruction:   %shift
; AVX: Found an estimated cost of 16 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
  %shift = ashr <4 x i32> %a, %splat
  ret <4 x i32> %shift
}
; Uniform variable arithmetic shift right on <8 x i32>: lane 0 of %b is
; broadcast to every lane (shufflevector with an all-zero mask) and used as the
; shift amount. The expectations below cover only the cost of %shift; the cost
; of %splat is not checked. The two XOP runs are checked with separate values.
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction:   %shift
; SSE41: Found an estimated cost of 32 for instruction:   %shift
; AVX: Found an estimated cost of 32 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  %shift = ashr <8 x i32> %a, %splat
  ret <8 x i32> %shift
}
; Uniform variable arithmetic shift right on <8 x i16>: lane 0 of %b is
; broadcast to every lane (shufflevector with an all-zero mask) and used as the
; shift amount. The expectations below cover only the cost of %shift; the cost
; of %splat is not checked.
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction:   %shift
; SSE41: Found an estimated cost of 32 for instruction:   %shift
; AVX: Found an estimated cost of 32 for instruction:   %shift
; AVX2: Found an estimated cost of 32 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
  %shift = ashr <8 x i16> %a, %splat
  ret <8 x i16> %shift
}
; Uniform variable arithmetic shift right on <16 x i16>: lane 0 of %b is
; broadcast to every lane (shufflevector with an all-zero mask) and used as the
; shift amount. The expectations below cover only the cost of %shift; the cost
; of %splat is not checked.
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction:   %shift
; SSE41: Found an estimated cost of 64 for instruction:   %shift
; AVX: Found an estimated cost of 64 for instruction:   %shift
; AVX2: Found an estimated cost of 10 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  %shift = ashr <16 x i16> %a, %splat
  ret <16 x i16> %shift
}
; Uniform variable arithmetic shift right on <16 x i8>: lane 0 of %b is
; broadcast to every lane (shufflevector with an all-zero mask) and used as the
; shift amount. The expectations below cover only the cost of %shift; the cost
; of %splat is not checked.
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction:   %shift
; SSE41: Found an estimated cost of 54 for instruction:   %shift
; AVX: Found an estimated cost of 54 for instruction:   %shift
; AVX2: Found an estimated cost of 54 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
  %shift = ashr <16 x i8> %a, %splat
  ret <16 x i8> %shift
}
; Uniform variable arithmetic shift right on <32 x i8>: lane 0 of %b is
; broadcast to every lane (shufflevector with an all-zero mask) and used as the
; shift amount. The expectations below cover only the cost of %shift; the cost
; of %splat is not checked.
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction:   %shift
; SSE41: Found an estimated cost of 108 for instruction:   %shift
; AVX: Found an estimated cost of 108 for instruction:   %shift
; AVX2: Found an estimated cost of 24 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  %shift = ashr <32 x i8> %a, %splat
  ret <32 x i8> %shift
}

;
; Constant Shifts
;

; Arithmetic shift right of <2 x i64> by a constant vector whose lanes differ
; (1 and 7). The per-target expectations below pin the cost reported for
; %shift.
define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
; SSE2: Found an estimated cost of 12 for instruction:   %shift
; SSE41: Found an estimated cost of 12 for instruction:   %shift
; AVX: Found an estimated cost of 12 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <2 x i64> %a, <i64 1, i64 7>
  ret <2 x i64> %shift
}
; Arithmetic shift right of <4 x i64> by a constant vector whose lanes differ
; (1, 7, 15, 31). The per-target expectations below pin the cost reported for
; %shift.
define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
; SSE2: Found an estimated cost of 24 for instruction:   %shift
; SSE41: Found an estimated cost of 24 for instruction:   %shift
; AVX: Found an estimated cost of 24 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
  ret <4 x i64> %shift
}
; Arithmetic shift right of <4 x i32> by a constant vector whose lanes differ
; (4, 5, 6, 7). The per-target expectations below pin the cost reported for
; %shift; the two XOP runs are checked with separate values here.
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction:   %shift
; SSE41: Found an estimated cost of 16 for instruction:   %shift
; AVX: Found an estimated cost of 16 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %shift = ashr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %shift
}
; Arithmetic shift right of <8 x i32> by a constant vector whose lanes differ
; (4, 5, 6, 7, 0, 1, 2, 3). The per-target expectations below pin the cost
; reported for %shift; the two XOP runs are checked with separate values here.
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction:   %shift
; SSE41: Found an estimated cost of 32 for instruction:   %shift
; AVX: Found an estimated cost of 32 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %shift
}
; Arithmetic shift right of <8 x i16> by a constant vector whose lanes differ
; (0 through 7). The per-target expectations below pin the cost reported for
; %shift.
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction:   %shift
; SSE41: Found an estimated cost of 32 for instruction:   %shift
; AVX: Found an estimated cost of 32 for instruction:   %shift
; AVX2: Found an estimated cost of 32 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  ret <8 x i16> %shift
}
; Arithmetic shift right of <16 x i16> by a constant vector whose lanes differ
; (the sequence 0 through 7, repeated twice). The per-target expectations below
; pin the cost reported for %shift.
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction:   %shift
; SSE41: Found an estimated cost of 64 for instruction:   %shift
; AVX: Found an estimated cost of 64 for instruction:   %shift
; AVX2: Found an estimated cost of 10 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  ret <16 x i16> %shift
}
; Arithmetic shift right of <16 x i8> by a constant vector whose lanes differ
; (0 up to 7, then 7 back down to 0). The per-target expectations below pin the
; cost reported for %shift.
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction:   %shift
; SSE41: Found an estimated cost of 54 for instruction:   %shift
; AVX: Found an estimated cost of 54 for instruction:   %shift
; AVX2: Found an estimated cost of 54 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <16 x i8> %shift
}
; Arithmetic shift right of <32 x i8> by a constant vector whose lanes differ
; (0 up to 7 then 7 back down to 0, repeated twice). The per-target
; expectations below pin the cost reported for %shift.
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction:   %shift
; SSE41: Found an estimated cost of 108 for instruction:   %shift
; AVX: Found an estimated cost of 108 for instruction:   %shift
; AVX2: Found an estimated cost of 24 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <32 x i8> %shift
}

;
; Uniform Constant Shifts
;

; Arithmetic shift right of <2 x i64> where every lane is shifted by the same
; constant (7). The per-target expectations below pin the cost reported for
; %shift.
define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
; SSE2: Found an estimated cost of 4 for instruction:   %shift
; SSE41: Found an estimated cost of 4 for instruction:   %shift
; AVX: Found an estimated cost of 4 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <2 x i64> %a, <i64 7, i64 7>
  ret <2 x i64> %shift
}
; Arithmetic shift right of <4 x i64> where every lane is shifted by the same
; constant (7). The per-target expectations below pin the cost reported for
; %shift.
define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
; SSE2: Found an estimated cost of 8 for instruction:   %shift
; SSE41: Found an estimated cost of 8 for instruction:   %shift
; AVX: Found an estimated cost of 8 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %shift
}
; Arithmetic shift right of <4 x i32> where every lane is shifted by the same
; constant (5). The per-target expectations below pin the cost reported for
; %shift; the two XOP runs are checked with separate values here.
define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
; SSE2: Found an estimated cost of 1 for instruction:   %shift
; SSE41: Found an estimated cost of 1 for instruction:   %shift
; AVX: Found an estimated cost of 1 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %shift
}
; Arithmetic shift right of <8 x i32> where every lane is shifted by the same
; constant (5). The per-target expectations below pin the cost reported for
; %shift; the two XOP runs are checked with separate values here.
define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
; SSE2: Found an estimated cost of 2 for instruction:   %shift
; SSE41: Found an estimated cost of 2 for instruction:   %shift
; AVX: Found an estimated cost of 2 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
  %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %shift
}
; Arithmetic shift right of <8 x i16> where every lane is shifted by the same
; constant (3). The per-target expectations below pin the cost reported for
; %shift.
define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction:   %shift
; SSE41: Found an estimated cost of 1 for instruction:   %shift
; AVX: Found an estimated cost of 1 for instruction:   %shift
; AVX2: Found an estimated cost of 1 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}
; Arithmetic shift right of <16 x i16> where every lane is shifted by the same
; constant (3). The per-target expectations below pin the cost reported for
; %shift.
define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
; SSE2: Found an estimated cost of 2 for instruction:   %shift
; SSE41: Found an estimated cost of 2 for instruction:   %shift
; AVX: Found an estimated cost of 2 for instruction:   %shift
; AVX2: Found an estimated cost of 10 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}
; Arithmetic shift right of <16 x i8> where every lane is shifted by the same
; constant (3). The per-target expectations below pin the cost reported for
; %shift.
define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
; SSE2: Found an estimated cost of 4 for instruction:   %shift
; SSE41: Found an estimated cost of 4 for instruction:   %shift
; AVX: Found an estimated cost of 4 for instruction:   %shift
; AVX2: Found an estimated cost of 4 for instruction:   %shift
; XOP: Found an estimated cost of 2 for instruction:   %shift
  %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %shift
}
; Arithmetic shift right of <32 x i8> where every lane is shifted by the same
; constant (3). The per-target expectations below pin the cost reported for
; %shift.
define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
; SSE2: Found an estimated cost of 8 for instruction:   %shift
; SSE41: Found an estimated cost of 8 for instruction:   %shift
; AVX: Found an estimated cost of 8 for instruction:   %shift
; AVX2: Found an estimated cost of 24 for instruction:   %shift
; XOP: Found an estimated cost of 4 for instruction:   %shift
  %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shift
}