; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, %b
  ret <16 x i32> %shift
}

define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm6 = ymm4[4],ymm0[4],ymm4[5],ymm0[5],ymm4[6],ymm0[6],ymm4[7],ymm0[7],ymm4[12],ymm0[12],ymm4[13],ymm0[13],ymm4[14],ymm0[14],ymm4[15],ymm0[15]
; AVX512DQ-NEXT:    vpsllvd %ymm5, %ymm6, %ymm5
; AVX512DQ-NEXT:    vpsrld $16, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm4[0],ymm0[0],ymm4[1],ymm0[1],ymm4[2],ymm0[2],ymm4[3],ymm0[3],ymm4[8],ymm0[8],ymm4[9],ymm0[9],ymm4[10],ymm0[10],ymm4[11],ymm0[11]
; AVX512DQ-NEXT:    vpsllvd %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsrld $16, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpackusdw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
; AVX512DQ-NEXT:    vpunpckhwd {{.*#+}} ymm5 = ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15]
; AVX512DQ-NEXT:    vpsllvd %ymm2, %ymm5, %ymm2
; AVX512DQ-NEXT:    vpsrld $16, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
; AVX512DQ-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11]
; AVX512DQ-NEXT:    vpsllvd %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsrld $16, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpackusdw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, %b
  ret <32 x i16> %shift
}

define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm4
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm4
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %shift = shl <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; ALL-NEXT:    vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vmovd %xmm2, %eax
; AVX512DQ-NEXT:    movzwl %ax, %eax
; AVX512DQ-NEXT:    vmovd %eax, %xmm2
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vmovd %xmm1, %eax
; AVX512BW-NEXT:    movzwl %ax, %eax
; AVX512BW-NEXT:    vmovd %eax, %xmm1
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw $5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm7
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = shl <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllvq {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsllvw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
; AVX512DQ-NEXT:    vpsllw $5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm7
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vpsllq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vpslld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}
