; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i386-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI2 %s
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI264 %s

; Variable 32-bit left shift, both operands passed by value.
; Expected: one shlx on each target. On i386 the count is loaded as a byte
; into %al and the value is used directly from its stack slot; on x86-64
; both operands are already in registers.
define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: shl32:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl32:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shlxl %esi, %edi, %eax
; BMI264-NEXT:    retq
  %shl = shl i32 %x, %shamt
  ret i32 %shl
}

; 32-bit left shift by the constant 5.
; Expected: the immediate form of the legacy shift (shll $5) on both targets,
; not shlx, after the value has been moved into %eax.
define i32 @shl32i(i32 %x) nounwind uwtable readnone {
; BMI2-LABEL: shl32i:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    shll $5, %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl32i:
; BMI264:       # %bb.0:
; BMI264-NEXT:    movl %edi, %eax
; BMI264-NEXT:    shll $5, %eax
; BMI264-NEXT:    retq
  %shl = shl i32 %x, 5
  ret i32 %shl
}

; Variable 32-bit left shift of a value loaded from %p.
; Expected: the load is folded into the memory operand of shlx on both
; targets.
; The function is readonly rather than readnone because its body loads
; through %p.
define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readonly {
; BMI2-LABEL: shl32p:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    shlxl %ecx, (%eax), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl32p:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shlxl %esi, (%rdi), %eax
; BMI264-NEXT:    retq
  %x = load i32, i32* %p
  %shl = shl i32 %x, %shamt
  ret i32 %shl
}

; 32-bit left shift by the constant 5 of a value loaded from %p.
; Expected: a plain load into %eax followed by shll $5 on both targets; no
; shlx is used for the immediate count.
; The function is readonly rather than readnone because its body loads
; through %p.
define i32 @shl32pi(i32* %p) nounwind uwtable readonly {
; BMI2-LABEL: shl32pi:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl (%eax), %eax
; BMI2-NEXT:    shll $5, %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl32pi:
; BMI264:       # %bb.0:
; BMI264-NEXT:    movl (%rdi), %eax
; BMI264-NEXT:    shll $5, %eax
; BMI264-NEXT:    retq
  %x = load i32, i32* %p
  %shl = shl i32 %x, 5
  ret i32 %shl
}

; Variable 64-bit left shift, both operands passed by value.
; Expected on x86-64: a single shlxq.
; Expected on i386: the i64 shift is expanded over two 32-bit halves. shldl
; produces the high half, shlxl the low half, and a cmov pair keyed on bit 5
; of the count (testb $32) selects the results for counts of 32 or more.
; %esi is saved and restored around the sequence, with matching .cfi
; directives.
define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: shl64:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 8
; BMI2-NEXT:    .cfi_offset %esi, -8
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shldl %cl, %eax, %edx
; BMI2-NEXT:    shlxl %ecx, %eax, %esi
; BMI2-NEXT:    xorl %eax, %eax
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %edx
; BMI2-NEXT:    cmovel %esi, %eax
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 4
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl64:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shlxq %rsi, %rdi, %rax
; BMI264-NEXT:    retq
  %shl = shl i64 %x, %shamt
  ret i64 %shl
}

; 64-bit left shift by the constant 7.
; Expected on x86-64: movq into %rax followed by shlq $7.
; Expected on i386: shldl $7 for the high half and shll $7 for the low half;
; no shlx is used for the immediate count.
define i64 @shl64i(i64 %x) nounwind uwtable readnone {
; BMI2-LABEL: shl64i:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shldl $7, %eax, %edx
; BMI2-NEXT:    shll $7, %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl64i:
; BMI264:       # %bb.0:
; BMI264-NEXT:    movq %rdi, %rax
; BMI264-NEXT:    shlq $7, %rax
; BMI264-NEXT:    retq
  %shl = shl i64 %x, 7
  ret i64 %shl
}

; Variable 64-bit left shift of a value loaded from %p.
; Expected on x86-64: the load is folded into the memory operand of shlxq.
; Expected on i386: both 32-bit halves are loaded, then the shift is expanded
; with shldl (high half), shlxl (low half) and a cmov pair keyed on bit 5 of
; the count (testb $32).
; The function is readonly rather than readnone because its body loads
; through %p.
define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readonly {
; BMI2-LABEL: shl64p:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 8
; BMI2-NEXT:    .cfi_offset %esi, -8
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl (%eax), %esi
; BMI2-NEXT:    movl 4(%eax), %edx
; BMI2-NEXT:    shldl %cl, %esi, %edx
; BMI2-NEXT:    shlxl %ecx, %esi, %esi
; BMI2-NEXT:    xorl %eax, %eax
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %edx
; BMI2-NEXT:    cmovel %esi, %eax
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 4
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl64p:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shlxq %rsi, (%rdi), %rax
; BMI264-NEXT:    retq
  %x = load i64, i64* %p
  %shl = shl i64 %x, %shamt
  ret i64 %shl
}

; 64-bit left shift by the constant 7 of a value loaded from %p.
; Expected on x86-64: a plain movq load followed by shlq $7.
; Expected on i386: both halves are loaded, then shldl $7 / shll $7.
; The function is readonly rather than readnone because its body loads
; through %p.
define i64 @shl64pi(i64* %p) nounwind uwtable readonly {
; BMI2-LABEL: shl64pi:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT:    movl (%ecx), %eax
; BMI2-NEXT:    movl 4(%ecx), %edx
; BMI2-NEXT:    shldl $7, %eax, %edx
; BMI2-NEXT:    shll $7, %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl64pi:
; BMI264:       # %bb.0:
; BMI264-NEXT:    movq (%rdi), %rax
; BMI264-NEXT:    shlq $7, %rax
; BMI264-NEXT:    retq
  %x = load i64, i64* %p
  %shl = shl i64 %x, 7
  ret i64 %shl
}

; Variable 32-bit logical right shift, both operands passed by value.
; Expected: one shrx on each target. On i386 the count is loaded as a byte
; into %al and the value is used directly from its stack slot.
define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: lshr32:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: lshr32:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shrxl %esi, %edi, %eax
; BMI264-NEXT:    retq
  %res = lshr i32 %x, %shamt
  ret i32 %res
}

; Variable 32-bit logical right shift of a value loaded from %p.
; Expected: the load is folded into the memory operand of shrx on both
; targets.
; The function is readonly rather than readnone because its body loads
; through %p.
define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readonly {
; BMI2-LABEL: lshr32p:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    shrxl %ecx, (%eax), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: lshr32p:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shrxl %esi, (%rdi), %eax
; BMI264-NEXT:    retq
  %x = load i32, i32* %p
  %res = lshr i32 %x, %shamt
  ret i32 %res
}

; Variable 64-bit logical right shift, both operands passed by value.
; Expected on x86-64: a single shrxq.
; Expected on i386: the i64 shift is expanded over two 32-bit halves. shrdl
; produces the low half, shrxl the high half, the high result is zeroed with
; xorl, and a cmov pair keyed on bit 5 of the count (testb $32) selects the
; results for counts of 32 or more.
define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: lshr64:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 8
; BMI2-NEXT:    .cfi_offset %esi, -8
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shrdl %cl, %edx, %eax
; BMI2-NEXT:    shrxl %ecx, %edx, %esi
; BMI2-NEXT:    xorl %edx, %edx
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %eax
; BMI2-NEXT:    cmovel %esi, %edx
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 4
; BMI2-NEXT:    retl
;
; BMI264-LABEL: lshr64:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shrxq %rsi, %rdi, %rax
; BMI264-NEXT:    retq
  %res = lshr i64 %x, %shamt
  ret i64 %res
}

; Variable 64-bit logical right shift of a value loaded from %p.
; Expected on x86-64: the load is folded into the memory operand of shrxq.
; Expected on i386: both 32-bit halves are loaded, then the shift is expanded
; with shrdl (low half), shrxl (high half), xorl for the zero fill, and a
; cmov pair keyed on bit 5 of the count (testb $32).
; The function is readonly rather than readnone because its body loads
; through %p.
define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readonly {
; BMI2-LABEL: lshr64p:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 8
; BMI2-NEXT:    .cfi_offset %esi, -8
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    movl (%edx), %eax
; BMI2-NEXT:    movl 4(%edx), %edx
; BMI2-NEXT:    shrdl %cl, %edx, %eax
; BMI2-NEXT:    shrxl %ecx, %edx, %esi
; BMI2-NEXT:    xorl %edx, %edx
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %eax
; BMI2-NEXT:    cmovel %esi, %edx
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 4
; BMI2-NEXT:    retl
;
; BMI264-LABEL: lshr64p:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shrxq %rsi, (%rdi), %rax
; BMI264-NEXT:    retq
  %x = load i64, i64* %p
  %res = lshr i64 %x, %shamt
  ret i64 %res
}

; Variable 32-bit arithmetic right shift, both operands passed by value.
; Expected: one sarx on each target. On i386 the count is loaded as a byte
; into %al and the value is used directly from its stack slot.
define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: ashr32:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; BMI2-NEXT:    sarxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: ashr32:
; BMI264:       # %bb.0:
; BMI264-NEXT:    sarxl %esi, %edi, %eax
; BMI264-NEXT:    retq
  %res = ashr i32 %x, %shamt
  ret i32 %res
}

; Variable 32-bit arithmetic right shift of a value loaded from %p.
; Expected: the load is folded into the memory operand of sarx on both
; targets.
; The function is readonly rather than readnone because its body loads
; through %p.
define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readonly {
; BMI2-LABEL: ashr32p:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    sarxl %ecx, (%eax), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: ashr32p:
; BMI264:       # %bb.0:
; BMI264-NEXT:    sarxl %esi, (%rdi), %eax
; BMI264-NEXT:    retq
  %x = load i32, i32* %p
  %res = ashr i32 %x, %shamt
  ret i32 %res
}

; Variable 64-bit arithmetic right shift, both operands passed by value.
; Expected on x86-64: a single sarxq.
; Expected on i386: the i64 shift is expanded over two 32-bit halves. shrdl
; produces the low half, sarxl the high half, sarl $31 builds the sign fill,
; and a cmov pair keyed on bit 5 of the count (testb $32) selects the results
; for counts of 32 or more.
define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: ashr64:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 8
; BMI2-NEXT:    .cfi_offset %esi, -8
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shrdl %cl, %edx, %eax
; BMI2-NEXT:    sarxl %ecx, %edx, %esi
; BMI2-NEXT:    sarl $31, %edx
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %eax
; BMI2-NEXT:    cmovel %esi, %edx
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 4
; BMI2-NEXT:    retl
;
; BMI264-LABEL: ashr64:
; BMI264:       # %bb.0:
; BMI264-NEXT:    sarxq %rsi, %rdi, %rax
; BMI264-NEXT:    retq
  %res = ashr i64 %x, %shamt
  ret i64 %res
}

; Variable 64-bit arithmetic right shift of a value loaded from %p.
; Expected on x86-64: the load is folded into the memory operand of sarxq.
; Expected on i386: both 32-bit halves are loaded, then the shift is expanded
; with shrdl (low half), sarxl (high half), sarl $31 for the sign fill, and a
; cmov pair keyed on bit 5 of the count (testb $32).
; The function is readonly rather than readnone because its body loads
; through %p.
define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readonly {
; BMI2-LABEL: ashr64p:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 8
; BMI2-NEXT:    .cfi_offset %esi, -8
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    movl (%edx), %eax
; BMI2-NEXT:    movl 4(%edx), %edx
; BMI2-NEXT:    shrdl %cl, %edx, %eax
; BMI2-NEXT:    sarxl %ecx, %edx, %esi
; BMI2-NEXT:    sarl $31, %edx
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %eax
; BMI2-NEXT:    cmovel %esi, %edx
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 4
; BMI2-NEXT:    retl
;
; BMI264-LABEL: ashr64p:
; BMI264:       # %bb.0:
; BMI264-NEXT:    sarxq %rsi, (%rdi), %rax
; BMI264-NEXT:    retq
  %x = load i64, i64* %p
  %res = ashr i64 %x, %shamt
  ret i64 %res
}

; 32-bit left shift whose count is %t masked with 31.
; Expected: the explicit mask produces no and instruction; the output is the
; same single shlx as for an unmasked count. Note the argument order: the
; count (%t) is the first argument and the shifted value (%val) the second.
define i32 @shl32and(i32 %t, i32 %val) nounwind {
; BMI2-LABEL: shl32and:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl32and:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shlxl %edi, %esi, %eax
; BMI264-NEXT:    retq
  %shamt = and i32 %t, 31
  %res = shl i32 %val, %shamt
  ret i32 %res
}

; 64-bit left shift whose count is %t masked with 63.
; Expected on x86-64: a single shlxq with no separate and instruction.
; Expected on i386: the two-half expansion (shldl, shlxl, cmov pair keyed on
; testb $32), again with no and instruction for the mask. The function has no
; uwtable attribute and the expected output contains no .cfi directives.
define i64 @shl64and(i64 %t, i64 %val) nounwind {
; BMI2-LABEL: shl64and:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shldl %cl, %eax, %edx
; BMI2-NEXT:    shlxl %ecx, %eax, %esi
; BMI2-NEXT:    xorl %eax, %eax
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %edx
; BMI2-NEXT:    cmovel %esi, %eax
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl64and:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shlxq %rdi, %rsi, %rax
; BMI264-NEXT:    retq
  %shamt = and i64 %t, 63
  %res = shl i64 %val, %shamt
  ret i64 %res
}

; 32-bit logical right shift whose count is %t masked with 31.
; Expected: the explicit mask produces no and instruction; the output is a
; single shrx. The count (%t) is the first argument and the shifted value
; (%val) the second.
define i32 @lshr32and(i32 %t, i32 %val) nounwind {
; BMI2-LABEL: lshr32and:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: lshr32and:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shrxl %edi, %esi, %eax
; BMI264-NEXT:    retq
  %shamt = and i32 %t, 31
  %res = lshr i32 %val, %shamt
  ret i32 %res
}

; 64-bit logical right shift whose count is %t masked with 63.
; Expected on x86-64: a single shrxq with no separate and instruction.
; Expected on i386: the two-half expansion (shrdl, shrxl, xorl zero fill,
; cmov pair keyed on testb $32), again with no and instruction for the mask.
; The function has no uwtable attribute and the expected output contains no
; .cfi directives.
define i64 @lshr64and(i64 %t, i64 %val) nounwind {
; BMI2-LABEL: lshr64and:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shrdl %cl, %edx, %eax
; BMI2-NEXT:    shrxl %ecx, %edx, %esi
; BMI2-NEXT:    xorl %edx, %edx
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %eax
; BMI2-NEXT:    cmovel %esi, %edx
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    retl
;
; BMI264-LABEL: lshr64and:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shrxq %rdi, %rsi, %rax
; BMI264-NEXT:    retq
  %shamt = and i64 %t, 63
  %res = lshr i64 %val, %shamt
  ret i64 %res
}

; 32-bit arithmetic right shift whose count is %t masked with 31.
; Expected: the explicit mask produces no and instruction; the output is a
; single sarx. The count (%t) is the first argument and the shifted value
; (%val) the second.
define i32 @ashr32and(i32 %t, i32 %val) nounwind {
; BMI2-LABEL: ashr32and:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; BMI2-NEXT:    sarxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: ashr32and:
; BMI264:       # %bb.0:
; BMI264-NEXT:    sarxl %edi, %esi, %eax
; BMI264-NEXT:    retq
  %shamt = and i32 %t, 31
  %res = ashr i32 %val, %shamt
  ret i32 %res
}

; 64-bit arithmetic right shift whose count is %t masked with 63.
; Expected on x86-64: a single sarxq with no separate and instruction.
; Expected on i386: the two-half expansion (shrdl, sarxl, sarl $31 sign fill,
; cmov pair keyed on testb $32), again with no and instruction for the mask.
; The function has no uwtable attribute and the expected output contains no
; .cfi directives.
define i64 @ashr64and(i64 %t, i64 %val) nounwind {
; BMI2-LABEL: ashr64and:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shrdl %cl, %edx, %eax
; BMI2-NEXT:    sarxl %ecx, %edx, %esi
; BMI2-NEXT:    sarl $31, %edx
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %eax
; BMI2-NEXT:    cmovel %esi, %edx
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    retl
;
; BMI264-LABEL: ashr64and:
; BMI264:       # %bb.0:
; BMI264-NEXT:    sarxq %rdi, %rsi, %rax
; BMI264-NEXT:    retq
  %shamt = and i64 %t, 63
  %res = ashr i64 %val, %shamt
  ret i64 %res
}
