1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-FALLBACK0
3; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-FALLBACK1
4; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-FALLBACK2
5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
6; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2
8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2
9; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2
10; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2
11; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2
12
13; Patterns:
14;    c) x &  (-1 >> y)
15;    d) x << y >> y
16; are equivalent, but we prefer the second variant if we have BMI2.
17
18; We do not test the variant where y = (bitwidth - z), e.g. y = (32 - z) for
; i32, because that is BMI2's BZHI.
19
20; ---------------------------------------------------------------------------- ;
21; 8-bit
22; ---------------------------------------------------------------------------- ;
23
; Pattern c on i8 with both operands passed by value: build the mask
; (-1 >> %numhighbits) and AND it with %val, which clears the top
; %numhighbits bits of %val. The checks below expect a shlb/shrb pair
; by %cl for both the i686 and the x86_64 run lines.
24define i8 @clear_highbits8_c0(i8 %val, i8 %numhighbits) nounwind {
25; X86-LABEL: clear_highbits8_c0:
26; X86:       # %bb.0:
27; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
28; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
29; X86-NEXT:    shlb %cl, %al
30; X86-NEXT:    shrb %cl, %al
31; X86-NEXT:    retl
32;
33; X64-LABEL: clear_highbits8_c0:
34; X64:       # %bb.0:
35; X64-NEXT:    movl %esi, %ecx
36; X64-NEXT:    movl %edi, %eax
37; X64-NEXT:    shlb %cl, %al
38; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
39; X64-NEXT:    shrb %cl, %al
40; X64-NEXT:    # kill: def $al killed $al killed $eax
41; X64-NEXT:    retq
42  %mask = lshr i8 -1, %numhighbits ; all-ones shifted right by %numhighbits
43  %masked = and i8 %mask, %val ; mask is the first operand of the and
44  ret i8 %masked
45}
46
; Pattern c on i8 where the value to be masked is loaded from memory
; through %w instead of being passed by value. The checks below expect
; a separate movb load followed by a shlb/shrb pair by %cl.
47define i8 @clear_highbits8_c2_load(i8* %w, i8 %numhighbits) nounwind {
48; X86-LABEL: clear_highbits8_c2_load:
49; X86:       # %bb.0:
50; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
51; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
52; X86-NEXT:    movb (%eax), %al
53; X86-NEXT:    shlb %cl, %al
54; X86-NEXT:    shrb %cl, %al
55; X86-NEXT:    retl
56;
57; X64-LABEL: clear_highbits8_c2_load:
58; X64:       # %bb.0:
59; X64-NEXT:    movl %esi, %ecx
60; X64-NEXT:    movb (%rdi), %al
61; X64-NEXT:    shlb %cl, %al
62; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
63; X64-NEXT:    shrb %cl, %al
64; X64-NEXT:    retq
65  %val = load i8, i8* %w ; the value to mask comes from memory
66  %mask = lshr i8 -1, %numhighbits ; all-ones shifted right by %numhighbits
67  %masked = and i8 %mask, %val ; mask is the first operand of the and
68  ret i8 %masked
69}
70
; Same as clear_highbits8_c0 but with the operands of the 'and' swapped
; (%val first, %mask second). The checks below are the same instruction
; sequence as for clear_highbits8_c0, i.e. the operand order of the
; 'and' does not change the expected output.
71define i8 @clear_highbits8_c4_commutative(i8 %val, i8 %numhighbits) nounwind {
72; X86-LABEL: clear_highbits8_c4_commutative:
73; X86:       # %bb.0:
74; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
75; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
76; X86-NEXT:    shlb %cl, %al
77; X86-NEXT:    shrb %cl, %al
78; X86-NEXT:    retl
79;
80; X64-LABEL: clear_highbits8_c4_commutative:
81; X64:       # %bb.0:
82; X64-NEXT:    movl %esi, %ecx
83; X64-NEXT:    movl %edi, %eax
84; X64-NEXT:    shlb %cl, %al
85; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
86; X64-NEXT:    shrb %cl, %al
87; X64-NEXT:    # kill: def $al killed $al killed $eax
88; X64-NEXT:    retq
89  %mask = lshr i8 -1, %numhighbits ; all-ones shifted right by %numhighbits
90  %masked = and i8 %val, %mask ; swapped order: value first, mask second
91  ret i8 %masked
92}
93
94; ---------------------------------------------------------------------------- ;
95; 16-bit
96; ---------------------------------------------------------------------------- ;
97
; Pattern c on i16 with both operands passed by value: AND %val with
; (-1 >> %numhighbits) to clear its top %numhighbits bits.
; The checks below expect the shifts to be done in 32-bit registers with
; a movzwl between the left and the right shift: shll/shrl by %cl
; without BMI2, shlxl/shrxl with BMI2.
98define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind {
99; X86-NOBMI2-LABEL: clear_highbits16_c0:
100; X86-NOBMI2:       # %bb.0:
101; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
102; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
103; X86-NOBMI2-NEXT:    shll %cl, %eax
104; X86-NOBMI2-NEXT:    movzwl %ax, %eax
105; X86-NOBMI2-NEXT:    shrl %cl, %eax
106; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
107; X86-NOBMI2-NEXT:    retl
108;
109; X86-BMI2-LABEL: clear_highbits16_c0:
110; X86-BMI2:       # %bb.0:
111; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
112; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
113; X86-BMI2-NEXT:    movzwl %cx, %ecx
114; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
115; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
116; X86-BMI2-NEXT:    retl
117;
118; X64-NOBMI2-LABEL: clear_highbits16_c0:
119; X64-NOBMI2:       # %bb.0:
120; X64-NOBMI2-NEXT:    movl %esi, %ecx
121; X64-NOBMI2-NEXT:    shll %cl, %edi
122; X64-NOBMI2-NEXT:    movzwl %di, %eax
123; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
124; X64-NOBMI2-NEXT:    shrl %cl, %eax
125; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
126; X64-NOBMI2-NEXT:    retq
127;
128; X64-BMI2-LABEL: clear_highbits16_c0:
129; X64-BMI2:       # %bb.0:
130; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
131; X64-BMI2-NEXT:    movzwl %ax, %eax
132; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
133; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
134; X64-BMI2-NEXT:    retq
135  %mask = lshr i16 -1, %numhighbits ; all-ones shifted right by %numhighbits
136  %masked = and i16 %mask, %val ; mask is the first operand of the and
137  ret i16 %masked
138}
139
; Pattern c on i16 where the bit count arrives as an i8 and is
; zero-extended to i16 before it is used as the shift amount.
; The checks below are the same instruction sequence as for
; clear_highbits16_c0, i.e. no separate instruction is expected for
; the zext.
140define i16 @clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind {
141; X86-NOBMI2-LABEL: clear_highbits16_c1_indexzext:
142; X86-NOBMI2:       # %bb.0:
143; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
144; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
145; X86-NOBMI2-NEXT:    shll %cl, %eax
146; X86-NOBMI2-NEXT:    movzwl %ax, %eax
147; X86-NOBMI2-NEXT:    shrl %cl, %eax
148; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
149; X86-NOBMI2-NEXT:    retl
150;
151; X86-BMI2-LABEL: clear_highbits16_c1_indexzext:
152; X86-BMI2:       # %bb.0:
153; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
154; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
155; X86-BMI2-NEXT:    movzwl %cx, %ecx
156; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
157; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
158; X86-BMI2-NEXT:    retl
159;
160; X64-NOBMI2-LABEL: clear_highbits16_c1_indexzext:
161; X64-NOBMI2:       # %bb.0:
162; X64-NOBMI2-NEXT:    movl %esi, %ecx
163; X64-NOBMI2-NEXT:    shll %cl, %edi
164; X64-NOBMI2-NEXT:    movzwl %di, %eax
165; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
166; X64-NOBMI2-NEXT:    shrl %cl, %eax
167; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
168; X64-NOBMI2-NEXT:    retq
169;
170; X64-BMI2-LABEL: clear_highbits16_c1_indexzext:
171; X64-BMI2:       # %bb.0:
172; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
173; X64-BMI2-NEXT:    movzwl %ax, %eax
174; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
175; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
176; X64-BMI2-NEXT:    retq
177  %sh_prom = zext i8 %numhighbits to i16 ; widen the i8 bit count to the operation width
178  %mask = lshr i16 -1, %sh_prom ; all-ones shifted right by the widened count
179  %masked = and i16 %mask, %val ; mask is the first operand of the and
180  ret i16 %masked
181}
182
; Pattern c on i16 where the value to be masked is loaded from memory
; through %w. The checks below expect a movzwl load from the pointer,
; then the 32-bit shift-left / movzwl / shift-right sequence (shll/shrl
; without BMI2, shlxl/shrxl with BMI2).
183define i16 @clear_highbits16_c2_load(i16* %w, i16 %numhighbits) nounwind {
184; X86-NOBMI2-LABEL: clear_highbits16_c2_load:
185; X86-NOBMI2:       # %bb.0:
186; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
187; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
188; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
189; X86-NOBMI2-NEXT:    shll %cl, %eax
190; X86-NOBMI2-NEXT:    movzwl %ax, %eax
191; X86-NOBMI2-NEXT:    shrl %cl, %eax
192; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
193; X86-NOBMI2-NEXT:    retl
194;
195; X86-BMI2-LABEL: clear_highbits16_c2_load:
196; X86-BMI2:       # %bb.0:
197; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
198; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
199; X86-BMI2-NEXT:    movzwl (%ecx), %ecx
200; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
201; X86-BMI2-NEXT:    movzwl %cx, %ecx
202; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
203; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
204; X86-BMI2-NEXT:    retl
205;
206; X64-NOBMI2-LABEL: clear_highbits16_c2_load:
207; X64-NOBMI2:       # %bb.0:
208; X64-NOBMI2-NEXT:    movl %esi, %ecx
209; X64-NOBMI2-NEXT:    movzwl (%rdi), %eax
210; X64-NOBMI2-NEXT:    shll %cl, %eax
211; X64-NOBMI2-NEXT:    movzwl %ax, %eax
212; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
213; X64-NOBMI2-NEXT:    shrl %cl, %eax
214; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
215; X64-NOBMI2-NEXT:    retq
216;
217; X64-BMI2-LABEL: clear_highbits16_c2_load:
218; X64-BMI2:       # %bb.0:
219; X64-BMI2-NEXT:    movzwl (%rdi), %eax
220; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
221; X64-BMI2-NEXT:    movzwl %ax, %eax
222; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
223; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
224; X64-BMI2-NEXT:    retq
225  %val = load i16, i16* %w ; the value to mask comes from memory
226  %mask = lshr i16 -1, %numhighbits ; all-ones shifted right by %numhighbits
227  %masked = and i16 %mask, %val ; mask is the first operand of the and
228  ret i16 %masked
229}
230
; Pattern c on i16 combining both variations: the value is loaded from
; memory through %w, and the bit count arrives as an i8 that is
; zero-extended to i16 before the shift. The checks below are the same
; instruction sequence as for clear_highbits16_c2_load.
231define i16 @clear_highbits16_c3_load_indexzext(i16* %w, i8 %numhighbits) nounwind {
232; X86-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext:
233; X86-NOBMI2:       # %bb.0:
234; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
235; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
236; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
237; X86-NOBMI2-NEXT:    shll %cl, %eax
238; X86-NOBMI2-NEXT:    movzwl %ax, %eax
239; X86-NOBMI2-NEXT:    shrl %cl, %eax
240; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
241; X86-NOBMI2-NEXT:    retl
242;
243; X86-BMI2-LABEL: clear_highbits16_c3_load_indexzext:
244; X86-BMI2:       # %bb.0:
245; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
246; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
247; X86-BMI2-NEXT:    movzwl (%ecx), %ecx
248; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
249; X86-BMI2-NEXT:    movzwl %cx, %ecx
250; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
251; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
252; X86-BMI2-NEXT:    retl
253;
254; X64-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext:
255; X64-NOBMI2:       # %bb.0:
256; X64-NOBMI2-NEXT:    movl %esi, %ecx
257; X64-NOBMI2-NEXT:    movzwl (%rdi), %eax
258; X64-NOBMI2-NEXT:    shll %cl, %eax
259; X64-NOBMI2-NEXT:    movzwl %ax, %eax
260; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
261; X64-NOBMI2-NEXT:    shrl %cl, %eax
262; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
263; X64-NOBMI2-NEXT:    retq
264;
265; X64-BMI2-LABEL: clear_highbits16_c3_load_indexzext:
266; X64-BMI2:       # %bb.0:
267; X64-BMI2-NEXT:    movzwl (%rdi), %eax
268; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
269; X64-BMI2-NEXT:    movzwl %ax, %eax
270; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
271; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
272; X64-BMI2-NEXT:    retq
273  %val = load i16, i16* %w ; the value to mask comes from memory
274  %sh_prom = zext i8 %numhighbits to i16 ; widen the i8 bit count to the operation width
275  %mask = lshr i16 -1, %sh_prom ; all-ones shifted right by the widened count
276  %masked = and i16 %mask, %val ; mask is the first operand of the and
277  ret i16 %masked
278}
279
; Same as clear_highbits16_c0 but with the operands of the 'and' swapped
; (%val first, %mask second). The checks below are the same instruction
; sequence as for clear_highbits16_c0.
280define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind {
281; X86-NOBMI2-LABEL: clear_highbits16_c4_commutative:
282; X86-NOBMI2:       # %bb.0:
283; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
284; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
285; X86-NOBMI2-NEXT:    shll %cl, %eax
286; X86-NOBMI2-NEXT:    movzwl %ax, %eax
287; X86-NOBMI2-NEXT:    shrl %cl, %eax
288; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
289; X86-NOBMI2-NEXT:    retl
290;
291; X86-BMI2-LABEL: clear_highbits16_c4_commutative:
292; X86-BMI2:       # %bb.0:
293; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
294; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
295; X86-BMI2-NEXT:    movzwl %cx, %ecx
296; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
297; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
298; X86-BMI2-NEXT:    retl
299;
300; X64-NOBMI2-LABEL: clear_highbits16_c4_commutative:
301; X64-NOBMI2:       # %bb.0:
302; X64-NOBMI2-NEXT:    movl %esi, %ecx
303; X64-NOBMI2-NEXT:    shll %cl, %edi
304; X64-NOBMI2-NEXT:    movzwl %di, %eax
305; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
306; X64-NOBMI2-NEXT:    shrl %cl, %eax
307; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
308; X64-NOBMI2-NEXT:    retq
309;
310; X64-BMI2-LABEL: clear_highbits16_c4_commutative:
311; X64-BMI2:       # %bb.0:
312; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
313; X64-BMI2-NEXT:    movzwl %ax, %eax
314; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
315; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
316; X64-BMI2-NEXT:    retq
317  %mask = lshr i16 -1, %numhighbits ; all-ones shifted right by %numhighbits
318  %masked = and i16 %val, %mask ; swapped order: value first, mask second
319  ret i16 %masked
320}
321
322; ---------------------------------------------------------------------------- ;
323; 32-bit
324; ---------------------------------------------------------------------------- ;
325
; Pattern c on i32 with both operands passed by value: AND %val with
; (-1 >> %numhighbits) to clear its top %numhighbits bits.
; The checks below expect a shll/shrl pair by %cl without BMI2 and a
; shlxl/shrxl pair with BMI2.
326define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind {
327; X86-NOBMI2-LABEL: clear_highbits32_c0:
328; X86-NOBMI2:       # %bb.0:
329; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
330; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
331; X86-NOBMI2-NEXT:    shll %cl, %eax
332; X86-NOBMI2-NEXT:    shrl %cl, %eax
333; X86-NOBMI2-NEXT:    retl
334;
335; X86-BMI2-LABEL: clear_highbits32_c0:
336; X86-BMI2:       # %bb.0:
337; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
338; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
339; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
340; X86-BMI2-NEXT:    retl
341;
342; X64-NOBMI2-LABEL: clear_highbits32_c0:
343; X64-NOBMI2:       # %bb.0:
344; X64-NOBMI2-NEXT:    movl %esi, %ecx
345; X64-NOBMI2-NEXT:    movl %edi, %eax
346; X64-NOBMI2-NEXT:    shll %cl, %eax
347; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
348; X64-NOBMI2-NEXT:    shrl %cl, %eax
349; X64-NOBMI2-NEXT:    retq
350;
351; X64-BMI2-LABEL: clear_highbits32_c0:
352; X64-BMI2:       # %bb.0:
353; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
354; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
355; X64-BMI2-NEXT:    retq
356  %mask = lshr i32 -1, %numhighbits ; all-ones shifted right by %numhighbits
357  %masked = and i32 %mask, %val ; mask is the first operand of the and
358  ret i32 %masked
359}
360
; Pattern c on i32 where the bit count arrives as an i8 and is
; zero-extended to i32 before it is used as the shift amount.
; The checks below are the same instruction sequence as for
; clear_highbits32_c0, i.e. no separate instruction is expected for
; the zext.
361define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind {
362; X86-NOBMI2-LABEL: clear_highbits32_c1_indexzext:
363; X86-NOBMI2:       # %bb.0:
364; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
365; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
366; X86-NOBMI2-NEXT:    shll %cl, %eax
367; X86-NOBMI2-NEXT:    shrl %cl, %eax
368; X86-NOBMI2-NEXT:    retl
369;
370; X86-BMI2-LABEL: clear_highbits32_c1_indexzext:
371; X86-BMI2:       # %bb.0:
372; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
373; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
374; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
375; X86-BMI2-NEXT:    retl
376;
377; X64-NOBMI2-LABEL: clear_highbits32_c1_indexzext:
378; X64-NOBMI2:       # %bb.0:
379; X64-NOBMI2-NEXT:    movl %esi, %ecx
380; X64-NOBMI2-NEXT:    movl %edi, %eax
381; X64-NOBMI2-NEXT:    shll %cl, %eax
382; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
383; X64-NOBMI2-NEXT:    shrl %cl, %eax
384; X64-NOBMI2-NEXT:    retq
385;
386; X64-BMI2-LABEL: clear_highbits32_c1_indexzext:
387; X64-BMI2:       # %bb.0:
388; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
389; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
390; X64-BMI2-NEXT:    retq
391  %sh_prom = zext i8 %numhighbits to i32 ; widen the i8 bit count to the operation width
392  %mask = lshr i32 -1, %sh_prom ; all-ones shifted right by the widened count
393  %masked = and i32 %mask, %val ; mask is the first operand of the and
394  ret i32 %masked
395}
396
; Pattern c on i32 where the value to be masked is loaded from memory
; through %w. The checks below expect a separate movl load followed by
; shll/shrl without BMI2; with BMI2 the load is expected to be folded
; into the shlxl as a memory operand.
397define i32 @clear_highbits32_c2_load(i32* %w, i32 %numhighbits) nounwind {
398; X86-NOBMI2-LABEL: clear_highbits32_c2_load:
399; X86-NOBMI2:       # %bb.0:
400; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
401; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
402; X86-NOBMI2-NEXT:    movl (%eax), %eax
403; X86-NOBMI2-NEXT:    shll %cl, %eax
404; X86-NOBMI2-NEXT:    shrl %cl, %eax
405; X86-NOBMI2-NEXT:    retl
406;
407; X86-BMI2-LABEL: clear_highbits32_c2_load:
408; X86-BMI2:       # %bb.0:
409; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
410; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
411; X86-BMI2-NEXT:    shlxl %ecx, (%eax), %eax
412; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
413; X86-BMI2-NEXT:    retl
414;
415; X64-NOBMI2-LABEL: clear_highbits32_c2_load:
416; X64-NOBMI2:       # %bb.0:
417; X64-NOBMI2-NEXT:    movl %esi, %ecx
418; X64-NOBMI2-NEXT:    movl (%rdi), %eax
419; X64-NOBMI2-NEXT:    shll %cl, %eax
420; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
421; X64-NOBMI2-NEXT:    shrl %cl, %eax
422; X64-NOBMI2-NEXT:    retq
423;
424; X64-BMI2-LABEL: clear_highbits32_c2_load:
425; X64-BMI2:       # %bb.0:
426; X64-BMI2-NEXT:    shlxl %esi, (%rdi), %eax
427; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
428; X64-BMI2-NEXT:    retq
429  %val = load i32, i32* %w ; the value to mask comes from memory
430  %mask = lshr i32 -1, %numhighbits ; all-ones shifted right by %numhighbits
431  %masked = and i32 %mask, %val ; mask is the first operand of the and
432  ret i32 %masked
433}
434
; Pattern c on i32 combining both variations: the value is loaded from
; memory through %w, and the bit count arrives as an i8 that is
; zero-extended to i32 before the shift. The checks below are the same
; instruction sequence as for clear_highbits32_c2_load.
435define i32 @clear_highbits32_c3_load_indexzext(i32* %w, i8 %numhighbits) nounwind {
436; X86-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext:
437; X86-NOBMI2:       # %bb.0:
438; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
439; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
440; X86-NOBMI2-NEXT:    movl (%eax), %eax
441; X86-NOBMI2-NEXT:    shll %cl, %eax
442; X86-NOBMI2-NEXT:    shrl %cl, %eax
443; X86-NOBMI2-NEXT:    retl
444;
445; X86-BMI2-LABEL: clear_highbits32_c3_load_indexzext:
446; X86-BMI2:       # %bb.0:
447; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
448; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
449; X86-BMI2-NEXT:    shlxl %ecx, (%eax), %eax
450; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
451; X86-BMI2-NEXT:    retl
452;
453; X64-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext:
454; X64-NOBMI2:       # %bb.0:
455; X64-NOBMI2-NEXT:    movl %esi, %ecx
456; X64-NOBMI2-NEXT:    movl (%rdi), %eax
457; X64-NOBMI2-NEXT:    shll %cl, %eax
458; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
459; X64-NOBMI2-NEXT:    shrl %cl, %eax
460; X64-NOBMI2-NEXT:    retq
461;
462; X64-BMI2-LABEL: clear_highbits32_c3_load_indexzext:
463; X64-BMI2:       # %bb.0:
464; X64-BMI2-NEXT:    shlxl %esi, (%rdi), %eax
465; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
466; X64-BMI2-NEXT:    retq
467  %val = load i32, i32* %w ; the value to mask comes from memory
468  %sh_prom = zext i8 %numhighbits to i32 ; widen the i8 bit count to the operation width
469  %mask = lshr i32 -1, %sh_prom ; all-ones shifted right by the widened count
470  %masked = and i32 %mask, %val ; mask is the first operand of the and
471  ret i32 %masked
472}
473
; Same as clear_highbits32_c0 but with the operands of the 'and' swapped
; (%val first, %mask second). The checks below are the same instruction
; sequence as for clear_highbits32_c0.
474define i32 @clear_highbits32_c4_commutative(i32 %val, i32 %numhighbits) nounwind {
475; X86-NOBMI2-LABEL: clear_highbits32_c4_commutative:
476; X86-NOBMI2:       # %bb.0:
477; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
478; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
479; X86-NOBMI2-NEXT:    shll %cl, %eax
480; X86-NOBMI2-NEXT:    shrl %cl, %eax
481; X86-NOBMI2-NEXT:    retl
482;
483; X86-BMI2-LABEL: clear_highbits32_c4_commutative:
484; X86-BMI2:       # %bb.0:
485; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
486; X86-BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
487; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
488; X86-BMI2-NEXT:    retl
489;
490; X64-NOBMI2-LABEL: clear_highbits32_c4_commutative:
491; X64-NOBMI2:       # %bb.0:
492; X64-NOBMI2-NEXT:    movl %esi, %ecx
493; X64-NOBMI2-NEXT:    movl %edi, %eax
494; X64-NOBMI2-NEXT:    shll %cl, %eax
495; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
496; X64-NOBMI2-NEXT:    shrl %cl, %eax
497; X64-NOBMI2-NEXT:    retq
498;
499; X64-BMI2-LABEL: clear_highbits32_c4_commutative:
500; X64-BMI2:       # %bb.0:
501; X64-BMI2-NEXT:    shlxl %esi, %edi, %eax
502; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
503; X64-BMI2-NEXT:    retq
504  %mask = lshr i32 -1, %numhighbits ; all-ones shifted right by %numhighbits
505  %masked = and i32 %val, %mask ; swapped order: value first, mask second
506  ret i32 %masked
507}
508
509; ---------------------------------------------------------------------------- ;
510; 64-bit
511; ---------------------------------------------------------------------------- ;
512
; Pattern c on i64 with both operands passed by value: AND %val with
; (-1 >> %numhighbits) to clear its top %numhighbits bits.
; On x86_64 the checks below expect a shlq/shrq pair by %cl without BMI2
; and a shlxq/shrxq pair with BMI2.
; On i686 the checks expect the 64-bit mask to be materialized in two
; 32-bit halves (shift of -1, then a 'testb $32' on the count to pick
; the halves) and each half to be ANDed with the matching half of the
; value. The half selection uses a branch on the run line without cmov
; and cmov instructions on the others.
513define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind {
514; X86-FALLBACK0-LABEL: clear_highbits64_c0:
515; X86-FALLBACK0:       # %bb.0:
516; X86-FALLBACK0-NEXT:    pushl %esi
517; X86-FALLBACK0-NEXT:    movb {{[0-9]+}}(%esp), %cl
518; X86-FALLBACK0-NEXT:    movl $-1, %eax
519; X86-FALLBACK0-NEXT:    movl $-1, %esi
520; X86-FALLBACK0-NEXT:    shrl %cl, %esi
521; X86-FALLBACK0-NEXT:    xorl %edx, %edx
522; X86-FALLBACK0-NEXT:    testb $32, %cl
523; X86-FALLBACK0-NEXT:    jne .LBB13_1
524; X86-FALLBACK0-NEXT:  # %bb.2:
525; X86-FALLBACK0-NEXT:    movl %esi, %edx
526; X86-FALLBACK0-NEXT:    jmp .LBB13_3
527; X86-FALLBACK0-NEXT:  .LBB13_1:
528; X86-FALLBACK0-NEXT:    movl %esi, %eax
529; X86-FALLBACK0-NEXT:  .LBB13_3:
530; X86-FALLBACK0-NEXT:    andl {{[0-9]+}}(%esp), %eax
531; X86-FALLBACK0-NEXT:    andl {{[0-9]+}}(%esp), %edx
532; X86-FALLBACK0-NEXT:    popl %esi
533; X86-FALLBACK0-NEXT:    retl
534;
535; X86-FALLBACK1-LABEL: clear_highbits64_c0:
536; X86-FALLBACK1:       # %bb.0:
537; X86-FALLBACK1-NEXT:    pushl %esi
538; X86-FALLBACK1-NEXT:    movb {{[0-9]+}}(%esp), %cl
539; X86-FALLBACK1-NEXT:    movl $-1, %esi
540; X86-FALLBACK1-NEXT:    movl $-1, %eax
541; X86-FALLBACK1-NEXT:    shrl %cl, %eax
542; X86-FALLBACK1-NEXT:    xorl %edx, %edx
543; X86-FALLBACK1-NEXT:    testb $32, %cl
544; X86-FALLBACK1-NEXT:    cmovel %eax, %edx
545; X86-FALLBACK1-NEXT:    cmovel %esi, %eax
546; X86-FALLBACK1-NEXT:    andl {{[0-9]+}}(%esp), %eax
547; X86-FALLBACK1-NEXT:    andl {{[0-9]+}}(%esp), %edx
548; X86-FALLBACK1-NEXT:    popl %esi
549; X86-FALLBACK1-NEXT:    retl
550;
551; X86-FALLBACK2-LABEL: clear_highbits64_c0:
552; X86-FALLBACK2:       # %bb.0:
553; X86-FALLBACK2-NEXT:    pushl %esi
554; X86-FALLBACK2-NEXT:    movb {{[0-9]+}}(%esp), %cl
555; X86-FALLBACK2-NEXT:    movl $-1, %esi
556; X86-FALLBACK2-NEXT:    movl $-1, %eax
557; X86-FALLBACK2-NEXT:    shrl %cl, %eax
558; X86-FALLBACK2-NEXT:    xorl %edx, %edx
559; X86-FALLBACK2-NEXT:    testb $32, %cl
560; X86-FALLBACK2-NEXT:    cmovel %eax, %edx
561; X86-FALLBACK2-NEXT:    cmovel %esi, %eax
562; X86-FALLBACK2-NEXT:    andl {{[0-9]+}}(%esp), %eax
563; X86-FALLBACK2-NEXT:    andl {{[0-9]+}}(%esp), %edx
564; X86-FALLBACK2-NEXT:    popl %esi
565; X86-FALLBACK2-NEXT:    retl
566;
567; X86-BMI2-LABEL: clear_highbits64_c0:
568; X86-BMI2:       # %bb.0:
569; X86-BMI2-NEXT:    pushl %esi
570; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
571; X86-BMI2-NEXT:    movl $-1, %eax
572; X86-BMI2-NEXT:    shrxl %ecx, %eax, %esi
573; X86-BMI2-NEXT:    xorl %edx, %edx
574; X86-BMI2-NEXT:    testb $32, %cl
575; X86-BMI2-NEXT:    cmovel %esi, %edx
576; X86-BMI2-NEXT:    cmovnel %esi, %eax
577; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
578; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
579; X86-BMI2-NEXT:    popl %esi
580; X86-BMI2-NEXT:    retl
581;
582; X64-NOBMI2-LABEL: clear_highbits64_c0:
583; X64-NOBMI2:       # %bb.0:
584; X64-NOBMI2-NEXT:    movq %rsi, %rcx
585; X64-NOBMI2-NEXT:    movq %rdi, %rax
586; X64-NOBMI2-NEXT:    shlq %cl, %rax
587; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
588; X64-NOBMI2-NEXT:    shrq %cl, %rax
589; X64-NOBMI2-NEXT:    retq
590;
591; X64-BMI2-LABEL: clear_highbits64_c0:
592; X64-BMI2:       # %bb.0:
593; X64-BMI2-NEXT:    shlxq %rsi, %rdi, %rax
594; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
595; X64-BMI2-NEXT:    retq
596  %mask = lshr i64 -1, %numhighbits ; all-ones shifted right by %numhighbits
597  %masked = and i64 %mask, %val ; mask is the first operand of the and
598  ret i64 %masked
599}
600
; Pattern c on i64 where the bit count arrives as an i8 and is
; zero-extended to i64 before it is used as the shift amount.
; On x86_64 the checks below expect shlq/shrq by %cl without BMI2 and
; shlxq/shrxq with BMI2. On i686 the expected sequences are the same as
; for clear_highbits64_c0 apart from the basic-block label numbers: the
; mask is built in two 32-bit halves and ANDed with each half of the
; value.
601define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind {
602; X86-FALLBACK0-LABEL: clear_highbits64_c1_indexzext:
603; X86-FALLBACK0:       # %bb.0:
604; X86-FALLBACK0-NEXT:    pushl %esi
605; X86-FALLBACK0-NEXT:    movb {{[0-9]+}}(%esp), %cl
606; X86-FALLBACK0-NEXT:    movl $-1, %eax
607; X86-FALLBACK0-NEXT:    movl $-1, %esi
608; X86-FALLBACK0-NEXT:    shrl %cl, %esi
609; X86-FALLBACK0-NEXT:    xorl %edx, %edx
610; X86-FALLBACK0-NEXT:    testb $32, %cl
611; X86-FALLBACK0-NEXT:    jne .LBB14_1
612; X86-FALLBACK0-NEXT:  # %bb.2:
613; X86-FALLBACK0-NEXT:    movl %esi, %edx
614; X86-FALLBACK0-NEXT:    jmp .LBB14_3
615; X86-FALLBACK0-NEXT:  .LBB14_1:
616; X86-FALLBACK0-NEXT:    movl %esi, %eax
617; X86-FALLBACK0-NEXT:  .LBB14_3:
618; X86-FALLBACK0-NEXT:    andl {{[0-9]+}}(%esp), %eax
619; X86-FALLBACK0-NEXT:    andl {{[0-9]+}}(%esp), %edx
620; X86-FALLBACK0-NEXT:    popl %esi
621; X86-FALLBACK0-NEXT:    retl
622;
623; X86-FALLBACK1-LABEL: clear_highbits64_c1_indexzext:
624; X86-FALLBACK1:       # %bb.0:
625; X86-FALLBACK1-NEXT:    pushl %esi
626; X86-FALLBACK1-NEXT:    movb {{[0-9]+}}(%esp), %cl
627; X86-FALLBACK1-NEXT:    movl $-1, %esi
628; X86-FALLBACK1-NEXT:    movl $-1, %eax
629; X86-FALLBACK1-NEXT:    shrl %cl, %eax
630; X86-FALLBACK1-NEXT:    xorl %edx, %edx
631; X86-FALLBACK1-NEXT:    testb $32, %cl
632; X86-FALLBACK1-NEXT:    cmovel %eax, %edx
633; X86-FALLBACK1-NEXT:    cmovel %esi, %eax
634; X86-FALLBACK1-NEXT:    andl {{[0-9]+}}(%esp), %eax
635; X86-FALLBACK1-NEXT:    andl {{[0-9]+}}(%esp), %edx
636; X86-FALLBACK1-NEXT:    popl %esi
637; X86-FALLBACK1-NEXT:    retl
638;
639; X86-FALLBACK2-LABEL: clear_highbits64_c1_indexzext:
640; X86-FALLBACK2:       # %bb.0:
641; X86-FALLBACK2-NEXT:    pushl %esi
642; X86-FALLBACK2-NEXT:    movb {{[0-9]+}}(%esp), %cl
643; X86-FALLBACK2-NEXT:    movl $-1, %esi
644; X86-FALLBACK2-NEXT:    movl $-1, %eax
645; X86-FALLBACK2-NEXT:    shrl %cl, %eax
646; X86-FALLBACK2-NEXT:    xorl %edx, %edx
647; X86-FALLBACK2-NEXT:    testb $32, %cl
648; X86-FALLBACK2-NEXT:    cmovel %eax, %edx
649; X86-FALLBACK2-NEXT:    cmovel %esi, %eax
650; X86-FALLBACK2-NEXT:    andl {{[0-9]+}}(%esp), %eax
651; X86-FALLBACK2-NEXT:    andl {{[0-9]+}}(%esp), %edx
652; X86-FALLBACK2-NEXT:    popl %esi
653; X86-FALLBACK2-NEXT:    retl
654;
655; X86-BMI2-LABEL: clear_highbits64_c1_indexzext:
656; X86-BMI2:       # %bb.0:
657; X86-BMI2-NEXT:    pushl %esi
658; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
659; X86-BMI2-NEXT:    movl $-1, %eax
660; X86-BMI2-NEXT:    shrxl %ecx, %eax, %esi
661; X86-BMI2-NEXT:    xorl %edx, %edx
662; X86-BMI2-NEXT:    testb $32, %cl
663; X86-BMI2-NEXT:    cmovel %esi, %edx
664; X86-BMI2-NEXT:    cmovnel %esi, %eax
665; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
666; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
667; X86-BMI2-NEXT:    popl %esi
668; X86-BMI2-NEXT:    retl
669;
670; X64-NOBMI2-LABEL: clear_highbits64_c1_indexzext:
671; X64-NOBMI2:       # %bb.0:
672; X64-NOBMI2-NEXT:    movl %esi, %ecx
673; X64-NOBMI2-NEXT:    movq %rdi, %rax
674; X64-NOBMI2-NEXT:    shlq %cl, %rax
675; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
676; X64-NOBMI2-NEXT:    shrq %cl, %rax
677; X64-NOBMI2-NEXT:    retq
678;
679; X64-BMI2-LABEL: clear_highbits64_c1_indexzext:
680; X64-BMI2:       # %bb.0:
681; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
682; X64-BMI2-NEXT:    shlxq %rsi, %rdi, %rax
683; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
684; X64-BMI2-NEXT:    retq
685  %sh_prom = zext i8 %numhighbits to i64 ; widen the i8 bit count to the operation width
686  %mask = lshr i64 -1, %sh_prom ; all-ones shifted right by the widened count
687  %masked = and i64 %mask, %val ; mask is the first operand of the and
688  ret i64 %masked
689}
690
; Pattern c on i64 where the value to be masked is loaded from memory
; through %w.
; On x86_64 the checks below expect a movq load followed by shlq/shrq
; without BMI2; with BMI2 the load is expected to be folded into the
; shlxq as a memory operand.
; On i686 the checks expect the 64-bit mask to be built in two 32-bit
; halves (selected by 'testb $32' on the count) and each half to be
; ANDed directly with the corresponding 32-bit word in memory, at
; offsets 0 and 4 from the pointer.
691define i64 @clear_highbits64_c2_load(i64* %w, i64 %numhighbits) nounwind {
692; X86-FALLBACK0-LABEL: clear_highbits64_c2_load:
693; X86-FALLBACK0:       # %bb.0:
694; X86-FALLBACK0-NEXT:    pushl %edi
695; X86-FALLBACK0-NEXT:    pushl %esi
696; X86-FALLBACK0-NEXT:    movl {{[0-9]+}}(%esp), %esi
697; X86-FALLBACK0-NEXT:    movb {{[0-9]+}}(%esp), %cl
698; X86-FALLBACK0-NEXT:    movl $-1, %eax
699; X86-FALLBACK0-NEXT:    movl $-1, %edi
700; X86-FALLBACK0-NEXT:    shrl %cl, %edi
701; X86-FALLBACK0-NEXT:    xorl %edx, %edx
702; X86-FALLBACK0-NEXT:    testb $32, %cl
703; X86-FALLBACK0-NEXT:    jne .LBB15_1
704; X86-FALLBACK0-NEXT:  # %bb.2:
705; X86-FALLBACK0-NEXT:    movl %edi, %edx
706; X86-FALLBACK0-NEXT:    jmp .LBB15_3
707; X86-FALLBACK0-NEXT:  .LBB15_1:
708; X86-FALLBACK0-NEXT:    movl %edi, %eax
709; X86-FALLBACK0-NEXT:  .LBB15_3:
710; X86-FALLBACK0-NEXT:    andl (%esi), %eax
711; X86-FALLBACK0-NEXT:    andl 4(%esi), %edx
712; X86-FALLBACK0-NEXT:    popl %esi
713; X86-FALLBACK0-NEXT:    popl %edi
714; X86-FALLBACK0-NEXT:    retl
715;
716; X86-FALLBACK1-LABEL: clear_highbits64_c2_load:
717; X86-FALLBACK1:       # %bb.0:
718; X86-FALLBACK1-NEXT:    pushl %edi
719; X86-FALLBACK1-NEXT:    pushl %esi
720; X86-FALLBACK1-NEXT:    movl {{[0-9]+}}(%esp), %esi
721; X86-FALLBACK1-NEXT:    movb {{[0-9]+}}(%esp), %cl
722; X86-FALLBACK1-NEXT:    movl $-1, %edi
723; X86-FALLBACK1-NEXT:    movl $-1, %eax
724; X86-FALLBACK1-NEXT:    shrl %cl, %eax
725; X86-FALLBACK1-NEXT:    xorl %edx, %edx
726; X86-FALLBACK1-NEXT:    testb $32, %cl
727; X86-FALLBACK1-NEXT:    cmovel %eax, %edx
728; X86-FALLBACK1-NEXT:    cmovel %edi, %eax
729; X86-FALLBACK1-NEXT:    andl (%esi), %eax
730; X86-FALLBACK1-NEXT:    andl 4(%esi), %edx
731; X86-FALLBACK1-NEXT:    popl %esi
732; X86-FALLBACK1-NEXT:    popl %edi
733; X86-FALLBACK1-NEXT:    retl
734;
735; X86-FALLBACK2-LABEL: clear_highbits64_c2_load:
736; X86-FALLBACK2:       # %bb.0:
737; X86-FALLBACK2-NEXT:    pushl %edi
738; X86-FALLBACK2-NEXT:    pushl %esi
739; X86-FALLBACK2-NEXT:    movl {{[0-9]+}}(%esp), %esi
740; X86-FALLBACK2-NEXT:    movb {{[0-9]+}}(%esp), %cl
741; X86-FALLBACK2-NEXT:    movl $-1, %edi
742; X86-FALLBACK2-NEXT:    movl $-1, %eax
743; X86-FALLBACK2-NEXT:    shrl %cl, %eax
744; X86-FALLBACK2-NEXT:    xorl %edx, %edx
745; X86-FALLBACK2-NEXT:    testb $32, %cl
746; X86-FALLBACK2-NEXT:    cmovel %eax, %edx
747; X86-FALLBACK2-NEXT:    cmovel %edi, %eax
748; X86-FALLBACK2-NEXT:    andl (%esi), %eax
749; X86-FALLBACK2-NEXT:    andl 4(%esi), %edx
750; X86-FALLBACK2-NEXT:    popl %esi
751; X86-FALLBACK2-NEXT:    popl %edi
752; X86-FALLBACK2-NEXT:    retl
753;
754; X86-BMI2-LABEL: clear_highbits64_c2_load:
755; X86-BMI2:       # %bb.0:
756; X86-BMI2-NEXT:    pushl %ebx
757; X86-BMI2-NEXT:    pushl %esi
758; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
759; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
760; X86-BMI2-NEXT:    movl $-1, %eax
761; X86-BMI2-NEXT:    shrxl %ebx, %eax, %esi
762; X86-BMI2-NEXT:    xorl %edx, %edx
763; X86-BMI2-NEXT:    testb $32, %bl
764; X86-BMI2-NEXT:    cmovel %esi, %edx
765; X86-BMI2-NEXT:    cmovnel %esi, %eax
766; X86-BMI2-NEXT:    andl (%ecx), %eax
767; X86-BMI2-NEXT:    andl 4(%ecx), %edx
768; X86-BMI2-NEXT:    popl %esi
769; X86-BMI2-NEXT:    popl %ebx
770; X86-BMI2-NEXT:    retl
771;
772; X64-NOBMI2-LABEL: clear_highbits64_c2_load:
773; X64-NOBMI2:       # %bb.0:
774; X64-NOBMI2-NEXT:    movq %rsi, %rcx
775; X64-NOBMI2-NEXT:    movq (%rdi), %rax
776; X64-NOBMI2-NEXT:    shlq %cl, %rax
777; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
778; X64-NOBMI2-NEXT:    shrq %cl, %rax
779; X64-NOBMI2-NEXT:    retq
780;
781; X64-BMI2-LABEL: clear_highbits64_c2_load:
782; X64-BMI2:       # %bb.0:
783; X64-BMI2-NEXT:    shlxq %rsi, (%rdi), %rax
784; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
785; X64-BMI2-NEXT:    retq
786  %val = load i64, i64* %w ; the value to mask comes from memory
787  %mask = lshr i64 -1, %numhighbits ; all-ones shifted right by %numhighbits
788  %masked = and i64 %mask, %val ; mask is the first operand of the and
789  ret i64 %masked
790}
791
; Pattern c) on i64 with the value loaded from memory and an i8 bit count that
; is zero-extended to i64 before being used as the shift amount:
;   result = (-1 >> zext(%numhighbits)) & load(%w)
; The 64-bit check lines below expect a left-shift/right-shift pair by the same
; count; the 32-bit check lines expect the mask to be built from -1 with a
; 32-bit right shift, a test of bit 5 of the count (testb $32), and two 'and's
; against the low and high halves of the loaded value.
792define i64 @clear_highbits64_c3_load_indexzext(i64* %w, i8 %numhighbits) nounwind {
793; X86-FALLBACK0-LABEL: clear_highbits64_c3_load_indexzext:
794; X86-FALLBACK0:       # %bb.0:
795; X86-FALLBACK0-NEXT:    pushl %edi
796; X86-FALLBACK0-NEXT:    pushl %esi
797; X86-FALLBACK0-NEXT:    movl {{[0-9]+}}(%esp), %esi
798; X86-FALLBACK0-NEXT:    movb {{[0-9]+}}(%esp), %cl
799; X86-FALLBACK0-NEXT:    movl $-1, %eax
800; X86-FALLBACK0-NEXT:    movl $-1, %edi
801; X86-FALLBACK0-NEXT:    shrl %cl, %edi
802; X86-FALLBACK0-NEXT:    xorl %edx, %edx
803; X86-FALLBACK0-NEXT:    testb $32, %cl
804; X86-FALLBACK0-NEXT:    jne .LBB16_1
805; X86-FALLBACK0-NEXT:  # %bb.2:
806; X86-FALLBACK0-NEXT:    movl %edi, %edx
807; X86-FALLBACK0-NEXT:    jmp .LBB16_3
808; X86-FALLBACK0-NEXT:  .LBB16_1:
809; X86-FALLBACK0-NEXT:    movl %edi, %eax
810; X86-FALLBACK0-NEXT:  .LBB16_3:
811; X86-FALLBACK0-NEXT:    andl (%esi), %eax
812; X86-FALLBACK0-NEXT:    andl 4(%esi), %edx
813; X86-FALLBACK0-NEXT:    popl %esi
814; X86-FALLBACK0-NEXT:    popl %edi
815; X86-FALLBACK0-NEXT:    retl
816;
817; X86-FALLBACK1-LABEL: clear_highbits64_c3_load_indexzext:
818; X86-FALLBACK1:       # %bb.0:
819; X86-FALLBACK1-NEXT:    pushl %edi
820; X86-FALLBACK1-NEXT:    pushl %esi
821; X86-FALLBACK1-NEXT:    movl {{[0-9]+}}(%esp), %esi
822; X86-FALLBACK1-NEXT:    movb {{[0-9]+}}(%esp), %cl
823; X86-FALLBACK1-NEXT:    movl $-1, %edi
824; X86-FALLBACK1-NEXT:    movl $-1, %eax
825; X86-FALLBACK1-NEXT:    shrl %cl, %eax
826; X86-FALLBACK1-NEXT:    xorl %edx, %edx
827; X86-FALLBACK1-NEXT:    testb $32, %cl
828; X86-FALLBACK1-NEXT:    cmovel %eax, %edx
829; X86-FALLBACK1-NEXT:    cmovel %edi, %eax
830; X86-FALLBACK1-NEXT:    andl (%esi), %eax
831; X86-FALLBACK1-NEXT:    andl 4(%esi), %edx
832; X86-FALLBACK1-NEXT:    popl %esi
833; X86-FALLBACK1-NEXT:    popl %edi
834; X86-FALLBACK1-NEXT:    retl
835;
836; X86-FALLBACK2-LABEL: clear_highbits64_c3_load_indexzext:
837; X86-FALLBACK2:       # %bb.0:
838; X86-FALLBACK2-NEXT:    pushl %edi
839; X86-FALLBACK2-NEXT:    pushl %esi
840; X86-FALLBACK2-NEXT:    movl {{[0-9]+}}(%esp), %esi
841; X86-FALLBACK2-NEXT:    movb {{[0-9]+}}(%esp), %cl
842; X86-FALLBACK2-NEXT:    movl $-1, %edi
843; X86-FALLBACK2-NEXT:    movl $-1, %eax
844; X86-FALLBACK2-NEXT:    shrl %cl, %eax
845; X86-FALLBACK2-NEXT:    xorl %edx, %edx
846; X86-FALLBACK2-NEXT:    testb $32, %cl
847; X86-FALLBACK2-NEXT:    cmovel %eax, %edx
848; X86-FALLBACK2-NEXT:    cmovel %edi, %eax
849; X86-FALLBACK2-NEXT:    andl (%esi), %eax
850; X86-FALLBACK2-NEXT:    andl 4(%esi), %edx
851; X86-FALLBACK2-NEXT:    popl %esi
852; X86-FALLBACK2-NEXT:    popl %edi
853; X86-FALLBACK2-NEXT:    retl
854;
855; X86-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
856; X86-BMI2:       # %bb.0:
857; X86-BMI2-NEXT:    pushl %ebx
858; X86-BMI2-NEXT:    pushl %esi
859; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
860; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
861; X86-BMI2-NEXT:    movl $-1, %eax
862; X86-BMI2-NEXT:    shrxl %ebx, %eax, %esi
863; X86-BMI2-NEXT:    xorl %edx, %edx
864; X86-BMI2-NEXT:    testb $32, %bl
865; X86-BMI2-NEXT:    cmovel %esi, %edx
866; X86-BMI2-NEXT:    cmovnel %esi, %eax
867; X86-BMI2-NEXT:    andl (%ecx), %eax
868; X86-BMI2-NEXT:    andl 4(%ecx), %edx
869; X86-BMI2-NEXT:    popl %esi
870; X86-BMI2-NEXT:    popl %ebx
871; X86-BMI2-NEXT:    retl
872;
873; X64-NOBMI2-LABEL: clear_highbits64_c3_load_indexzext:
874; X64-NOBMI2:       # %bb.0:
875; X64-NOBMI2-NEXT:    movl %esi, %ecx
876; X64-NOBMI2-NEXT:    movq (%rdi), %rax
877; X64-NOBMI2-NEXT:    shlq %cl, %rax
878; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
879; X64-NOBMI2-NEXT:    shrq %cl, %rax
880; X64-NOBMI2-NEXT:    retq
881;
882; X64-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
883; X64-BMI2:       # %bb.0:
884; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
885; X64-BMI2-NEXT:    shlxq %rsi, (%rdi), %rax
886; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
887; X64-BMI2-NEXT:    retq
  ; Fetch the 64-bit value to be masked through the pointer argument.
888  %val = load i64, i64* %w
  ; Widen the i8 bit count to the i64 width required by the shift.
889  %sh_prom = zext i8 %numhighbits to i64
  ; All-ones logically shifted right: the top %sh_prom bits become zero.
890  %mask = lshr i64 -1, %sh_prom
  ; Mask is the first operand of the 'and' in this variant.
891  %masked = and i64 %mask, %val
892  ret i64 %masked
893}
894
; Pattern c) on i64 with both operands passed by value and the operands of the
; 'and' swapped (value first, mask second):
;   result = %val & (-1 >> %numhighbits)
; The 64-bit check lines below expect a left-shift/right-shift pair by the same
; count; the 32-bit check lines expect the mask to be built from -1 with a
; 32-bit right shift, a test of bit 5 of the count (testb $32), and two 'and's
; against the stack slots of the argument halves.
895define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind {
896; X86-FALLBACK0-LABEL: clear_highbits64_c4_commutative:
897; X86-FALLBACK0:       # %bb.0:
898; X86-FALLBACK0-NEXT:    pushl %esi
899; X86-FALLBACK0-NEXT:    movb {{[0-9]+}}(%esp), %cl
900; X86-FALLBACK0-NEXT:    movl $-1, %eax
901; X86-FALLBACK0-NEXT:    movl $-1, %esi
902; X86-FALLBACK0-NEXT:    shrl %cl, %esi
903; X86-FALLBACK0-NEXT:    xorl %edx, %edx
904; X86-FALLBACK0-NEXT:    testb $32, %cl
905; X86-FALLBACK0-NEXT:    jne .LBB17_1
906; X86-FALLBACK0-NEXT:  # %bb.2:
907; X86-FALLBACK0-NEXT:    movl %esi, %edx
908; X86-FALLBACK0-NEXT:    jmp .LBB17_3
909; X86-FALLBACK0-NEXT:  .LBB17_1:
910; X86-FALLBACK0-NEXT:    movl %esi, %eax
911; X86-FALLBACK0-NEXT:  .LBB17_3:
912; X86-FALLBACK0-NEXT:    andl {{[0-9]+}}(%esp), %eax
913; X86-FALLBACK0-NEXT:    andl {{[0-9]+}}(%esp), %edx
914; X86-FALLBACK0-NEXT:    popl %esi
915; X86-FALLBACK0-NEXT:    retl
916;
917; X86-FALLBACK1-LABEL: clear_highbits64_c4_commutative:
918; X86-FALLBACK1:       # %bb.0:
919; X86-FALLBACK1-NEXT:    pushl %esi
920; X86-FALLBACK1-NEXT:    movb {{[0-9]+}}(%esp), %cl
921; X86-FALLBACK1-NEXT:    movl $-1, %esi
922; X86-FALLBACK1-NEXT:    movl $-1, %eax
923; X86-FALLBACK1-NEXT:    shrl %cl, %eax
924; X86-FALLBACK1-NEXT:    xorl %edx, %edx
925; X86-FALLBACK1-NEXT:    testb $32, %cl
926; X86-FALLBACK1-NEXT:    cmovel %eax, %edx
927; X86-FALLBACK1-NEXT:    cmovel %esi, %eax
928; X86-FALLBACK1-NEXT:    andl {{[0-9]+}}(%esp), %eax
929; X86-FALLBACK1-NEXT:    andl {{[0-9]+}}(%esp), %edx
930; X86-FALLBACK1-NEXT:    popl %esi
931; X86-FALLBACK1-NEXT:    retl
932;
933; X86-FALLBACK2-LABEL: clear_highbits64_c4_commutative:
934; X86-FALLBACK2:       # %bb.0:
935; X86-FALLBACK2-NEXT:    pushl %esi
936; X86-FALLBACK2-NEXT:    movb {{[0-9]+}}(%esp), %cl
937; X86-FALLBACK2-NEXT:    movl $-1, %esi
938; X86-FALLBACK2-NEXT:    movl $-1, %eax
939; X86-FALLBACK2-NEXT:    shrl %cl, %eax
940; X86-FALLBACK2-NEXT:    xorl %edx, %edx
941; X86-FALLBACK2-NEXT:    testb $32, %cl
942; X86-FALLBACK2-NEXT:    cmovel %eax, %edx
943; X86-FALLBACK2-NEXT:    cmovel %esi, %eax
944; X86-FALLBACK2-NEXT:    andl {{[0-9]+}}(%esp), %eax
945; X86-FALLBACK2-NEXT:    andl {{[0-9]+}}(%esp), %edx
946; X86-FALLBACK2-NEXT:    popl %esi
947; X86-FALLBACK2-NEXT:    retl
948;
949; X86-BMI2-LABEL: clear_highbits64_c4_commutative:
950; X86-BMI2:       # %bb.0:
951; X86-BMI2-NEXT:    pushl %esi
952; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
953; X86-BMI2-NEXT:    movl $-1, %eax
954; X86-BMI2-NEXT:    shrxl %ecx, %eax, %esi
955; X86-BMI2-NEXT:    xorl %edx, %edx
956; X86-BMI2-NEXT:    testb $32, %cl
957; X86-BMI2-NEXT:    cmovel %esi, %edx
958; X86-BMI2-NEXT:    cmovnel %esi, %eax
959; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
960; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
961; X86-BMI2-NEXT:    popl %esi
962; X86-BMI2-NEXT:    retl
963;
964; X64-NOBMI2-LABEL: clear_highbits64_c4_commutative:
965; X64-NOBMI2:       # %bb.0:
966; X64-NOBMI2-NEXT:    movq %rsi, %rcx
967; X64-NOBMI2-NEXT:    movq %rdi, %rax
968; X64-NOBMI2-NEXT:    shlq %cl, %rax
969; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
970; X64-NOBMI2-NEXT:    shrq %cl, %rax
971; X64-NOBMI2-NEXT:    retq
972;
973; X64-BMI2-LABEL: clear_highbits64_c4_commutative:
974; X64-BMI2:       # %bb.0:
975; X64-BMI2-NEXT:    shlxq %rsi, %rdi, %rax
976; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
977; X64-BMI2-NEXT:    retq
  ; All-ones logically shifted right: the top %numhighbits bits become zero.
978  %mask = lshr i64 -1, %numhighbits
  ; Same masking as the other 'c' variants, but with %val as the first operand.
979  %masked = and i64 %val, %mask ; swapped order
980  ret i64 %masked
981}
982
983; ---------------------------------------------------------------------------- ;
984; Multi-use tests
985; ---------------------------------------------------------------------------- ;
986
; External functions (declared only, no body in this file chunk) taking a
; single i32 / i64 argument. @oneuse32 and @oneuse64 pass %mask to them, which
; gives the mask a second use besides the 'and'.
987declare void @use32(i32)
988declare void @use64(i64)
989
; i32 multi-use test: the mask (-1 >> %numhighbits) is passed to @use32 and is
; then also and-ed with %val, so the mask value itself has to be materialized.
; The check lines for every prefix below expect the mask to be computed with a
; right shift of -1 (shrl / shrxl), kept in a callee-saved register across the
; call, and combined with an 'and' afterwards; no shl/shr pair is expected.
990define i32 @oneuse32(i32 %val, i32 %numhighbits) nounwind {
991; X86-NOBMI2-LABEL: oneuse32:
992; X86-NOBMI2:       # %bb.0:
993; X86-NOBMI2-NEXT:    pushl %esi
994; X86-NOBMI2-NEXT:    subl $8, %esp
995; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
996; X86-NOBMI2-NEXT:    movl $-1, %esi
997; X86-NOBMI2-NEXT:    shrl %cl, %esi
998; X86-NOBMI2-NEXT:    movl %esi, (%esp)
999; X86-NOBMI2-NEXT:    calll use32
1000; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
1001; X86-NOBMI2-NEXT:    movl %esi, %eax
1002; X86-NOBMI2-NEXT:    addl $8, %esp
1003; X86-NOBMI2-NEXT:    popl %esi
1004; X86-NOBMI2-NEXT:    retl
1005;
1006; X86-BMI2-LABEL: oneuse32:
1007; X86-BMI2:       # %bb.0:
1008; X86-BMI2-NEXT:    pushl %esi
1009; X86-BMI2-NEXT:    subl $8, %esp
1010; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
1011; X86-BMI2-NEXT:    movl $-1, %ecx
1012; X86-BMI2-NEXT:    shrxl %eax, %ecx, %esi
1013; X86-BMI2-NEXT:    movl %esi, (%esp)
1014; X86-BMI2-NEXT:    calll use32
1015; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
1016; X86-BMI2-NEXT:    movl %esi, %eax
1017; X86-BMI2-NEXT:    addl $8, %esp
1018; X86-BMI2-NEXT:    popl %esi
1019; X86-BMI2-NEXT:    retl
1020;
1021; X64-NOBMI2-LABEL: oneuse32:
1022; X64-NOBMI2:       # %bb.0:
1023; X64-NOBMI2-NEXT:    pushq %rbp
1024; X64-NOBMI2-NEXT:    pushq %rbx
1025; X64-NOBMI2-NEXT:    pushq %rax
1026; X64-NOBMI2-NEXT:    movl %esi, %ecx
1027; X64-NOBMI2-NEXT:    movl %edi, %ebx
1028; X64-NOBMI2-NEXT:    movl $-1, %ebp
1029; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
1030; X64-NOBMI2-NEXT:    shrl %cl, %ebp
1031; X64-NOBMI2-NEXT:    movl %ebp, %edi
1032; X64-NOBMI2-NEXT:    callq use32
1033; X64-NOBMI2-NEXT:    andl %ebx, %ebp
1034; X64-NOBMI2-NEXT:    movl %ebp, %eax
1035; X64-NOBMI2-NEXT:    addq $8, %rsp
1036; X64-NOBMI2-NEXT:    popq %rbx
1037; X64-NOBMI2-NEXT:    popq %rbp
1038; X64-NOBMI2-NEXT:    retq
1039;
1040; X64-BMI2-LABEL: oneuse32:
1041; X64-BMI2:       # %bb.0:
1042; X64-BMI2-NEXT:    pushq %rbp
1043; X64-BMI2-NEXT:    pushq %rbx
1044; X64-BMI2-NEXT:    pushq %rax
1045; X64-BMI2-NEXT:    movl %edi, %ebx
1046; X64-BMI2-NEXT:    movl $-1, %eax
1047; X64-BMI2-NEXT:    shrxl %esi, %eax, %ebp
1048; X64-BMI2-NEXT:    movl %ebp, %edi
1049; X64-BMI2-NEXT:    callq use32
1050; X64-BMI2-NEXT:    andl %ebx, %ebp
1051; X64-BMI2-NEXT:    movl %ebp, %eax
1052; X64-BMI2-NEXT:    addq $8, %rsp
1053; X64-BMI2-NEXT:    popq %rbx
1054; X64-BMI2-NEXT:    popq %rbp
1055; X64-BMI2-NEXT:    retq
  ; All-ones logically shifted right: the top %numhighbits bits become zero.
1056  %mask = lshr i32 -1, %numhighbits
  ; Extra use of the mask, in addition to the 'and' below.
1057  call void @use32(i32 %mask)
1058  %masked = and i32 %mask, %val
1059  ret i32 %masked
1060}
1061
; i64 multi-use test: the mask (-1 >> %numhighbits) is passed to @use64 and is
; then also and-ed with %val, so the mask value itself has to be materialized.
; The 64-bit check lines below expect a single right shift of -1 (shrq / shrxq)
; kept in a callee-saved register across the call, followed by an 'and'. The
; 32-bit check lines expect the two mask halves to be built from a 32-bit right
; shift plus a test of bit 5 of the count (testb $32), pushed as the call
; argument, and then and-ed with the argument halves after the call.
1062define i64 @oneuse64(i64 %val, i64 %numhighbits) nounwind {
1063; X86-FALLBACK0-LABEL: oneuse64:
1064; X86-FALLBACK0:       # %bb.0:
1065; X86-FALLBACK0-NEXT:    pushl %edi
1066; X86-FALLBACK0-NEXT:    pushl %esi
1067; X86-FALLBACK0-NEXT:    pushl %eax
1068; X86-FALLBACK0-NEXT:    movb {{[0-9]+}}(%esp), %cl
1069; X86-FALLBACK0-NEXT:    movl $-1, %esi
1070; X86-FALLBACK0-NEXT:    movl $-1, %edi
1071; X86-FALLBACK0-NEXT:    shrl %cl, %edi
1072; X86-FALLBACK0-NEXT:    testb $32, %cl
1073; X86-FALLBACK0-NEXT:    je .LBB19_2
1074; X86-FALLBACK0-NEXT:  # %bb.1:
1075; X86-FALLBACK0-NEXT:    movl %edi, %esi
1076; X86-FALLBACK0-NEXT:    xorl %edi, %edi
1077; X86-FALLBACK0-NEXT:  .LBB19_2:
1078; X86-FALLBACK0-NEXT:    subl $8, %esp
1079; X86-FALLBACK0-NEXT:    pushl %edi
1080; X86-FALLBACK0-NEXT:    pushl %esi
1081; X86-FALLBACK0-NEXT:    calll use64
1082; X86-FALLBACK0-NEXT:    addl $16, %esp
1083; X86-FALLBACK0-NEXT:    andl {{[0-9]+}}(%esp), %esi
1084; X86-FALLBACK0-NEXT:    andl {{[0-9]+}}(%esp), %edi
1085; X86-FALLBACK0-NEXT:    movl %esi, %eax
1086; X86-FALLBACK0-NEXT:    movl %edi, %edx
1087; X86-FALLBACK0-NEXT:    addl $4, %esp
1088; X86-FALLBACK0-NEXT:    popl %esi
1089; X86-FALLBACK0-NEXT:    popl %edi
1090; X86-FALLBACK0-NEXT:    retl
1091;
1092; X86-FALLBACK1-LABEL: oneuse64:
1093; X86-FALLBACK1:       # %bb.0:
1094; X86-FALLBACK1-NEXT:    pushl %edi
1095; X86-FALLBACK1-NEXT:    pushl %esi
1096; X86-FALLBACK1-NEXT:    pushl %eax
1097; X86-FALLBACK1-NEXT:    movb {{[0-9]+}}(%esp), %cl
1098; X86-FALLBACK1-NEXT:    movl $-1, %esi
1099; X86-FALLBACK1-NEXT:    movl $-1, %eax
1100; X86-FALLBACK1-NEXT:    shrl %cl, %eax
1101; X86-FALLBACK1-NEXT:    xorl %edi, %edi
1102; X86-FALLBACK1-NEXT:    testb $32, %cl
1103; X86-FALLBACK1-NEXT:    cmovnel %eax, %esi
1104; X86-FALLBACK1-NEXT:    cmovel %eax, %edi
1105; X86-FALLBACK1-NEXT:    subl $8, %esp
1106; X86-FALLBACK1-NEXT:    pushl %edi
1107; X86-FALLBACK1-NEXT:    pushl %esi
1108; X86-FALLBACK1-NEXT:    calll use64
1109; X86-FALLBACK1-NEXT:    addl $16, %esp
1110; X86-FALLBACK1-NEXT:    andl {{[0-9]+}}(%esp), %esi
1111; X86-FALLBACK1-NEXT:    andl {{[0-9]+}}(%esp), %edi
1112; X86-FALLBACK1-NEXT:    movl %esi, %eax
1113; X86-FALLBACK1-NEXT:    movl %edi, %edx
1114; X86-FALLBACK1-NEXT:    addl $4, %esp
1115; X86-FALLBACK1-NEXT:    popl %esi
1116; X86-FALLBACK1-NEXT:    popl %edi
1117; X86-FALLBACK1-NEXT:    retl
1118;
1119; X86-FALLBACK2-LABEL: oneuse64:
1120; X86-FALLBACK2:       # %bb.0:
1121; X86-FALLBACK2-NEXT:    pushl %edi
1122; X86-FALLBACK2-NEXT:    pushl %esi
1123; X86-FALLBACK2-NEXT:    pushl %eax
1124; X86-FALLBACK2-NEXT:    movb {{[0-9]+}}(%esp), %cl
1125; X86-FALLBACK2-NEXT:    movl $-1, %esi
1126; X86-FALLBACK2-NEXT:    movl $-1, %eax
1127; X86-FALLBACK2-NEXT:    shrl %cl, %eax
1128; X86-FALLBACK2-NEXT:    xorl %edi, %edi
1129; X86-FALLBACK2-NEXT:    testb $32, %cl
1130; X86-FALLBACK2-NEXT:    cmovnel %eax, %esi
1131; X86-FALLBACK2-NEXT:    cmovel %eax, %edi
1132; X86-FALLBACK2-NEXT:    subl $8, %esp
1133; X86-FALLBACK2-NEXT:    pushl %edi
1134; X86-FALLBACK2-NEXT:    pushl %esi
1135; X86-FALLBACK2-NEXT:    calll use64
1136; X86-FALLBACK2-NEXT:    addl $16, %esp
1137; X86-FALLBACK2-NEXT:    andl {{[0-9]+}}(%esp), %esi
1138; X86-FALLBACK2-NEXT:    andl {{[0-9]+}}(%esp), %edi
1139; X86-FALLBACK2-NEXT:    movl %esi, %eax
1140; X86-FALLBACK2-NEXT:    movl %edi, %edx
1141; X86-FALLBACK2-NEXT:    addl $4, %esp
1142; X86-FALLBACK2-NEXT:    popl %esi
1143; X86-FALLBACK2-NEXT:    popl %edi
1144; X86-FALLBACK2-NEXT:    retl
1145;
1146; X86-BMI2-LABEL: oneuse64:
1147; X86-BMI2:       # %bb.0:
1148; X86-BMI2-NEXT:    pushl %edi
1149; X86-BMI2-NEXT:    pushl %esi
1150; X86-BMI2-NEXT:    pushl %eax
1151; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
1152; X86-BMI2-NEXT:    movl $-1, %esi
1153; X86-BMI2-NEXT:    shrxl %eax, %esi, %ecx
1154; X86-BMI2-NEXT:    xorl %edi, %edi
1155; X86-BMI2-NEXT:    testb $32, %al
1156; X86-BMI2-NEXT:    cmovnel %ecx, %esi
1157; X86-BMI2-NEXT:    cmovel %ecx, %edi
1158; X86-BMI2-NEXT:    subl $8, %esp
1159; X86-BMI2-NEXT:    pushl %edi
1160; X86-BMI2-NEXT:    pushl %esi
1161; X86-BMI2-NEXT:    calll use64
1162; X86-BMI2-NEXT:    addl $16, %esp
1163; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
1164; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
1165; X86-BMI2-NEXT:    movl %esi, %eax
1166; X86-BMI2-NEXT:    movl %edi, %edx
1167; X86-BMI2-NEXT:    addl $4, %esp
1168; X86-BMI2-NEXT:    popl %esi
1169; X86-BMI2-NEXT:    popl %edi
1170; X86-BMI2-NEXT:    retl
1171;
1172; X64-NOBMI2-LABEL: oneuse64:
1173; X64-NOBMI2:       # %bb.0:
1174; X64-NOBMI2-NEXT:    pushq %r14
1175; X64-NOBMI2-NEXT:    pushq %rbx
1176; X64-NOBMI2-NEXT:    pushq %rax
1177; X64-NOBMI2-NEXT:    movq %rsi, %rcx
1178; X64-NOBMI2-NEXT:    movq %rdi, %r14
1179; X64-NOBMI2-NEXT:    movq $-1, %rbx
1180; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
1181; X64-NOBMI2-NEXT:    shrq %cl, %rbx
1182; X64-NOBMI2-NEXT:    movq %rbx, %rdi
1183; X64-NOBMI2-NEXT:    callq use64
1184; X64-NOBMI2-NEXT:    andq %r14, %rbx
1185; X64-NOBMI2-NEXT:    movq %rbx, %rax
1186; X64-NOBMI2-NEXT:    addq $8, %rsp
1187; X64-NOBMI2-NEXT:    popq %rbx
1188; X64-NOBMI2-NEXT:    popq %r14
1189; X64-NOBMI2-NEXT:    retq
1190;
1191; X64-BMI2-LABEL: oneuse64:
1192; X64-BMI2:       # %bb.0:
1193; X64-BMI2-NEXT:    pushq %r14
1194; X64-BMI2-NEXT:    pushq %rbx
1195; X64-BMI2-NEXT:    pushq %rax
1196; X64-BMI2-NEXT:    movq %rdi, %r14
1197; X64-BMI2-NEXT:    movq $-1, %rax
1198; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rbx
1199; X64-BMI2-NEXT:    movq %rbx, %rdi
1200; X64-BMI2-NEXT:    callq use64
1201; X64-BMI2-NEXT:    andq %r14, %rbx
1202; X64-BMI2-NEXT:    movq %rbx, %rax
1203; X64-BMI2-NEXT:    addq $8, %rsp
1204; X64-BMI2-NEXT:    popq %rbx
1205; X64-BMI2-NEXT:    popq %r14
1206; X64-BMI2-NEXT:    retq
  ; All-ones logically shifted right: the top %numhighbits bits become zero.
1207  %mask = lshr i64 -1, %numhighbits
  ; Extra use of the mask, in addition to the 'and' below.
1208  call void @use64(i64 %mask)
1209  %masked = and i64 %mask, %val
1210  ret i64 %masked
1211}
1212