; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86

; These test cases are inspired by C++2a std::midpoint().
; See https://bugs.llvm.org/show_bug.cgi?id=40965
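;
; Each of these tests exercises the same overflow-free midpoint pattern.
; As a rough C++ sketch of the signed variant (illustrative only: the
; names mirror the IR temporaries below, and this is not the actual
; libstdc++ implementation; the unsigned tests just compare unsigned):
;
;   int midpoint(int a1, int a2) {
;     int sign = a1 > a2 ? -1 : 1;            // t3, t4
;     unsigned lo = a1 > a2 ? a2 : a1;        // t5: smaller value
;     unsigned hi = a1 > a2 ? a1 : a2;        // t6: larger value
;     return a1 + sign * int((hi - lo) / 2);  // t7, t8, t9, a10
;   }
;
; In the straight-line x86-64 lowering, setcc+lea builds 2*cond-1 (the
; +/-1 multiplier), two cmov's select min and max, shrl halves the
; difference, and imull applies the sign.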

; ---------------------------------------------------------------------------- ;
; 32-bit width
; ---------------------------------------------------------------------------- ;

; Values come from regs

define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
; X64-LABEL: scalar_i32_signed_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpl %esi, %edi
; X64-NEXT:    setle %al
; X64-NEXT:    leal -1(%rax,%rax), %eax
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    cmovgl %esi, %ecx
; X64-NEXT:    cmovgl %edi, %esi
; X64-NEXT:    subl %ecx, %esi
; X64-NEXT:    shrl %esi
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_signed_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB0_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB0_3
; X86-NEXT:  .LBB0_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB0_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %t3 = icmp sgt i32 %a1, %a2 ; signed
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul nsw i32 %t8, %t4 ; signed
  %a10 = add nsw i32 %t9, %a1 ; signed
  ret i32 %a10
}

define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind {
; X64-LABEL: scalar_i32_unsigned_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpl %esi, %edi
; X64-NEXT:    setbe %al
; X64-NEXT:    leal -1(%rax,%rax), %eax
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    cmoval %esi, %ecx
; X64-NEXT:    cmoval %edi, %esi
; X64-NEXT:    subl %ecx, %esi
; X64-NEXT:    shrl %esi
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_unsigned_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    setbe %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    ja .LBB1_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB1_3
; X86-NEXT:  .LBB1_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB1_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %t3 = icmp ugt i32 %a1, %a2
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul i32 %t8, %t4
  %a10 = add i32 %t9, %a1
  ret i32 %a10
}

; Values are loaded. Only check signed case.

define i32 @scalar_i32_signed_mem_reg(i32* %a1_addr, i32 %a2) nounwind {
; X64-LABEL: scalar_i32_signed_mem_reg:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %ecx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpl %esi, %ecx
; X64-NEXT:    setle %al
; X64-NEXT:    leal -1(%rax,%rax), %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    cmovgl %esi, %edx
; X64-NEXT:    cmovgl %ecx, %esi
; X64-NEXT:    subl %edx, %esi
; X64-NEXT:    shrl %esi
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_signed_mem_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB2_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB2_3
; X86-NEXT:  .LBB2_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB2_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a1 = load i32, i32* %a1_addr
  %t3 = icmp sgt i32 %a1, %a2 ; signed
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul nsw i32 %t8, %t4 ; signed
  %a10 = add nsw i32 %t9, %a1 ; signed
  ret i32 %a10
}

define i32 @scalar_i32_signed_reg_mem(i32 %a1, i32* %a2_addr) nounwind {
; X64-LABEL: scalar_i32_signed_reg_mem:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpl %eax, %edi
; X64-NEXT:    setle %cl
; X64-NEXT:    leal -1(%rcx,%rcx), %ecx
; X64-NEXT:    movl %edi, %edx
; X64-NEXT:    cmovgl %eax, %edx
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    subl %edx, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_signed_reg_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB3_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB3_3
; X86-NEXT:  .LBB3_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB3_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a2 = load i32, i32* %a2_addr
  %t3 = icmp sgt i32 %a1, %a2 ; signed
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul nsw i32 %t8, %t4 ; signed
  %a10 = add nsw i32 %t9, %a1 ; signed
  ret i32 %a10
}

define i32 @scalar_i32_signed_mem_mem(i32* %a1_addr, i32* %a2_addr) nounwind {
; X64-LABEL: scalar_i32_signed_mem_mem:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %ecx
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    cmpl %eax, %ecx
; X64-NEXT:    setle %dl
; X64-NEXT:    leal -1(%rdx,%rdx), %edx
; X64-NEXT:    movl %ecx, %esi
; X64-NEXT:    cmovgl %eax, %esi
; X64-NEXT:    cmovgl %ecx, %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %edx, %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_signed_mem_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    movl (%eax), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB4_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB4_3
; X86-NEXT:  .LBB4_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB4_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a1 = load i32, i32* %a1_addr
  %a2 = load i32, i32* %a2_addr
  %t3 = icmp sgt i32 %a1, %a2 ; signed
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul nsw i32 %t8, %t4 ; signed
  %a10 = add nsw i32 %t9, %a1 ; signed
  ret i32 %a10
}

; ---------------------------------------------------------------------------- ;
; 64-bit width
; ---------------------------------------------------------------------------- ;

; Values come from regs

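; On the 32-bit target, i64 is not legal: the compare is expanded to
; cmpl+sbbl, the halving to shrdl+shrl, and the multiply by the +/-1
; constant to a mull plus imull/addl fixups for the high half.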
define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; X64-LABEL: scalar_i64_signed_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpq %rsi, %rdi
; X64-NEXT:    setle %al
; X64-NEXT:    leaq -1(%rax,%rax), %rax
; X64-NEXT:    movq %rdi, %rcx
; X64-NEXT:    cmovgq %rsi, %rcx
; X64-NEXT:    cmovgq %rdi, %rsi
; X64-NEXT:    subq %rcx, %rsi
; X64-NEXT:    shrq %rsi
; X64-NEXT:    imulq %rsi, %rax
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_signed_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl $-1, %ebx
; X86-NEXT:    jl .LBB5_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    xorl %ebp, %ebp
; X86-NEXT:    movl $1, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB5_3
; X86-NEXT:  .LBB5_1:
; X86-NEXT:    movl $-1, %ebp
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB5_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    sbbl %edx, %edi
; X86-NEXT:    shrdl $1, %edi, %eax
; X86-NEXT:    imull %eax, %ebp
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    shrl %edi
; X86-NEXT:    imull %ebx, %edi
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %t3 = icmp sgt i64 %a1, %a2 ; signed
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul nsw i64 %t8, %t4 ; signed
  %a10 = add nsw i64 %t9, %a1 ; signed
  ret i64 %a10
}

define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; X64-LABEL: scalar_i64_unsigned_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpq %rsi, %rdi
; X64-NEXT:    setbe %al
; X64-NEXT:    leaq -1(%rax,%rax), %rax
; X64-NEXT:    movq %rdi, %rcx
; X64-NEXT:    cmovaq %rsi, %rcx
; X64-NEXT:    cmovaq %rdi, %rsi
; X64-NEXT:    subq %rcx, %rsi
; X64-NEXT:    shrq %rsi
; X64-NEXT:    imulq %rsi, %rax
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_unsigned_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl $-1, %ebx
; X86-NEXT:    jb .LBB6_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    xorl %ebp, %ebp
; X86-NEXT:    movl $1, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB6_3
; X86-NEXT:  .LBB6_1:
; X86-NEXT:    movl $-1, %ebp
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB6_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    sbbl %edx, %edi
; X86-NEXT:    shrdl $1, %edi, %eax
; X86-NEXT:    imull %eax, %ebp
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    shrl %edi
; X86-NEXT:    imull %ebx, %edi
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %t3 = icmp ugt i64 %a1, %a2
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul i64 %t8, %t4
  %a10 = add i64 %t9, %a1
  ret i64 %a10
}

; Values are loaded. Only check signed case.

define i64 @scalar_i64_signed_mem_reg(i64* %a1_addr, i64 %a2) nounwind {
; X64-LABEL: scalar_i64_signed_mem_reg:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rcx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpq %rsi, %rcx
; X64-NEXT:    setle %al
; X64-NEXT:    leaq -1(%rax,%rax), %rax
; X64-NEXT:    movq %rcx, %rdx
; X64-NEXT:    cmovgq %rsi, %rdx
; X64-NEXT:    cmovgq %rcx, %rsi
; X64-NEXT:    subq %rdx, %rsi
; X64-NEXT:    shrq %rsi
; X64-NEXT:    imulq %rsi, %rax
; X64-NEXT:    addq %rcx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_signed_mem_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %esi
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    cmpl %esi, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    sbbl %ecx, %edx
; X86-NEXT:    movl $-1, %ebx
; X86-NEXT:    jl .LBB7_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    xorl %ebp, %ebp
; X86-NEXT:    movl $1, %ebx
; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    jmp .LBB7_3
; X86-NEXT:  .LBB7_1:
; X86-NEXT:    movl $-1, %ebp
; X86-NEXT:    movl %edi, (%esp) # 4-byte Spill
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:  .LBB7_3:
; X86-NEXT:    subl %edx, %eax
; X86-NEXT:    sbbl (%esp), %edi # 4-byte Folded Reload
; X86-NEXT:    shrdl $1, %edi, %eax
; X86-NEXT:    imull %eax, %ebp
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    shrl %edi
; X86-NEXT:    imull %ebx, %edi
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    addl %esi, %eax
; X86-NEXT:    adcl %ecx, %edx
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a1 = load i64, i64* %a1_addr
  %t3 = icmp sgt i64 %a1, %a2 ; signed
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul nsw i64 %t8, %t4 ; signed
  %a10 = add nsw i64 %t9, %a1 ; signed
  ret i64 %a10
}

define i64 @scalar_i64_signed_reg_mem(i64 %a1, i64* %a2_addr) nounwind {
; X64-LABEL: scalar_i64_signed_reg_mem:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpq %rax, %rdi
; X64-NEXT:    setle %cl
; X64-NEXT:    leaq -1(%rcx,%rcx), %rcx
; X64-NEXT:    movq %rdi, %rdx
; X64-NEXT:    cmovgq %rax, %rdx
; X64-NEXT:    cmovgq %rdi, %rax
; X64-NEXT:    subq %rdx, %rax
; X64-NEXT:    shrq %rax
; X64-NEXT:    imulq %rcx, %rax
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_signed_reg_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %eax
; X86-NEXT:    movl 4(%edx), %edi
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl $-1, %ebx
; X86-NEXT:    jl .LBB8_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    xorl %ebp, %ebp
; X86-NEXT:    movl $1, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB8_3
; X86-NEXT:  .LBB8_1:
; X86-NEXT:    movl $-1, %ebp
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB8_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    sbbl %edx, %edi
; X86-NEXT:    shrdl $1, %edi, %eax
; X86-NEXT:    imull %eax, %ebp
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    shrl %edi
; X86-NEXT:    imull %ebx, %edi
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a2 = load i64, i64* %a2_addr
  %t3 = icmp sgt i64 %a1, %a2 ; signed
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul nsw i64 %t8, %t4 ; signed
  %a10 = add nsw i64 %t9, %a1 ; signed
  ret i64 %a10
}

define i64 @scalar_i64_signed_mem_mem(i64* %a1_addr, i64* %a2_addr) nounwind {
; X64-LABEL: scalar_i64_signed_mem_mem:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rcx
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    cmpq %rax, %rcx
; X64-NEXT:    setle %dl
; X64-NEXT:    leaq -1(%rdx,%rdx), %rdx
; X64-NEXT:    movq %rcx, %rsi
; X64-NEXT:    cmovgq %rax, %rsi
; X64-NEXT:    cmovgq %rcx, %rax
; X64-NEXT:    subq %rsi, %rax
; X64-NEXT:    shrq %rax
; X64-NEXT:    imulq %rdx, %rax
; X64-NEXT:    addq %rcx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_signed_mem_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %esi
; X86-NEXT:    movl 4(%eax), %ecx
; X86-NEXT:    movl (%edx), %eax
; X86-NEXT:    movl 4(%edx), %edi
; X86-NEXT:    cmpl %esi, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    sbbl %ecx, %edx
; X86-NEXT:    movl $-1, %ebx
; X86-NEXT:    jl .LBB9_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    xorl %ebp, %ebp
; X86-NEXT:    movl $1, %ebx
; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    jmp .LBB9_3
; X86-NEXT:  .LBB9_1:
; X86-NEXT:    movl $-1, %ebp
; X86-NEXT:    movl %edi, (%esp) # 4-byte Spill
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:  .LBB9_3:
; X86-NEXT:    subl %edx, %eax
; X86-NEXT:    sbbl (%esp), %edi # 4-byte Folded Reload
; X86-NEXT:    shrdl $1, %edi, %eax
; X86-NEXT:    imull %eax, %ebp
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    shrl %edi
; X86-NEXT:    imull %ebx, %edi
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    addl %esi, %eax
; X86-NEXT:    adcl %ecx, %edx
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a1 = load i64, i64* %a1_addr
  %a2 = load i64, i64* %a2_addr
  %t3 = icmp sgt i64 %a1, %a2 ; signed
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul nsw i64 %t8, %t4 ; signed
  %a10 = add nsw i64 %t9, %a1 ; signed
  ret i64 %a10
}

; ---------------------------------------------------------------------------- ;
; 16-bit width
; ---------------------------------------------------------------------------- ;

; Values come from regs

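; i16 is promoted to i32, so the difference is zero-extended with movzwl
; and halved with a 32-bit shrl before the 32-bit imull.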
define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; X64-LABEL: scalar_i16_signed_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpw %si, %di
; X64-NEXT:    setle %al
; X64-NEXT:    leal -1(%rax,%rax), %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    cmovgl %esi, %eax
; X64-NEXT:    cmovgl %edi, %esi
; X64-NEXT:    subl %eax, %esi
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_signed_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB10_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB10_3
; X86-NEXT:  .LBB10_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB10_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %t3 = icmp sgt i16 %a1, %a2 ; signed
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul nsw i16 %t8, %t4 ; signed
  %a10 = add nsw i16 %t9, %a1 ; signed
  ret i16 %a10
}

define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; X64-LABEL: scalar_i16_unsigned_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpw %si, %di
; X64-NEXT:    setbe %al
; X64-NEXT:    leal -1(%rax,%rax), %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    cmoval %esi, %eax
; X64-NEXT:    cmoval %edi, %esi
; X64-NEXT:    subl %eax, %esi
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_unsigned_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    setbe %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    ja .LBB11_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB11_3
; X86-NEXT:  .LBB11_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB11_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %t3 = icmp ugt i16 %a1, %a2
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul i16 %t8, %t4
  %a10 = add i16 %t9, %a1
  ret i16 %a10
}

; Values are loaded. Only check signed case.

define i16 @scalar_i16_signed_mem_reg(i16* %a1_addr, i16 %a2) nounwind {
; X64-LABEL: scalar_i16_signed_mem_reg:
; X64:       # %bb.0:
; X64-NEXT:    movzwl (%rdi), %ecx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpw %si, %cx
; X64-NEXT:    setle %al
; X64-NEXT:    leal -1(%rax,%rax), %edx
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    cmovgl %esi, %eax
; X64-NEXT:    cmovgl %ecx, %esi
; X64-NEXT:    subl %eax, %esi
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %edx, %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_signed_mem_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB12_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB12_3
; X86-NEXT:  .LBB12_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB12_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a1 = load i16, i16* %a1_addr
  %t3 = icmp sgt i16 %a1, %a2 ; signed
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul nsw i16 %t8, %t4 ; signed
  %a10 = add nsw i16 %t9, %a1 ; signed
  ret i16 %a10
}

define i16 @scalar_i16_signed_reg_mem(i16 %a1, i16* %a2_addr) nounwind {
; X64-LABEL: scalar_i16_signed_reg_mem:
; X64:       # %bb.0:
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpw %ax, %di
; X64-NEXT:    setle %cl
; X64-NEXT:    leal -1(%rcx,%rcx), %ecx
; X64-NEXT:    movl %edi, %edx
; X64-NEXT:    cmovgl %eax, %edx
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    subl %edx, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_signed_reg_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl (%eax), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB13_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB13_3
; X86-NEXT:  .LBB13_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB13_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a2 = load i16, i16* %a2_addr
  %t3 = icmp sgt i16 %a1, %a2 ; signed
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul nsw i16 %t8, %t4 ; signed
  %a10 = add nsw i16 %t9, %a1 ; signed
  ret i16 %a10
}

define i16 @scalar_i16_signed_mem_mem(i16* %a1_addr, i16* %a2_addr) nounwind {
; X64-LABEL: scalar_i16_signed_mem_mem:
; X64:       # %bb.0:
; X64-NEXT:    movzwl (%rdi), %ecx
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    cmpw %ax, %cx
; X64-NEXT:    setle %dl
; X64-NEXT:    leal -1(%rdx,%rdx), %edx
; X64-NEXT:    movl %ecx, %esi
; X64-NEXT:    cmovgl %eax, %esi
; X64-NEXT:    cmovgl %ecx, %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %edx, %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_signed_mem_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    movzwl (%eax), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB14_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB14_3
; X86-NEXT:  .LBB14_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB14_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a1 = load i16, i16* %a1_addr
  %a2 = load i16, i16* %a2_addr
  %t3 = icmp sgt i16 %a1, %a2 ; signed
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul nsw i16 %t8, %t4 ; signed
  %a10 = add nsw i16 %t9, %a1 ; signed
  ret i16 %a10
}

; ---------------------------------------------------------------------------- ;
; 8-bit width
; ---------------------------------------------------------------------------- ;

; Values come from regs

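; For i8 the +/-1 multiplier is materialized as 2*setcc-1 via addb+decb,
; and the multiply uses the one-operand mulb.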
define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_signed_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cmpb %al, %dil
; X64-NEXT:    setle %cl
; X64-NEXT:    movl %edi, %edx
; X64-NEXT:    cmovgl %esi, %edx
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    addb %cl, %cl
; X64-NEXT:    decb %cl
; X64-NEXT:    subb %dl, %al
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %cl
; X64-NEXT:    addb %dil, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_signed_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    cmpb %al, %cl
; X86-NEXT:    setle %dl
; X86-NEXT:    jg .LBB15_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movb %cl, %ah
; X86-NEXT:    jmp .LBB15_3
; X86-NEXT:  .LBB15_1:
; X86-NEXT:    movb %al, %ah
; X86-NEXT:    movb %cl, %al
; X86-NEXT:  .LBB15_3:
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    addb %dl, %dl
; X86-NEXT:    decb %dl
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %t3 = icmp sgt i8 %a1, %a2 ; signed
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul nsw i8 %t8, %t4 ; signed
  %a10 = add nsw i8 %t9, %a1 ; signed
  ret i8 %a10
}

define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_unsigned_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cmpb %al, %dil
; X64-NEXT:    setbe %cl
; X64-NEXT:    movl %edi, %edx
; X64-NEXT:    cmoval %esi, %edx
; X64-NEXT:    cmoval %edi, %eax
; X64-NEXT:    addb %cl, %cl
; X64-NEXT:    decb %cl
; X64-NEXT:    subb %dl, %al
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %cl
; X64-NEXT:    addb %dil, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_unsigned_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    cmpb %al, %cl
; X86-NEXT:    setbe %dl
; X86-NEXT:    ja .LBB16_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movb %cl, %ah
; X86-NEXT:    jmp .LBB16_3
; X86-NEXT:  .LBB16_1:
; X86-NEXT:    movb %al, %ah
; X86-NEXT:    movb %cl, %al
; X86-NEXT:  .LBB16_3:
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    addb %dl, %dl
; X86-NEXT:    decb %dl
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %t3 = icmp ugt i8 %a1, %a2
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul i8 %t8, %t4
  %a10 = add i8 %t9, %a1
  ret i8 %a10
}

; Values are loaded. Only check signed case.

define i8 @scalar_i8_signed_mem_reg(i8* %a1_addr, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_signed_mem_reg:
; X64:       # %bb.0:
; X64-NEXT:    movzbl (%rdi), %ecx
; X64-NEXT:    cmpb %sil, %cl
; X64-NEXT:    setle %dl
; X64-NEXT:    movl %ecx, %edi
; X64-NEXT:    cmovgl %esi, %edi
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    cmovlel %esi, %eax
; X64-NEXT:    addb %dl, %dl
; X64-NEXT:    decb %dl
; X64-NEXT:    subb %dil, %al
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %dl
; X64-NEXT:    addb %cl, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_signed_mem_reg:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movb (%ecx), %cl
; X86-NEXT:    cmpb %al, %cl
; X86-NEXT:    setle %dl
; X86-NEXT:    jg .LBB17_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movb %cl, %ah
; X86-NEXT:    jmp .LBB17_3
; X86-NEXT:  .LBB17_1:
; X86-NEXT:    movb %al, %ah
; X86-NEXT:    movb %cl, %al
; X86-NEXT:  .LBB17_3:
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    addb %dl, %dl
; X86-NEXT:    decb %dl
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %a1 = load i8, i8* %a1_addr
  %t3 = icmp sgt i8 %a1, %a2 ; signed
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul nsw i8 %t8, %t4 ; signed
  %a10 = add nsw i8 %t9, %a1 ; signed
  ret i8 %a10
}

define i8 @scalar_i8_signed_reg_mem(i8 %a1, i8* %a2_addr) nounwind {
; X64-LABEL: scalar_i8_signed_reg_mem:
; X64:       # %bb.0:
; X64-NEXT:    movzbl (%rsi), %eax
; X64-NEXT:    cmpb %al, %dil
; X64-NEXT:    setle %cl
; X64-NEXT:    movl %edi, %edx
; X64-NEXT:    cmovgl %eax, %edx
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    addb %cl, %cl
; X64-NEXT:    decb %cl
; X64-NEXT:    subb %dl, %al
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %cl
; X64-NEXT:    addb %dil, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_signed_reg_mem:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb (%eax), %al
; X86-NEXT:    cmpb %al, %cl
; X86-NEXT:    setle %dl
; X86-NEXT:    jg .LBB18_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movb %cl, %ah
; X86-NEXT:    jmp .LBB18_3
; X86-NEXT:  .LBB18_1:
; X86-NEXT:    movb %al, %ah
; X86-NEXT:    movb %cl, %al
; X86-NEXT:  .LBB18_3:
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    addb %dl, %dl
; X86-NEXT:    decb %dl
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %a2 = load i8, i8* %a2_addr
  %t3 = icmp sgt i8 %a1, %a2 ; signed
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul nsw i8 %t8, %t4 ; signed
  %a10 = add nsw i8 %t9, %a1 ; signed
  ret i8 %a10
}

define i8 @scalar_i8_signed_mem_mem(i8* %a1_addr, i8* %a2_addr) nounwind {
; X64-LABEL: scalar_i8_signed_mem_mem:
; X64:       # %bb.0:
; X64-NEXT:    movzbl (%rdi), %ecx
; X64-NEXT:    movzbl (%rsi), %eax
; X64-NEXT:    cmpb %al, %cl
; X64-NEXT:    setle %dl
; X64-NEXT:    movl %ecx, %esi
; X64-NEXT:    cmovgl %eax, %esi
; X64-NEXT:    cmovgl %ecx, %eax
; X64-NEXT:    addb %dl, %dl
; X64-NEXT:    decb %dl
; X64-NEXT:    subb %sil, %al
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %dl
; X64-NEXT:    addb %cl, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_signed_mem_mem:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movb (%ecx), %cl
; X86-NEXT:    movb (%eax), %al
; X86-NEXT:    cmpb %al, %cl
; X86-NEXT:    setle %dl
; X86-NEXT:    jg .LBB19_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movb %cl, %ah
; X86-NEXT:    jmp .LBB19_3
; X86-NEXT:  .LBB19_1:
; X86-NEXT:    movb %al, %ah
; X86-NEXT:    movb %cl, %al
; X86-NEXT:  .LBB19_3:
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    addb %dl, %dl
; X86-NEXT:    decb %dl
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %a1 = load i8, i8* %a1_addr
  %a2 = load i8, i8* %a2_addr
  %t3 = icmp sgt i8 %a1, %a2 ; signed
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul nsw i8 %t8, %t4 ; signed
  %a10 = add nsw i8 %t9, %a1 ; signed
  ret i8 %a10
}