; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86

; These test cases are inspired by C++2a std::midpoint().
; See https://bugs.llvm.org/show_bug.cgi?id=40965

; ---------------------------------------------------------------------------- ;
; 32-bit width
; ---------------------------------------------------------------------------- ;

; Values come from regs

define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
; X64-LABEL: scalar_i32_signed_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpl %esi, %edi
; X64-NEXT:    setle %al
; X64-NEXT:    leal -1(%rax,%rax), %eax
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    cmovgl %esi, %ecx
; X64-NEXT:    cmovgl %edi, %esi
; X64-NEXT:    subl %ecx, %esi
; X64-NEXT:    shrl %esi
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_signed_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB0_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB0_3
; X86-NEXT:  .LBB0_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB0_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %t3 = icmp sgt i32 %a1, %a2 ; signed
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul nsw i32 %t8, %t4 ; signed
  %a10 = add nsw i32 %t9, %a1 ; signed
  ret i32 %a10
}

define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind {
; X64-LABEL: scalar_i32_unsigned_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpl %esi, %edi
; X64-NEXT:    setbe %al
; X64-NEXT:    leal -1(%rax,%rax), %eax
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    cmoval %esi, %ecx
; X64-NEXT:    cmoval %edi, %esi
; X64-NEXT:    subl %ecx, %esi
; X64-NEXT:    shrl %esi
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_unsigned_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    setbe %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    ja .LBB1_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB1_3
; X86-NEXT:  .LBB1_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB1_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %t3 = icmp ugt i32 %a1, %a2
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul i32 %t8, %t4
  %a10 = add i32 %t9, %a1
  ret i32 %a10
}
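
; All of these tests lower the same overflow-free midpoint pattern. As a C
; sketch (an illustration distilled from the IR, not the exact
; std::midpoint() source; shown for i32, the other widths are identical up
; to type):
;
;   int t    = a1 > a2;            // icmp sgt (icmp ugt in unsigned tests)
;   int sign = t ? -1 : 1;
;   int lo   = t ? a2 : a1;
;   int hi   = t ? a1 : a2;
;   int mid  = a1 + sign * (int)((unsigned)(hi - lo) >> 1);   // lshr
;
; On x86-64 the min/max selects become cmovg/cmova; on i686 they lower to
; compare-and-branch diamonds, as checked above and below.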

; Values are loaded. Only check signed case.

define i32 @scalar_i32_signed_mem_reg(i32* %a1_addr, i32 %a2) nounwind {
; X64-LABEL: scalar_i32_signed_mem_reg:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %ecx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpl %esi, %ecx
; X64-NEXT:    setle %al
; X64-NEXT:    leal -1(%rax,%rax), %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    cmovgl %esi, %edx
; X64-NEXT:    cmovgl %ecx, %esi
; X64-NEXT:    subl %edx, %esi
; X64-NEXT:    shrl %esi
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_signed_mem_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB2_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB2_3
; X86-NEXT:  .LBB2_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB2_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a1 = load i32, i32* %a1_addr
  %t3 = icmp sgt i32 %a1, %a2 ; signed
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul nsw i32 %t8, %t4 ; signed
  %a10 = add nsw i32 %t9, %a1 ; signed
  ret i32 %a10
}

define i32 @scalar_i32_signed_reg_mem(i32 %a1, i32* %a2_addr) nounwind {
; X64-LABEL: scalar_i32_signed_reg_mem:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpl %eax, %edi
; X64-NEXT:    setle %cl
; X64-NEXT:    leal -1(%rcx,%rcx), %ecx
; X64-NEXT:    movl %edi, %edx
; X64-NEXT:    cmovgl %eax, %edx
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    subl %edx, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_signed_reg_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB3_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB3_3
; X86-NEXT:  .LBB3_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB3_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a2 = load i32, i32* %a2_addr
  %t3 = icmp sgt i32 %a1, %a2 ; signed
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul nsw i32 %t8, %t4 ; signed
  %a10 = add nsw i32 %t9, %a1 ; signed
  ret i32 %a10
}

define i32 @scalar_i32_signed_mem_mem(i32* %a1_addr, i32* %a2_addr) nounwind {
; X64-LABEL: scalar_i32_signed_mem_mem:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %ecx
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    cmpl %eax, %ecx
; X64-NEXT:    setle %dl
; X64-NEXT:    leal -1(%rdx,%rdx), %edx
; X64-NEXT:    movl %ecx, %esi
; X64-NEXT:    cmovgl %eax, %esi
; X64-NEXT:    cmovgl %ecx, %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %edx, %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_signed_mem_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    movl (%eax), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB4_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB4_3
; X86-NEXT:  .LBB4_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB4_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a1 = load i32, i32* %a1_addr
  %a2 = load i32, i32* %a2_addr
  %t3 = icmp sgt i32 %a1, %a2 ; signed
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul nsw i32 %t8, %t4 ; signed
  %a10 = add nsw i32 %t9, %a1 ; signed
  ret i32 %a10
}

; ---------------------------------------------------------------------------- ;
; 64-bit width
; ---------------------------------------------------------------------------- ;

; Values come from regs

define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; X64-LABEL: scalar_i64_signed_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpq %rsi, %rdi
; X64-NEXT:    setle %al
; X64-NEXT:    leaq -1(%rax,%rax), %rax
; X64-NEXT:    movq %rdi, %rcx
; X64-NEXT:    cmovgq %rsi, %rcx
; X64-NEXT:    cmovgq %rdi, %rsi
; X64-NEXT:    subq %rcx, %rsi
; X64-NEXT:    shrq %rsi
; X64-NEXT:    imulq %rsi, %rax
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_signed_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl $-1, %ebx
; X86-NEXT:    jl .LBB5_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    xorl %ebp, %ebp
; X86-NEXT:    movl $1, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB5_3
; X86-NEXT:  .LBB5_1:
; X86-NEXT:    movl $-1, %ebp
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB5_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    sbbl %edx, %edi
; X86-NEXT:    shrdl $1, %edi, %eax
; X86-NEXT:    imull %eax, %ebp
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    shrl %edi
; X86-NEXT:    imull %ebx, %edi
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %t3 = icmp sgt i64 %a1, %a2 ; signed
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul nsw i64 %t8, %t4 ; signed
  %a10 = add nsw i64 %t9, %a1 ; signed
  ret i64 %a10
}

define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; X64-LABEL: scalar_i64_unsigned_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpq %rsi, %rdi
; X64-NEXT:    setbe %al
; X64-NEXT:    leaq -1(%rax,%rax), %rax
; X64-NEXT:    movq %rdi, %rcx
; X64-NEXT:    cmovaq %rsi, %rcx
; X64-NEXT:    cmovaq %rdi, %rsi
; X64-NEXT:    subq %rcx, %rsi
; X64-NEXT:    shrq %rsi
; X64-NEXT:    imulq %rsi, %rax
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_unsigned_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl $-1, %ebx
; X86-NEXT:    jb .LBB6_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    xorl %ebp, %ebp
; X86-NEXT:    movl $1, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB6_3
; X86-NEXT:  .LBB6_1:
; X86-NEXT:    movl $-1, %ebp
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB6_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    sbbl %edx, %edi
; X86-NEXT:    shrdl $1, %edi, %eax
; X86-NEXT:    imull %eax, %ebp
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    shrl %edi
; X86-NEXT:    imull %ebx, %edi
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %t3 = icmp ugt i64 %a1, %a2
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul i64 %t8, %t4
  %a10 = add i64 %t9, %a1
  ret i64 %a10
}
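
; On i686, each i64 step in the checks above and below is split into 32-bit
; halves: the compare is a cmpl plus sbbl pair, the subtract is subl/sbbl,
; the lshr-by-1 is shrdl $1 on the low half plus shrl on the high half, and
; the widening multiply by the +/-1 constant becomes mull plus two imull
; partial products folded into %edx.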

; Values are loaded. Only check signed case.

define i64 @scalar_i64_signed_mem_reg(i64* %a1_addr, i64 %a2) nounwind {
; X64-LABEL: scalar_i64_signed_mem_reg:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rcx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpq %rsi, %rcx
; X64-NEXT:    setle %al
; X64-NEXT:    leaq -1(%rax,%rax), %rax
; X64-NEXT:    movq %rcx, %rdx
; X64-NEXT:    cmovgq %rsi, %rdx
; X64-NEXT:    cmovgq %rcx, %rsi
; X64-NEXT:    subq %rdx, %rsi
; X64-NEXT:    shrq %rsi
; X64-NEXT:    imulq %rsi, %rax
; X64-NEXT:    addq %rcx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_signed_mem_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %esi
; X86-NEXT:    movl 4(%ecx), %ecx
; X86-NEXT:    cmpl %esi, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    sbbl %ecx, %edx
; X86-NEXT:    movl $-1, %ebx
; X86-NEXT:    jl .LBB7_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    xorl %ebp, %ebp
; X86-NEXT:    movl $1, %ebx
; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    jmp .LBB7_3
; X86-NEXT:  .LBB7_1:
; X86-NEXT:    movl $-1, %ebp
; X86-NEXT:    movl %edi, (%esp) # 4-byte Spill
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:  .LBB7_3:
; X86-NEXT:    subl %edx, %eax
; X86-NEXT:    sbbl (%esp), %edi # 4-byte Folded Reload
; X86-NEXT:    shrdl $1, %edi, %eax
; X86-NEXT:    imull %eax, %ebp
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    shrl %edi
; X86-NEXT:    imull %ebx, %edi
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    addl %esi, %eax
; X86-NEXT:    adcl %ecx, %edx
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a1 = load i64, i64* %a1_addr
  %t3 = icmp sgt i64 %a1, %a2 ; signed
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul nsw i64 %t8, %t4 ; signed
  %a10 = add nsw i64 %t9, %a1 ; signed
  ret i64 %a10
}

define i64 @scalar_i64_signed_reg_mem(i64 %a1, i64* %a2_addr) nounwind {
; X64-LABEL: scalar_i64_signed_reg_mem:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpq %rax, %rdi
; X64-NEXT:    setle %cl
; X64-NEXT:    leaq -1(%rcx,%rcx), %rcx
; X64-NEXT:    movq %rdi, %rdx
; X64-NEXT:    cmovgq %rax, %rdx
; X64-NEXT:    cmovgq %rdi, %rax
; X64-NEXT:    subq %rdx, %rax
; X64-NEXT:    shrq %rax
; X64-NEXT:    imulq %rcx, %rax
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_signed_reg_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %eax
; X86-NEXT:    movl 4(%edx), %edi
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl $-1, %ebx
; X86-NEXT:    jl .LBB8_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    xorl %ebp, %ebp
; X86-NEXT:    movl $1, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB8_3
; X86-NEXT:  .LBB8_1:
; X86-NEXT:    movl $-1, %ebp
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB8_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    sbbl %edx, %edi
; X86-NEXT:    shrdl $1, %edi, %eax
; X86-NEXT:    imull %eax, %ebp
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    shrl %edi
; X86-NEXT:    imull %ebx, %edi
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a2 = load i64, i64* %a2_addr
  %t3 = icmp sgt i64 %a1, %a2 ; signed
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul nsw i64 %t8, %t4 ; signed
  %a10 = add nsw i64 %t9, %a1 ; signed
  ret i64 %a10
}

define i64 @scalar_i64_signed_mem_mem(i64* %a1_addr, i64* %a2_addr) nounwind {
; X64-LABEL: scalar_i64_signed_mem_mem:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rcx
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    cmpq %rax, %rcx
; X64-NEXT:    setle %dl
; X64-NEXT:    leaq -1(%rdx,%rdx), %rdx
; X64-NEXT:    movq %rcx, %rsi
; X64-NEXT:    cmovgq %rax, %rsi
; X64-NEXT:    cmovgq %rcx, %rax
; X64-NEXT:    subq %rsi, %rax
; X64-NEXT:    shrq %rax
; X64-NEXT:    imulq %rdx, %rax
; X64-NEXT:    addq %rcx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_signed_mem_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %esi
; X86-NEXT:    movl 4(%eax), %ecx
; X86-NEXT:    movl (%edx), %eax
; X86-NEXT:    movl 4(%edx), %edi
; X86-NEXT:    cmpl %esi, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    sbbl %ecx, %edx
; X86-NEXT:    movl $-1, %ebx
; X86-NEXT:    jl .LBB9_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    xorl %ebp, %ebp
; X86-NEXT:    movl $1, %ebx
; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    jmp .LBB9_3
; X86-NEXT:  .LBB9_1:
; X86-NEXT:    movl $-1, %ebp
; X86-NEXT:    movl %edi, (%esp) # 4-byte Spill
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:  .LBB9_3:
; X86-NEXT:    subl %edx, %eax
; X86-NEXT:    sbbl (%esp), %edi # 4-byte Folded Reload
; X86-NEXT:    shrdl $1, %edi, %eax
; X86-NEXT:    imull %eax, %ebp
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    shrl %edi
; X86-NEXT:    imull %ebx, %edi
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    addl %esi, %eax
; X86-NEXT:    adcl %ecx, %edx
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a1 = load i64, i64* %a1_addr
  %a2 = load i64, i64* %a2_addr
  %t3 = icmp sgt i64 %a1, %a2 ; signed
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul nsw i64 %t8, %t4 ; signed
  %a10 = add nsw i64 %t9, %a1 ; signed
  ret i64 %a10
}

; ---------------------------------------------------------------------------- ;
; 16-bit width
; ---------------------------------------------------------------------------- ;

; Values come from regs

define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; X64-LABEL: scalar_i16_signed_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpw %si, %di
; X64-NEXT:    setle %al
; X64-NEXT:    leal -1(%rax,%rax), %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    cmovgl %esi, %eax
; X64-NEXT:    cmovgl %edi, %esi
; X64-NEXT:    subl %eax, %esi
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_signed_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB10_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB10_3
; X86-NEXT:  .LBB10_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB10_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %t3 = icmp sgt i16 %a1, %a2 ; signed
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul nsw i16 %t8, %t4 ; signed
  %a10 = add nsw i16 %t9, %a1 ; signed
  ret i16 %a10
}

define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; X64-LABEL: scalar_i16_unsigned_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpw %si, %di
; X64-NEXT:    setbe %al
; X64-NEXT:    leal -1(%rax,%rax), %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    cmoval %esi, %eax
; X64-NEXT:    cmoval %edi, %esi
; X64-NEXT:    subl %eax, %esi
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_unsigned_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    setbe %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    ja .LBB11_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB11_3
; X86-NEXT:  .LBB11_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB11_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %t3 = icmp ugt i16 %a1, %a2
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul i16 %t8, %t4
  %a10 = add i16 %t9, %a1
  ret i16 %a10
}

; Values are loaded. Only check signed case.
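
; In the i16 tests the arithmetic is promoted to 32 bits on both targets:
; the subtract runs on full registers, movzwl zero-extends the difference
; so that shrl implements the i16 lshr, and the '# kill' annotation marks
; the truncation of the 32-bit result back to $ax.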

define i16 @scalar_i16_signed_mem_reg(i16* %a1_addr, i16 %a2) nounwind {
; X64-LABEL: scalar_i16_signed_mem_reg:
; X64:       # %bb.0:
; X64-NEXT:    movzwl (%rdi), %ecx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpw %si, %cx
; X64-NEXT:    setle %al
; X64-NEXT:    leal -1(%rax,%rax), %edx
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    cmovgl %esi, %eax
; X64-NEXT:    cmovgl %ecx, %esi
; X64-NEXT:    subl %eax, %esi
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %edx, %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_signed_mem_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB12_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB12_3
; X86-NEXT:  .LBB12_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB12_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a1 = load i16, i16* %a1_addr
  %t3 = icmp sgt i16 %a1, %a2 ; signed
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul nsw i16 %t8, %t4 ; signed
  %a10 = add nsw i16 %t9, %a1 ; signed
  ret i16 %a10
}

define i16 @scalar_i16_signed_reg_mem(i16 %a1, i16* %a2_addr) nounwind {
; X64-LABEL: scalar_i16_signed_reg_mem:
; X64:       # %bb.0:
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpw %ax, %di
; X64-NEXT:    setle %cl
; X64-NEXT:    leal -1(%rcx,%rcx), %ecx
; X64-NEXT:    movl %edi, %edx
; X64-NEXT:    cmovgl %eax, %edx
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    subl %edx, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_signed_reg_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl (%eax), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB13_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB13_3
; X86-NEXT:  .LBB13_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB13_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a2 = load i16, i16* %a2_addr
  %t3 = icmp sgt i16 %a1, %a2 ; signed
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul nsw i16 %t8, %t4 ; signed
  %a10 = add nsw i16 %t9, %a1 ; signed
  ret i16 %a10
}

define i16 @scalar_i16_signed_mem_mem(i16* %a1_addr, i16* %a2_addr) nounwind {
; X64-LABEL: scalar_i16_signed_mem_mem:
; X64:       # %bb.0:
; X64-NEXT:    movzwl (%rdi), %ecx
; X64-NEXT:    movzwl (%rsi), %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    cmpw %ax, %cx
; X64-NEXT:    setle %dl
; X64-NEXT:    leal -1(%rdx,%rdx), %edx
; X64-NEXT:    movl %ecx, %esi
; X64-NEXT:    cmovgl %eax, %esi
; X64-NEXT:    cmovgl %ecx, %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %edx, %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_signed_mem_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    movzwl (%eax), %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    setle %dl
; X86-NEXT:    leal -1(%edx,%edx), %edx
; X86-NEXT:    jg .LBB14_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    jmp .LBB14_3
; X86-NEXT:  .LBB14_1:
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB14_3:
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a1 = load i16, i16* %a1_addr
  %a2 = load i16, i16* %a2_addr
  %t3 = icmp sgt i16 %a1, %a2 ; signed
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul nsw i16 %t8, %t4 ; signed
  %a10 = add nsw i16 %t9, %a1 ; signed
  ret i16 %a10
}

; ---------------------------------------------------------------------------- ;
; 8-bit width
; ---------------------------------------------------------------------------- ;

; Values come from regs
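
; In the i8 tests the +/-1 multiplier is materialized from the setcc result
; as 2*setcc-1 (addb %cl, %cl; decb %cl), and the multiply uses the
; one-operand mulb, which computes AX = AL * r/m8.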

define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_signed_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cmpb %al, %dil
; X64-NEXT:    setle %cl
; X64-NEXT:    movl %edi, %edx
; X64-NEXT:    cmovgl %esi, %edx
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    addb %cl, %cl
; X64-NEXT:    decb %cl
; X64-NEXT:    subb %dl, %al
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %cl
; X64-NEXT:    addb %dil, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_signed_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    cmpb %al, %cl
; X86-NEXT:    setle %dl
; X86-NEXT:    jg .LBB15_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movb %cl, %ah
; X86-NEXT:    jmp .LBB15_3
; X86-NEXT:  .LBB15_1:
; X86-NEXT:    movb %al, %ah
; X86-NEXT:    movb %cl, %al
; X86-NEXT:  .LBB15_3:
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    addb %dl, %dl
; X86-NEXT:    decb %dl
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %t3 = icmp sgt i8 %a1, %a2 ; signed
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul nsw i8 %t8, %t4 ; signed
  %a10 = add nsw i8 %t9, %a1 ; signed
  ret i8 %a10
}

define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_unsigned_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cmpb %al, %dil
; X64-NEXT:    setbe %cl
; X64-NEXT:    movl %edi, %edx
; X64-NEXT:    cmoval %esi, %edx
; X64-NEXT:    cmoval %edi, %eax
; X64-NEXT:    addb %cl, %cl
; X64-NEXT:    decb %cl
; X64-NEXT:    subb %dl, %al
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %cl
; X64-NEXT:    addb %dil, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_unsigned_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    cmpb %al, %cl
; X86-NEXT:    setbe %dl
; X86-NEXT:    ja .LBB16_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movb %cl, %ah
; X86-NEXT:    jmp .LBB16_3
; X86-NEXT:  .LBB16_1:
; X86-NEXT:    movb %al, %ah
; X86-NEXT:    movb %cl, %al
; X86-NEXT:  .LBB16_3:
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    addb %dl, %dl
; X86-NEXT:    decb %dl
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %t3 = icmp ugt i8 %a1, %a2
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul i8 %t8, %t4
  %a10 = add i8 %t9, %a1
  ret i8 %a10
}

; Values are loaded. Only check signed case.

define i8 @scalar_i8_signed_mem_reg(i8* %a1_addr, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_signed_mem_reg:
; X64:       # %bb.0:
; X64-NEXT:    movzbl (%rdi), %ecx
; X64-NEXT:    cmpb %sil, %cl
; X64-NEXT:    setle %dl
; X64-NEXT:    movl %ecx, %edi
; X64-NEXT:    cmovgl %esi, %edi
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    cmovlel %esi, %eax
; X64-NEXT:    addb %dl, %dl
; X64-NEXT:    decb %dl
; X64-NEXT:    subb %dil, %al
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %dl
; X64-NEXT:    addb %cl, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_signed_mem_reg:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movb (%ecx), %cl
; X86-NEXT:    cmpb %al, %cl
; X86-NEXT:    setle %dl
; X86-NEXT:    jg .LBB17_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movb %cl, %ah
; X86-NEXT:    jmp .LBB17_3
; X86-NEXT:  .LBB17_1:
; X86-NEXT:    movb %al, %ah
; X86-NEXT:    movb %cl, %al
; X86-NEXT:  .LBB17_3:
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    addb %dl, %dl
; X86-NEXT:    decb %dl
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %a1 = load i8, i8* %a1_addr
  %t3 = icmp sgt i8 %a1, %a2 ; signed
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul nsw i8 %t8, %t4 ; signed
  %a10 = add nsw i8 %t9, %a1 ; signed
  ret i8 %a10
}

define i8 @scalar_i8_signed_reg_mem(i8 %a1, i8* %a2_addr) nounwind {
; X64-LABEL: scalar_i8_signed_reg_mem:
; X64:       # %bb.0:
; X64-NEXT:    movzbl (%rsi), %eax
; X64-NEXT:    cmpb %al, %dil
; X64-NEXT:    setle %cl
; X64-NEXT:    movl %edi, %edx
; X64-NEXT:    cmovgl %eax, %edx
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    addb %cl, %cl
; X64-NEXT:    decb %cl
; X64-NEXT:    subb %dl, %al
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %cl
; X64-NEXT:    addb %dil, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_signed_reg_mem:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb (%eax), %al
; X86-NEXT:    cmpb %al, %cl
; X86-NEXT:    setle %dl
; X86-NEXT:    jg .LBB18_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movb %cl, %ah
; X86-NEXT:    jmp .LBB18_3
; X86-NEXT:  .LBB18_1:
; X86-NEXT:    movb %al, %ah
; X86-NEXT:    movb %cl, %al
; X86-NEXT:  .LBB18_3:
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    addb %dl, %dl
; X86-NEXT:    decb %dl
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %a2 = load i8, i8* %a2_addr
  %t3 = icmp sgt i8 %a1, %a2 ; signed
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul nsw i8 %t8, %t4 ; signed
  %a10 = add nsw i8 %t9, %a1 ; signed
  ret i8 %a10
}

define i8 @scalar_i8_signed_mem_mem(i8* %a1_addr, i8* %a2_addr) nounwind {
; X64-LABEL: scalar_i8_signed_mem_mem:
; X64:       # %bb.0:
; X64-NEXT:    movzbl (%rdi), %ecx
; X64-NEXT:    movzbl (%rsi), %eax
; X64-NEXT:    cmpb %al, %cl
; X64-NEXT:    setle %dl
; X64-NEXT:    movl %ecx, %esi
; X64-NEXT:    cmovgl %eax, %esi
; X64-NEXT:    cmovgl %ecx, %eax
; X64-NEXT:    addb %dl, %dl
; X64-NEXT:    decb %dl
; X64-NEXT:    subb %sil, %al
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %dl
; X64-NEXT:    addb %cl, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_signed_mem_mem:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movb (%ecx), %cl
; X86-NEXT:    movb (%eax), %al
; X86-NEXT:    cmpb %al, %cl
; X86-NEXT:    setle %dl
; X86-NEXT:    jg .LBB19_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    movb %cl, %ah
; X86-NEXT:    jmp .LBB19_3
; X86-NEXT:  .LBB19_1:
; X86-NEXT:    movb %al, %ah
; X86-NEXT:    movb %cl, %al
; X86-NEXT:  .LBB19_3:
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    addb %dl, %dl
; X86-NEXT:    decb %dl
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %a1 = load i8, i8* %a1_addr
  %a2 = load i8, i8* %a2_addr
  %t3 = icmp sgt i8 %a1, %a2 ; signed
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul nsw i8 %t8, %t4 ; signed
  %a10 = add nsw i8 %t9, %a1 ; signed
  ret i8 %a10
}