; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-FAST
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=X86,X86-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=X64,X64-SLOW

declare i8 @llvm.fshl.i8(i8, i8, i8) nounwind readnone
declare i16 @llvm.fshl.i16(i16, i16, i16) nounwind readnone
declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
declare i64 @llvm.fshl.i64(i64, i64, i64) nounwind readnone

;
; Variable Funnel Shift
;

define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind {
; X86-LABEL: var_shift_i8:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $8, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: andb $7, %cl
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movb %ah, %al
; X86-NEXT: retl
;
; X64-LABEL: var_shift_i8:
; X64: # %bb.0:
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: shll $8, %edi
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: orl %edi, %eax
; X64-NEXT: andb $7, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: shrl $8, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
  %tmp = tail call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 %z)
  ret i8 %tmp
}

define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
; X86-FAST-LABEL: var_shift_i16:
; X86-FAST: # %bb.0:
; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FAST-NEXT: andb $15, %cl
; X86-FAST-NEXT: shldw %cl, %dx, %ax
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: var_shift_i16:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: shll $16, %eax
; X86-SLOW-NEXT: orl %edx, %eax
; X86-SLOW-NEXT: andb $15, %cl
; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: shrl $16, %eax
; X86-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: var_shift_i16:
; X64-FAST: # %bb.0:
; X64-FAST-NEXT: movl %edx, %ecx
; X64-FAST-NEXT: movl %edi, %eax
; X64-FAST-NEXT: andb $15, %cl
; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-FAST-NEXT: shldw %cl, %si, %ax
; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax
; X64-FAST-NEXT: retq
;
; X64-SLOW-LABEL: var_shift_i16:
; X64-SLOW: # %bb.0:
; X64-SLOW-NEXT: movl %edx, %ecx
; X64-SLOW-NEXT: shll $16, %edi
; X64-SLOW-NEXT: movzwl %si, %eax
; X64-SLOW-NEXT: orl %edi, %eax
; X64-SLOW-NEXT: andb $15, %cl
; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT: shll %cl, %eax
; X64-SLOW-NEXT: shrl $16, %eax
; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT: retq
  %tmp = tail call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
  ret i16 %tmp
}

define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-FAST-LABEL: var_shift_i32:
; X86-FAST: # %bb.0:
; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: shldl %cl, %edx, %eax
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: var_shift_i32:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: shll %cl, %edx
; X86-SLOW-NEXT: notb %cl
; X86-SLOW-NEXT: shrl %eax
; X86-SLOW-NEXT: shrl %cl, %eax
; X86-SLOW-NEXT: orl %edx, %eax
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: var_shift_i32:
; X64-FAST: # %bb.0:
; X64-FAST-NEXT: movl %edx, %ecx
; X64-FAST-NEXT: movl %edi, %eax
; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-FAST-NEXT: shldl %cl, %esi, %eax
; X64-FAST-NEXT: retq
;
; X64-SLOW-LABEL: var_shift_i32:
; X64-SLOW: # %bb.0:
; X64-SLOW-NEXT: movl %edx, %ecx
; X64-SLOW-NEXT: movl %esi, %eax
; X64-SLOW-NEXT: shll %cl, %edi
; X64-SLOW-NEXT: shrl %eax
; X64-SLOW-NEXT: notb %cl
; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT: shrl %cl, %eax
; X64-SLOW-NEXT: orl %edi, %eax
; X64-SLOW-NEXT: retq
  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %tmp
}

define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
; X86-LABEL: var_shift_i32_optsize:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shldl %cl, %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: var_shift_i32_optsize:
; X64: # %bb.0:
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shldl %cl, %esi, %eax
; X64-NEXT: retq
  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %tmp
}

define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
; X86-LABEL: var_shift_i32_pgso:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shldl %cl, %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: var_shift_i32_pgso:
; X64: # %bb.0:
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shldl %cl, %esi, %eax
; X64-NEXT: retq
  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %tmp
}

define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-FAST-LABEL: var_shift_i64:
; X86-FAST: # %bb.0:
; X86-FAST-NEXT: pushl %ebx
; X86-FAST-NEXT: pushl %edi
; X86-FAST-NEXT: pushl %esi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-FAST-NEXT: movb %ch, %cl
; X86-FAST-NEXT: notb %cl
; X86-FAST-NEXT: shrdl $1, %edi, %esi
; X86-FAST-NEXT: shrl %edi
; X86-FAST-NEXT: shrdl %cl, %edi, %esi
; X86-FAST-NEXT: shrl %cl, %edi
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: je .LBB5_2
; X86-FAST-NEXT: # %bb.1:
; X86-FAST-NEXT: movl %edi, %esi
; X86-FAST-NEXT: xorl %edi, %edi
; X86-FAST-NEXT: .LBB5_2:
; X86-FAST-NEXT: movl %ebx, %eax
; X86-FAST-NEXT: movb %ch, %cl
; X86-FAST-NEXT: shll %cl, %eax
; X86-FAST-NEXT: shldl %cl, %ebx, %edx
; X86-FAST-NEXT: testb $32, %ch
; X86-FAST-NEXT: je .LBB5_4
; X86-FAST-NEXT: # %bb.3:
; X86-FAST-NEXT: movl %eax, %edx
; X86-FAST-NEXT: xorl %eax, %eax
; X86-FAST-NEXT: .LBB5_4:
; X86-FAST-NEXT: orl %edi, %edx
; X86-FAST-NEXT: orl %esi, %eax
; X86-FAST-NEXT: popl %esi
; X86-FAST-NEXT: popl %edi
; X86-FAST-NEXT: popl %ebx
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: var_shift_i64:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: pushl %ebp
; X86-SLOW-NEXT: pushl %ebx
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: shrl %eax
; X86-SLOW-NEXT: movl %esi, %edi
; X86-SLOW-NEXT: shll $31, %edi
; X86-SLOW-NEXT: orl %eax, %edi
; X86-SLOW-NEXT: movl %ecx, %eax
; X86-SLOW-NEXT: movb %cl, %ch
; X86-SLOW-NEXT: notb %ch
; X86-SLOW-NEXT: movb %ch, %cl
; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: shrl %esi
; X86-SLOW-NEXT: leal (%esi,%esi), %ebp
; X86-SLOW-NEXT: movb %al, %cl
; X86-SLOW-NEXT: shll %cl, %ebp
; X86-SLOW-NEXT: shll %cl, %ebx
; X86-SLOW-NEXT: movl %edx, %eax
; X86-SLOW-NEXT: shrl %eax
; X86-SLOW-NEXT: movb %ch, %cl
; X86-SLOW-NEXT: shrl %cl, %eax
; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-SLOW-NEXT: shll %cl, %edx
; X86-SLOW-NEXT: testb $32, {{[0-9]+}}(%esp)
; X86-SLOW-NEXT: jne .LBB5_1
; X86-SLOW-NEXT: # %bb.2:
; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: jmp .LBB5_3
; X86-SLOW-NEXT: .LBB5_1:
; X86-SLOW-NEXT: movl %edx, %ebx
; X86-SLOW-NEXT: xorl %edx, %edx
; X86-SLOW-NEXT: .LBB5_3:
; X86-SLOW-NEXT: movb %ch, %cl
; X86-SLOW-NEXT: shrl %cl, %esi
; X86-SLOW-NEXT: testb $32, %ch
; X86-SLOW-NEXT: jne .LBB5_4
; X86-SLOW-NEXT: # %bb.5:
; X86-SLOW-NEXT: orl %edi, %ebp
; X86-SLOW-NEXT: jmp .LBB5_6
; X86-SLOW-NEXT: .LBB5_4:
; X86-SLOW-NEXT: movl %esi, %ebp
; X86-SLOW-NEXT: xorl %esi, %esi
; X86-SLOW-NEXT: .LBB5_6:
; X86-SLOW-NEXT: orl %ebp, %edx
; X86-SLOW-NEXT: orl %esi, %ebx
; X86-SLOW-NEXT: movl %edx, %eax
; X86-SLOW-NEXT: movl %ebx, %edx
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: popl %ebx
; X86-SLOW-NEXT: popl %ebp
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: var_shift_i64:
; X64-FAST: # %bb.0:
; X64-FAST-NEXT: movq %rdx, %rcx
; X64-FAST-NEXT: movq %rdi, %rax
; X64-FAST-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-FAST-NEXT: shldq %cl, %rsi, %rax
; X64-FAST-NEXT: retq
;
; X64-SLOW-LABEL: var_shift_i64:
; X64-SLOW: # %bb.0:
; X64-SLOW-NEXT: movq %rdx, %rcx
; X64-SLOW-NEXT: movq %rsi, %rax
; X64-SLOW-NEXT: shlq %cl, %rdi
; X64-SLOW-NEXT: shrq %rax
; X64-SLOW-NEXT: notb %cl
; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-SLOW-NEXT: shrq %cl, %rax
; X64-SLOW-NEXT: orq %rdi, %rax
; X64-SLOW-NEXT: retq
  %tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %tmp
}

;
; Const Funnel Shift
;

define i8 @const_shift_i8(i8 %x, i8 %y) nounwind {
; X86-LABEL: const_shift_i8:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shrb %cl
; X86-NEXT: shlb $7, %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: const_shift_i8:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: shrb %sil
; X64-NEXT: shlb $7, %dil
; X64-NEXT: leal (%rdi,%rsi), %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
  %tmp = tail call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 7)
  ret i8 %tmp
}

define i16 @const_shift_i16(i16 %x, i16 %y) nounwind {
; X86-FAST-LABEL: const_shift_i16:
; X86-FAST: # %bb.0:
; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: shldw $7, %cx, %ax
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: const_shift_i16:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: shrl $9, %ecx
; X86-SLOW-NEXT: shll $7, %eax
; X86-SLOW-NEXT: orl %ecx, %eax
; X86-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: const_shift_i16:
; X64-FAST: # %bb.0:
; X64-FAST-NEXT: movl %edi, %eax
; X64-FAST-NEXT: shldw $7, %si, %ax
; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax
; X64-FAST-NEXT: retq
;
; X64-SLOW-LABEL: const_shift_i16:
; X64-SLOW: # %bb.0:
; X64-SLOW-NEXT: movzwl %si, %eax
; X64-SLOW-NEXT: shll $7, %edi
; X64-SLOW-NEXT: shrl $9, %eax
; X64-SLOW-NEXT: orl %edi, %eax
; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT: retq
  %tmp = tail call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 7)
  ret i16 %tmp
}

define i32 @const_shift_i32(i32 %x, i32 %y) nounwind {
; X86-FAST-LABEL: const_shift_i32:
; X86-FAST: # %bb.0:
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: shldl $7, %ecx, %eax
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: const_shift_i32:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: shrl $25, %ecx
; X86-SLOW-NEXT: shll $7, %eax
; X86-SLOW-NEXT: orl %ecx, %eax
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: const_shift_i32:
; X64-FAST: # %bb.0:
; X64-FAST-NEXT: movl %edi, %eax
; X64-FAST-NEXT: shldl $7, %esi, %eax
; X64-FAST-NEXT: retq
;
; X64-SLOW-LABEL: const_shift_i32:
; X64-SLOW: # %bb.0:
; X64-SLOW-NEXT: # kill: def $esi killed $esi def $rsi
; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi
; X64-SLOW-NEXT: shrl $25, %esi
; X64-SLOW-NEXT: shll $7, %edi
; X64-SLOW-NEXT: leal (%rdi,%rsi), %eax
; X64-SLOW-NEXT: retq
  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
  ret i32 %tmp
}

define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
; X86-FAST-LABEL: const_shift_i64:
; X86-FAST: # %bb.0:
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: shrdl $25, %ecx, %eax
; X86-FAST-NEXT: shldl $7, %ecx, %edx
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: const_shift_i64:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: pushl %esi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: shrl $25, %esi
; X86-SLOW-NEXT: movl %ecx, %eax
; X86-SLOW-NEXT: shll $7, %eax
; X86-SLOW-NEXT: orl %esi, %eax
; X86-SLOW-NEXT: shrl $25, %ecx
; X86-SLOW-NEXT: shll $7, %edx
; X86-SLOW-NEXT: orl %ecx, %edx
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: retl
;
; X64-FAST-LABEL: const_shift_i64:
; X64-FAST: # %bb.0:
; X64-FAST-NEXT: movq %rdi, %rax
; X64-FAST-NEXT: shldq $7, %rsi, %rax
; X64-FAST-NEXT: retq
;
; X64-SLOW-LABEL: const_shift_i64:
; X64-SLOW: # %bb.0:
; X64-SLOW-NEXT: shrq $57, %rsi
; X64-SLOW-NEXT: shlq $7, %rdi
; X64-SLOW-NEXT: leaq (%rdi,%rsi), %rax
; X64-SLOW-NEXT: retq
  %tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 7)
  ret i64 %tmp
}

;
; Combine Consecutive Loads
;

define i8 @combine_fshl_load_i8(i8* %p) nounwind {
; X86-LABEL: combine_fshl_load_i8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb 1(%eax), %al
; X86-NEXT: retl
;
; X64-LABEL: combine_fshl_load_i8:
; X64: # %bb.0:
; X64-NEXT: movb 1(%rdi), %al
; X64-NEXT: retq
  %p1 = getelementptr i8, i8* %p, i32 1
  %ld0 = load i8, i8 *%p
  %ld1 = load i8, i8 *%p1
  %res = call i8 @llvm.fshl.i8(i8 %ld1, i8 %ld0, i8 8)
  ret i8 %res
}

define i16 @combine_fshl_load_i16(i16* %p) nounwind {
; X86-LABEL: combine_fshl_load_i16:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl 1(%eax), %eax
; X86-NEXT: retl
;
; X64-LABEL: combine_fshl_load_i16:
; X64: # %bb.0:
; X64-NEXT: movzwl 1(%rdi), %eax
; X64-NEXT: retq
  %p0 = getelementptr i16, i16* %p, i32 0
  %p1 = getelementptr i16, i16* %p, i32 1
  %ld0 = load i16, i16 *%p0
  %ld1 = load i16, i16 *%p1
  %res = call i16 @llvm.fshl.i16(i16 %ld1, i16 %ld0, i16 8)
  ret i16 %res
}

define i32 @combine_fshl_load_i32(i32* %p) nounwind {
; X86-LABEL: combine_fshl_load_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl 11(%eax), %eax
; X86-NEXT: retl
;
; X64-LABEL: combine_fshl_load_i32:
; X64: # %bb.0:
; X64-NEXT: movl 11(%rdi), %eax
; X64-NEXT: retq
  %p0 = getelementptr i32, i32* %p, i32 2
  %p1 = getelementptr i32, i32* %p, i32 3
  %ld0 = load i32, i32 *%p0
  %ld1 = load i32, i32 *%p1
  %res = call i32 @llvm.fshl.i32(i32 %ld1, i32 %ld0, i32 8)
  ret i32 %res
}

define i64 @combine_fshl_load_i64(i64* %p) nounwind {
; X86-LABEL: combine_fshl_load_i64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl 13(%ecx), %eax
; X86-NEXT: movl 17(%ecx), %edx
; X86-NEXT: retl
;
; X64-LABEL: combine_fshl_load_i64:
; X64: # %bb.0:
; X64-NEXT: movq 13(%rdi), %rax
; X64-NEXT: retq
  %p0 = getelementptr i64, i64* %p, i64 1
  %p1 = getelementptr i64, i64* %p, i64 2
  %ld0 = load i64, i64 *%p0
  %ld1 = load i64, i64 *%p1
  %res = call i64 @llvm.fshl.i64(i64 %ld1, i64 %ld0, i64 24)
  ret i64 %res
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}