; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i386-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI2 %s
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI264 %s

; Codegen tests for scalar shl/lshr/ashr when compiling for core-avx2.
; The same IR is checked for a 32-bit triple (prefix BMI2) and a 64-bit
; triple (prefix BMI264).
;
; Naming scheme used by the functions below:
;   <op><width>     - value and shift amount both passed as arguments
;   <op><width>i    - shift by an immediate
;   <op><width>p    - value loaded through a pointer argument
;   <op><width>pi   - value loaded through a pointer, shifted by an immediate
;   <op><width>and  - shift amount explicitly masked to the bit width
;
; Functions that load through %p carry 'readonly' rather than 'readnone':
; a readnone function that reads memory has undefined behaviour per LangRef.

; Variable 32-bit left shift: both targets are expected to use shlxl; the
; 32-bit target takes the value straight from its stack slot.
define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: shl32:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl32:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shlxl %esi, %edi, %eax
; BMI264-NEXT:    retq
  %shl = shl i32 %x, %shamt
  ret i32 %shl
}

; 32-bit left shift by the constant 5: the checks expect the immediate form
; shll, not shlx.
define i32 @shl32i(i32 %x) nounwind uwtable readnone {
; BMI2-LABEL: shl32i:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    shll $5, %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl32i:
; BMI264:       # %bb.0:
; BMI264-NEXT:    movl %edi, %eax
; BMI264-NEXT:    shll $5, %eax
; BMI264-NEXT:    retq
  %shl = shl i32 %x, 5
  ret i32 %shl
}

; Variable 32-bit left shift of a loaded value: the load is expected to be
; folded into the memory operand of shlxl.
define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readonly {
; BMI2-LABEL: shl32p:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    shlxl %ecx, (%eax), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl32p:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shlxl %esi, (%rdi), %eax
; BMI264-NEXT:    retq
  %x = load i32, i32* %p
  %shl = shl i32 %x, %shamt
  ret i32 %shl
}

; Loaded 32-bit value shifted left by the constant 5: expected as a separate
; load followed by shll with an immediate.
define i32 @shl32pi(i32* %p) nounwind uwtable readonly {
; BMI2-LABEL: shl32pi:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl (%eax), %eax
; BMI2-NEXT:    shll $5, %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl32pi:
; BMI264:       # %bb.0:
; BMI264-NEXT:    movl (%rdi), %eax
; BMI264-NEXT:    shll $5, %eax
; BMI264-NEXT:    retq
  %x = load i32, i32* %p
  %shl = shl i32 %x, 5
  ret i32 %shl
}

; Variable 64-bit left shift. The 64-bit target uses a single shlxq. The
; 32-bit target expands it to shldl + shlxl and picks the halves with cmov
; after testing bit 5 of the count (testb $32).
define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: shl64:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 8
; BMI2-NEXT:    .cfi_offset %esi, -8
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shldl %cl, %eax, %edx
; BMI2-NEXT:    shlxl %ecx, %eax, %esi
; BMI2-NEXT:    xorl %eax, %eax
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %edx
; BMI2-NEXT:    cmovel %esi, %eax
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 4
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl64:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shlxq %rsi, %rdi, %rax
; BMI264-NEXT:    retq
  %shl = shl i64 %x, %shamt
  ret i64 %shl
}

; 64-bit left shift by the constant 7: shlq with an immediate on the 64-bit
; target, shldl + shll with immediates on the 32-bit target.
define i64 @shl64i(i64 %x) nounwind uwtable readnone {
; BMI2-LABEL: shl64i:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shldl $7, %eax, %edx
; BMI2-NEXT:    shll $7, %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl64i:
; BMI264:       # %bb.0:
; BMI264-NEXT:    movq %rdi, %rax
; BMI264-NEXT:    shlq $7, %rax
; BMI264-NEXT:    retq
  %shl = shl i64 %x, 7
  ret i64 %shl
}

; Variable 64-bit left shift of a loaded value: the 64-bit target folds the
; load into shlxq; the 32-bit target loads both halves and uses the same
; shldl/shlxl/cmov expansion as shl64.
define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readonly {
; BMI2-LABEL: shl64p:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 8
; BMI2-NEXT:    .cfi_offset %esi, -8
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl (%eax), %esi
; BMI2-NEXT:    movl 4(%eax), %edx
; BMI2-NEXT:    shldl %cl, %esi, %edx
; BMI2-NEXT:    shlxl %ecx, %esi, %esi
; BMI2-NEXT:    xorl %eax, %eax
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %edx
; BMI2-NEXT:    cmovel %esi, %eax
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 4
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl64p:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shlxq %rsi, (%rdi), %rax
; BMI264-NEXT:    retq
  %x = load i64, i64* %p
  %shl = shl i64 %x, %shamt
  ret i64 %shl
}

; Loaded 64-bit value shifted left by the constant 7: immediate-form shifts
; are expected on both targets.
define i64 @shl64pi(i64* %p) nounwind uwtable readonly {
; BMI2-LABEL: shl64pi:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT:    movl (%ecx), %eax
; BMI2-NEXT:    movl 4(%ecx), %edx
; BMI2-NEXT:    shldl $7, %eax, %edx
; BMI2-NEXT:    shll $7, %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl64pi:
; BMI264:       # %bb.0:
; BMI264-NEXT:    movq (%rdi), %rax
; BMI264-NEXT:    shlq $7, %rax
; BMI264-NEXT:    retq
  %x = load i64, i64* %p
  %shl = shl i64 %x, 7
  ret i64 %shl
}

; Variable 32-bit logical right shift: expected to select shrxl.
define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: lshr32:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: lshr32:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shrxl %esi, %edi, %eax
; BMI264-NEXT:    retq
  %shr = lshr i32 %x, %shamt
  ret i32 %shr
}

; Variable 32-bit logical right shift of a loaded value: the load is
; expected to be folded into shrxl.
define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readonly {
; BMI2-LABEL: lshr32p:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    shrxl %ecx, (%eax), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: lshr32p:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shrxl %esi, (%rdi), %eax
; BMI264-NEXT:    retq
  %x = load i32, i32* %p
  %shr = lshr i32 %x, %shamt
  ret i32 %shr
}

; Variable 64-bit logical right shift: single shrxq on the 64-bit target;
; shrdl + shrxl with a cmov select (testb $32) and a zeroed high half on the
; 32-bit target.
define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: lshr64:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 8
; BMI2-NEXT:    .cfi_offset %esi, -8
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shrdl %cl, %edx, %eax
; BMI2-NEXT:    shrxl %ecx, %edx, %esi
; BMI2-NEXT:    xorl %edx, %edx
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %eax
; BMI2-NEXT:    cmovel %esi, %edx
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 4
; BMI2-NEXT:    retl
;
; BMI264-LABEL: lshr64:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shrxq %rsi, %rdi, %rax
; BMI264-NEXT:    retq
  %shr = lshr i64 %x, %shamt
  ret i64 %shr
}

; Variable 64-bit logical right shift of a loaded value: load folded into
; shrxq on the 64-bit target; two 32-bit loads plus the lshr64 expansion on
; the 32-bit target.
define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readonly {
; BMI2-LABEL: lshr64p:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 8
; BMI2-NEXT:    .cfi_offset %esi, -8
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    movl (%edx), %eax
; BMI2-NEXT:    movl 4(%edx), %edx
; BMI2-NEXT:    shrdl %cl, %edx, %eax
; BMI2-NEXT:    shrxl %ecx, %edx, %esi
; BMI2-NEXT:    xorl %edx, %edx
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %eax
; BMI2-NEXT:    cmovel %esi, %edx
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 4
; BMI2-NEXT:    retl
;
; BMI264-LABEL: lshr64p:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shrxq %rsi, (%rdi), %rax
; BMI264-NEXT:    retq
  %x = load i64, i64* %p
  %shr = lshr i64 %x, %shamt
  ret i64 %shr
}

; Variable 32-bit arithmetic right shift: expected to select sarxl.
define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: ashr32:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; BMI2-NEXT:    sarxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: ashr32:
; BMI264:       # %bb.0:
; BMI264-NEXT:    sarxl %esi, %edi, %eax
; BMI264-NEXT:    retq
  %shr = ashr i32 %x, %shamt
  ret i32 %shr
}

; Variable 32-bit arithmetic right shift of a loaded value: the load is
; expected to be folded into sarxl.
define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readonly {
; BMI2-LABEL: ashr32p:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    sarxl %ecx, (%eax), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: ashr32p:
; BMI264:       # %bb.0:
; BMI264-NEXT:    sarxl %esi, (%rdi), %eax
; BMI264-NEXT:    retq
  %x = load i32, i32* %p
  %shr = ashr i32 %x, %shamt
  ret i32 %shr
}

; Variable 64-bit arithmetic right shift: single sarxq on the 64-bit target.
; The 32-bit expansion mirrors lshr64 but fills the high half with the sign
; (sarl $31) instead of zero.
define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: ashr64:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 8
; BMI2-NEXT:    .cfi_offset %esi, -8
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shrdl %cl, %edx, %eax
; BMI2-NEXT:    sarxl %ecx, %edx, %esi
; BMI2-NEXT:    sarl $31, %edx
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %eax
; BMI2-NEXT:    cmovel %esi, %edx
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 4
; BMI2-NEXT:    retl
;
; BMI264-LABEL: ashr64:
; BMI264:       # %bb.0:
; BMI264-NEXT:    sarxq %rsi, %rdi, %rax
; BMI264-NEXT:    retq
  %shr = ashr i64 %x, %shamt
  ret i64 %shr
}

; Variable 64-bit arithmetic right shift of a loaded value: load folded into
; sarxq on the 64-bit target; two 32-bit loads plus the ashr64 expansion on
; the 32-bit target.
define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readonly {
; BMI2-LABEL: ashr64p:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 8
; BMI2-NEXT:    .cfi_offset %esi, -8
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    movl (%edx), %eax
; BMI2-NEXT:    movl 4(%edx), %edx
; BMI2-NEXT:    shrdl %cl, %edx, %eax
; BMI2-NEXT:    sarxl %ecx, %edx, %esi
; BMI2-NEXT:    sarl $31, %edx
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %eax
; BMI2-NEXT:    cmovel %esi, %edx
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    .cfi_def_cfa_offset 4
; BMI2-NEXT:    retl
;
; BMI264-LABEL: ashr64p:
; BMI264:       # %bb.0:
; BMI264-NEXT:    sarxq %rsi, (%rdi), %rax
; BMI264-NEXT:    retq
  %x = load i64, i64* %p
  %shr = ashr i64 %x, %shamt
  ret i64 %shr
}

; 32-bit left shift whose amount is masked with 31 in the IR: the expected
; output contains no separate and instruction, only shlxl.
define i32 @shl32and(i32 %t, i32 %val) nounwind {
; BMI2-LABEL: shl32and:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl32and:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shlxl %edi, %esi, %eax
; BMI264-NEXT:    retq
  %shamt = and i32 %t, 31
  %res = shl i32 %val, %shamt
  ret i32 %res
}

; 64-bit left shift whose amount is masked with 63 in the IR: no and
; instruction is expected; the 64-bit target uses shlxq and the 32-bit target
; uses the shldl/shlxl/cmov expansion.
define i64 @shl64and(i64 %t, i64 %val) nounwind {
; BMI2-LABEL: shl64and:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shldl %cl, %eax, %edx
; BMI2-NEXT:    shlxl %ecx, %eax, %esi
; BMI2-NEXT:    xorl %eax, %eax
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %edx
; BMI2-NEXT:    cmovel %esi, %eax
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    retl
;
; BMI264-LABEL: shl64and:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shlxq %rdi, %rsi, %rax
; BMI264-NEXT:    retq
  %shamt = and i64 %t, 63
  %res = shl i64 %val, %shamt
  ret i64 %res
}

; 32-bit logical right shift with the amount masked with 31: expected to be
; a bare shrxl with no and instruction.
define i32 @lshr32and(i32 %t, i32 %val) nounwind {
; BMI2-LABEL: lshr32and:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: lshr32and:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shrxl %edi, %esi, %eax
; BMI264-NEXT:    retq
  %shamt = and i32 %t, 31
  %res = lshr i32 %val, %shamt
  ret i32 %res
}

; 64-bit logical right shift with the amount masked with 63: no and
; instruction is expected; shrxq on the 64-bit target, shrdl/shrxl/cmov on
; the 32-bit target.
define i64 @lshr64and(i64 %t, i64 %val) nounwind {
; BMI2-LABEL: lshr64and:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shrdl %cl, %edx, %eax
; BMI2-NEXT:    shrxl %ecx, %edx, %esi
; BMI2-NEXT:    xorl %edx, %edx
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %eax
; BMI2-NEXT:    cmovel %esi, %edx
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    retl
;
; BMI264-LABEL: lshr64and:
; BMI264:       # %bb.0:
; BMI264-NEXT:    shrxq %rdi, %rsi, %rax
; BMI264-NEXT:    retq
  %shamt = and i64 %t, 63
  %res = lshr i64 %val, %shamt
  ret i64 %res
}

; 32-bit arithmetic right shift with the amount masked with 31: expected to
; be a bare sarxl with no and instruction.
define i32 @ashr32and(i32 %t, i32 %val) nounwind {
; BMI2-LABEL: ashr32and:
; BMI2:       # %bb.0:
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; BMI2-NEXT:    sarxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    retl
;
; BMI264-LABEL: ashr32and:
; BMI264:       # %bb.0:
; BMI264-NEXT:    sarxl %edi, %esi, %eax
; BMI264-NEXT:    retq
  %shamt = and i32 %t, 31
  %res = ashr i32 %val, %shamt
  ret i32 %res
}

; 64-bit arithmetic right shift with the amount masked with 63: no and
; instruction is expected; sarxq on the 64-bit target, shrdl/sarxl/cmov with
; a sign-filled high half (sarl $31) on the 32-bit target.
define i64 @ashr64and(i64 %t, i64 %val) nounwind {
; BMI2-LABEL: ashr64and:
; BMI2:       # %bb.0:
; BMI2-NEXT:    pushl %esi
; BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT:    shrdl %cl, %edx, %eax
; BMI2-NEXT:    sarxl %ecx, %edx, %esi
; BMI2-NEXT:    sarl $31, %edx
; BMI2-NEXT:    testb $32, %cl
; BMI2-NEXT:    cmovnel %esi, %eax
; BMI2-NEXT:    cmovel %esi, %edx
; BMI2-NEXT:    popl %esi
; BMI2-NEXT:    retl
;
; BMI264-LABEL: ashr64and:
; BMI264:       # %bb.0:
; BMI264-NEXT:    sarxq %rdi, %rsi, %rax
; BMI264-NEXT:    retq
  %shamt = and i64 %t, 63
  %res = ashr i64 %val, %shamt
  ret i64 %res
}