1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-FALLBACK0 3; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-FALLBACK1 4; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-FALLBACK2 5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2 6; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2 7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2 8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2 9; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2 10; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2 11; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2 12 13; Patterns: 14; c) x & (-1 >> y) 15; d) x << y >> y 16; are equivalent, but we prefer the second variant if we have BMI2. Note that, in the single-use tests of this file, the assertions show the shift-pair form (d) being emitted without BMI2 as well for i8, i16 and i32, and for i64 on x86-64; i64 on i686 instead materializes the mask and uses the 'and' form (c) in every configuration. 17 18; We do not test the variant where y = (bitwidth - z), e.g. y = (32 - z) for i32, because that is BMI2's BZHI. 
19 20; ---------------------------------------------------------------------------- ; 21; 8-bit 22; ---------------------------------------------------------------------------- ; 23 24define i8 @clear_highbits8_c0(i8 %val, i8 %numhighbits) nounwind { 25; X86-LABEL: clear_highbits8_c0: 26; X86: # %bb.0: 27; X86-NEXT: movb {{[0-9]+}}(%esp), %cl 28; X86-NEXT: movb {{[0-9]+}}(%esp), %al 29; X86-NEXT: shlb %cl, %al 30; X86-NEXT: shrb %cl, %al 31; X86-NEXT: retl 32; 33; X64-LABEL: clear_highbits8_c0: 34; X64: # %bb.0: 35; X64-NEXT: movl %esi, %ecx 36; X64-NEXT: movl %edi, %eax 37; X64-NEXT: shlb %cl, %al 38; X64-NEXT: # kill: def $cl killed $cl killed $ecx 39; X64-NEXT: shrb %cl, %al 40; X64-NEXT: # kill: def $al killed $al killed $eax 41; X64-NEXT: retq 42 %mask = lshr i8 -1, %numhighbits 43 %masked = and i8 %mask, %val 44 ret i8 %masked 45} 46 47define i8 @clear_highbits8_c2_load(i8* %w, i8 %numhighbits) nounwind { 48; X86-LABEL: clear_highbits8_c2_load: 49; X86: # %bb.0: 50; X86-NEXT: movb {{[0-9]+}}(%esp), %cl 51; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 52; X86-NEXT: movb (%eax), %al 53; X86-NEXT: shlb %cl, %al 54; X86-NEXT: shrb %cl, %al 55; X86-NEXT: retl 56; 57; X64-LABEL: clear_highbits8_c2_load: 58; X64: # %bb.0: 59; X64-NEXT: movl %esi, %ecx 60; X64-NEXT: movb (%rdi), %al 61; X64-NEXT: shlb %cl, %al 62; X64-NEXT: # kill: def $cl killed $cl killed $ecx 63; X64-NEXT: shrb %cl, %al 64; X64-NEXT: retq 65 %val = load i8, i8* %w 66 %mask = lshr i8 -1, %numhighbits 67 %masked = and i8 %mask, %val 68 ret i8 %masked 69} 70 71define i8 @clear_highbits8_c4_commutative(i8 %val, i8 %numhighbits) nounwind { 72; X86-LABEL: clear_highbits8_c4_commutative: 73; X86: # %bb.0: 74; X86-NEXT: movb {{[0-9]+}}(%esp), %cl 75; X86-NEXT: movb {{[0-9]+}}(%esp), %al 76; X86-NEXT: shlb %cl, %al 77; X86-NEXT: shrb %cl, %al 78; X86-NEXT: retl 79; 80; X64-LABEL: clear_highbits8_c4_commutative: 81; X64: # %bb.0: 82; X64-NEXT: movl %esi, %ecx 83; X64-NEXT: movl %edi, %eax 84; X64-NEXT: shlb %cl, 
%al 85; X64-NEXT: # kill: def $cl killed $cl killed $ecx 86; X64-NEXT: shrb %cl, %al 87; X64-NEXT: # kill: def $al killed $al killed $eax 88; X64-NEXT: retq 89 %mask = lshr i8 -1, %numhighbits 90 %masked = and i8 %val, %mask ; swapped order 91 ret i8 %masked 92} 93 94; ---------------------------------------------------------------------------- ; 95; 16-bit 96; ---------------------------------------------------------------------------- ; 97 98define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind { 99; X86-NOBMI2-LABEL: clear_highbits16_c0: 100; X86-NOBMI2: # %bb.0: 101; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 102; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 103; X86-NOBMI2-NEXT: shll %cl, %eax 104; X86-NOBMI2-NEXT: movzwl %ax, %eax 105; X86-NOBMI2-NEXT: shrl %cl, %eax 106; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 107; X86-NOBMI2-NEXT: retl 108; 109; X86-BMI2-LABEL: clear_highbits16_c0: 110; X86-BMI2: # %bb.0: 111; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 112; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx 113; X86-BMI2-NEXT: movzwl %cx, %ecx 114; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 115; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 116; X86-BMI2-NEXT: retl 117; 118; X64-NOBMI2-LABEL: clear_highbits16_c0: 119; X64-NOBMI2: # %bb.0: 120; X64-NOBMI2-NEXT: movl %esi, %ecx 121; X64-NOBMI2-NEXT: shll %cl, %edi 122; X64-NOBMI2-NEXT: movzwl %di, %eax 123; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 124; X64-NOBMI2-NEXT: shrl %cl, %eax 125; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 126; X64-NOBMI2-NEXT: retq 127; 128; X64-BMI2-LABEL: clear_highbits16_c0: 129; X64-BMI2: # %bb.0: 130; X64-BMI2-NEXT: shlxl %esi, %edi, %eax 131; X64-BMI2-NEXT: movzwl %ax, %eax 132; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 133; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 134; X64-BMI2-NEXT: retq 135 %mask = lshr i16 -1, %numhighbits 136 %masked = and i16 %mask, %val 137 ret i16 %masked 138} 139 140define i16 
@clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind { 141; X86-NOBMI2-LABEL: clear_highbits16_c1_indexzext: 142; X86-NOBMI2: # %bb.0: 143; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 144; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 145; X86-NOBMI2-NEXT: shll %cl, %eax 146; X86-NOBMI2-NEXT: movzwl %ax, %eax 147; X86-NOBMI2-NEXT: shrl %cl, %eax 148; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 149; X86-NOBMI2-NEXT: retl 150; 151; X86-BMI2-LABEL: clear_highbits16_c1_indexzext: 152; X86-BMI2: # %bb.0: 153; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 154; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx 155; X86-BMI2-NEXT: movzwl %cx, %ecx 156; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 157; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 158; X86-BMI2-NEXT: retl 159; 160; X64-NOBMI2-LABEL: clear_highbits16_c1_indexzext: 161; X64-NOBMI2: # %bb.0: 162; X64-NOBMI2-NEXT: movl %esi, %ecx 163; X64-NOBMI2-NEXT: shll %cl, %edi 164; X64-NOBMI2-NEXT: movzwl %di, %eax 165; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 166; X64-NOBMI2-NEXT: shrl %cl, %eax 167; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 168; X64-NOBMI2-NEXT: retq 169; 170; X64-BMI2-LABEL: clear_highbits16_c1_indexzext: 171; X64-BMI2: # %bb.0: 172; X64-BMI2-NEXT: shlxl %esi, %edi, %eax 173; X64-BMI2-NEXT: movzwl %ax, %eax 174; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 175; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 176; X64-BMI2-NEXT: retq 177 %sh_prom = zext i8 %numhighbits to i16 178 %mask = lshr i16 -1, %sh_prom 179 %masked = and i16 %mask, %val 180 ret i16 %masked 181} 182 183define i16 @clear_highbits16_c2_load(i16* %w, i16 %numhighbits) nounwind { 184; X86-NOBMI2-LABEL: clear_highbits16_c2_load: 185; X86-NOBMI2: # %bb.0: 186; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 187; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 188; X86-NOBMI2-NEXT: movzwl (%eax), %eax 189; X86-NOBMI2-NEXT: shll %cl, %eax 190; X86-NOBMI2-NEXT: movzwl %ax, %eax 191; 
X86-NOBMI2-NEXT: shrl %cl, %eax 192; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 193; X86-NOBMI2-NEXT: retl 194; 195; X86-BMI2-LABEL: clear_highbits16_c2_load: 196; X86-BMI2: # %bb.0: 197; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 198; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 199; X86-BMI2-NEXT: movzwl (%ecx), %ecx 200; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx 201; X86-BMI2-NEXT: movzwl %cx, %ecx 202; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 203; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 204; X86-BMI2-NEXT: retl 205; 206; X64-NOBMI2-LABEL: clear_highbits16_c2_load: 207; X64-NOBMI2: # %bb.0: 208; X64-NOBMI2-NEXT: movl %esi, %ecx 209; X64-NOBMI2-NEXT: movzwl (%rdi), %eax 210; X64-NOBMI2-NEXT: shll %cl, %eax 211; X64-NOBMI2-NEXT: movzwl %ax, %eax 212; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 213; X64-NOBMI2-NEXT: shrl %cl, %eax 214; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 215; X64-NOBMI2-NEXT: retq 216; 217; X64-BMI2-LABEL: clear_highbits16_c2_load: 218; X64-BMI2: # %bb.0: 219; X64-BMI2-NEXT: movzwl (%rdi), %eax 220; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 221; X64-BMI2-NEXT: movzwl %ax, %eax 222; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 223; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 224; X64-BMI2-NEXT: retq 225 %val = load i16, i16* %w 226 %mask = lshr i16 -1, %numhighbits 227 %masked = and i16 %mask, %val 228 ret i16 %masked 229} 230 231define i16 @clear_highbits16_c3_load_indexzext(i16* %w, i8 %numhighbits) nounwind { 232; X86-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext: 233; X86-NOBMI2: # %bb.0: 234; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 235; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 236; X86-NOBMI2-NEXT: movzwl (%eax), %eax 237; X86-NOBMI2-NEXT: shll %cl, %eax 238; X86-NOBMI2-NEXT: movzwl %ax, %eax 239; X86-NOBMI2-NEXT: shrl %cl, %eax 240; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 241; X86-NOBMI2-NEXT: retl 242; 243; X86-BMI2-LABEL: clear_highbits16_c3_load_indexzext: 244; 
X86-BMI2: # %bb.0: 245; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 246; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 247; X86-BMI2-NEXT: movzwl (%ecx), %ecx 248; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx 249; X86-BMI2-NEXT: movzwl %cx, %ecx 250; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 251; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 252; X86-BMI2-NEXT: retl 253; 254; X64-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext: 255; X64-NOBMI2: # %bb.0: 256; X64-NOBMI2-NEXT: movl %esi, %ecx 257; X64-NOBMI2-NEXT: movzwl (%rdi), %eax 258; X64-NOBMI2-NEXT: shll %cl, %eax 259; X64-NOBMI2-NEXT: movzwl %ax, %eax 260; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 261; X64-NOBMI2-NEXT: shrl %cl, %eax 262; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 263; X64-NOBMI2-NEXT: retq 264; 265; X64-BMI2-LABEL: clear_highbits16_c3_load_indexzext: 266; X64-BMI2: # %bb.0: 267; X64-BMI2-NEXT: movzwl (%rdi), %eax 268; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 269; X64-BMI2-NEXT: movzwl %ax, %eax 270; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 271; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 272; X64-BMI2-NEXT: retq 273 %val = load i16, i16* %w 274 %sh_prom = zext i8 %numhighbits to i16 275 %mask = lshr i16 -1, %sh_prom 276 %masked = and i16 %mask, %val 277 ret i16 %masked 278} 279 280define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind { 281; X86-NOBMI2-LABEL: clear_highbits16_c4_commutative: 282; X86-NOBMI2: # %bb.0: 283; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 284; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 285; X86-NOBMI2-NEXT: shll %cl, %eax 286; X86-NOBMI2-NEXT: movzwl %ax, %eax 287; X86-NOBMI2-NEXT: shrl %cl, %eax 288; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 289; X86-NOBMI2-NEXT: retl 290; 291; X86-BMI2-LABEL: clear_highbits16_c4_commutative: 292; X86-BMI2: # %bb.0: 293; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 294; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx 295; X86-BMI2-NEXT: movzwl %cx, %ecx 296; 
X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 297; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 298; X86-BMI2-NEXT: retl 299; 300; X64-NOBMI2-LABEL: clear_highbits16_c4_commutative: 301; X64-NOBMI2: # %bb.0: 302; X64-NOBMI2-NEXT: movl %esi, %ecx 303; X64-NOBMI2-NEXT: shll %cl, %edi 304; X64-NOBMI2-NEXT: movzwl %di, %eax 305; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 306; X64-NOBMI2-NEXT: shrl %cl, %eax 307; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 308; X64-NOBMI2-NEXT: retq 309; 310; X64-BMI2-LABEL: clear_highbits16_c4_commutative: 311; X64-BMI2: # %bb.0: 312; X64-BMI2-NEXT: shlxl %esi, %edi, %eax 313; X64-BMI2-NEXT: movzwl %ax, %eax 314; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 315; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 316; X64-BMI2-NEXT: retq 317 %mask = lshr i16 -1, %numhighbits 318 %masked = and i16 %val, %mask ; swapped order 319 ret i16 %masked 320} 321 322; ---------------------------------------------------------------------------- ; 323; 32-bit 324; ---------------------------------------------------------------------------- ; 325 326define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind { 327; X86-NOBMI2-LABEL: clear_highbits32_c0: 328; X86-NOBMI2: # %bb.0: 329; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 330; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 331; X86-NOBMI2-NEXT: shll %cl, %eax 332; X86-NOBMI2-NEXT: shrl %cl, %eax 333; X86-NOBMI2-NEXT: retl 334; 335; X86-BMI2-LABEL: clear_highbits32_c0: 336; X86-BMI2: # %bb.0: 337; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 338; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx 339; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 340; X86-BMI2-NEXT: retl 341; 342; X64-NOBMI2-LABEL: clear_highbits32_c0: 343; X64-NOBMI2: # %bb.0: 344; X64-NOBMI2-NEXT: movl %esi, %ecx 345; X64-NOBMI2-NEXT: movl %edi, %eax 346; X64-NOBMI2-NEXT: shll %cl, %eax 347; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 348; X64-NOBMI2-NEXT: shrl %cl, %eax 349; X64-NOBMI2-NEXT: retq 
350; 351; X64-BMI2-LABEL: clear_highbits32_c0: 352; X64-BMI2: # %bb.0: 353; X64-BMI2-NEXT: shlxl %esi, %edi, %eax 354; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 355; X64-BMI2-NEXT: retq 356 %mask = lshr i32 -1, %numhighbits 357 %masked = and i32 %mask, %val 358 ret i32 %masked 359} 360 361define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind { 362; X86-NOBMI2-LABEL: clear_highbits32_c1_indexzext: 363; X86-NOBMI2: # %bb.0: 364; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 365; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 366; X86-NOBMI2-NEXT: shll %cl, %eax 367; X86-NOBMI2-NEXT: shrl %cl, %eax 368; X86-NOBMI2-NEXT: retl 369; 370; X86-BMI2-LABEL: clear_highbits32_c1_indexzext: 371; X86-BMI2: # %bb.0: 372; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 373; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx 374; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 375; X86-BMI2-NEXT: retl 376; 377; X64-NOBMI2-LABEL: clear_highbits32_c1_indexzext: 378; X64-NOBMI2: # %bb.0: 379; X64-NOBMI2-NEXT: movl %esi, %ecx 380; X64-NOBMI2-NEXT: movl %edi, %eax 381; X64-NOBMI2-NEXT: shll %cl, %eax 382; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 383; X64-NOBMI2-NEXT: shrl %cl, %eax 384; X64-NOBMI2-NEXT: retq 385; 386; X64-BMI2-LABEL: clear_highbits32_c1_indexzext: 387; X64-BMI2: # %bb.0: 388; X64-BMI2-NEXT: shlxl %esi, %edi, %eax 389; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 390; X64-BMI2-NEXT: retq 391 %sh_prom = zext i8 %numhighbits to i32 392 %mask = lshr i32 -1, %sh_prom 393 %masked = and i32 %mask, %val 394 ret i32 %masked 395} 396 397define i32 @clear_highbits32_c2_load(i32* %w, i32 %numhighbits) nounwind { 398; X86-NOBMI2-LABEL: clear_highbits32_c2_load: 399; X86-NOBMI2: # %bb.0: 400; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 401; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 402; X86-NOBMI2-NEXT: movl (%eax), %eax 403; X86-NOBMI2-NEXT: shll %cl, %eax 404; X86-NOBMI2-NEXT: shrl %cl, %eax 405; X86-NOBMI2-NEXT: retl 406; 407; X86-BMI2-LABEL: clear_highbits32_c2_load: 
408; X86-BMI2: # %bb.0: 409; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 410; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 411; X86-BMI2-NEXT: shlxl %ecx, (%eax), %eax 412; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax 413; X86-BMI2-NEXT: retl 414; 415; X64-NOBMI2-LABEL: clear_highbits32_c2_load: 416; X64-NOBMI2: # %bb.0: 417; X64-NOBMI2-NEXT: movl %esi, %ecx 418; X64-NOBMI2-NEXT: movl (%rdi), %eax 419; X64-NOBMI2-NEXT: shll %cl, %eax 420; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 421; X64-NOBMI2-NEXT: shrl %cl, %eax 422; X64-NOBMI2-NEXT: retq 423; 424; X64-BMI2-LABEL: clear_highbits32_c2_load: 425; X64-BMI2: # %bb.0: 426; X64-BMI2-NEXT: shlxl %esi, (%rdi), %eax 427; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 428; X64-BMI2-NEXT: retq 429 %val = load i32, i32* %w 430 %mask = lshr i32 -1, %numhighbits 431 %masked = and i32 %mask, %val 432 ret i32 %masked 433} 434 435define i32 @clear_highbits32_c3_load_indexzext(i32* %w, i8 %numhighbits) nounwind { 436; X86-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext: 437; X86-NOBMI2: # %bb.0: 438; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 439; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 440; X86-NOBMI2-NEXT: movl (%eax), %eax 441; X86-NOBMI2-NEXT: shll %cl, %eax 442; X86-NOBMI2-NEXT: shrl %cl, %eax 443; X86-NOBMI2-NEXT: retl 444; 445; X86-BMI2-LABEL: clear_highbits32_c3_load_indexzext: 446; X86-BMI2: # %bb.0: 447; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 448; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 449; X86-BMI2-NEXT: shlxl %ecx, (%eax), %eax 450; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax 451; X86-BMI2-NEXT: retl 452; 453; X64-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext: 454; X64-NOBMI2: # %bb.0: 455; X64-NOBMI2-NEXT: movl %esi, %ecx 456; X64-NOBMI2-NEXT: movl (%rdi), %eax 457; X64-NOBMI2-NEXT: shll %cl, %eax 458; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 459; X64-NOBMI2-NEXT: shrl %cl, %eax 460; X64-NOBMI2-NEXT: retq 461; 462; X64-BMI2-LABEL: clear_highbits32_c3_load_indexzext: 463; X64-BMI2: # %bb.0: 464; 
X64-BMI2-NEXT: shlxl %esi, (%rdi), %eax 465; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 466; X64-BMI2-NEXT: retq 467 %val = load i32, i32* %w 468 %sh_prom = zext i8 %numhighbits to i32 469 %mask = lshr i32 -1, %sh_prom 470 %masked = and i32 %mask, %val 471 ret i32 %masked 472} 473 474define i32 @clear_highbits32_c4_commutative(i32 %val, i32 %numhighbits) nounwind { 475; X86-NOBMI2-LABEL: clear_highbits32_c4_commutative: 476; X86-NOBMI2: # %bb.0: 477; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 478; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 479; X86-NOBMI2-NEXT: shll %cl, %eax 480; X86-NOBMI2-NEXT: shrl %cl, %eax 481; X86-NOBMI2-NEXT: retl 482; 483; X86-BMI2-LABEL: clear_highbits32_c4_commutative: 484; X86-BMI2: # %bb.0: 485; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al 486; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx 487; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 488; X86-BMI2-NEXT: retl 489; 490; X64-NOBMI2-LABEL: clear_highbits32_c4_commutative: 491; X64-NOBMI2: # %bb.0: 492; X64-NOBMI2-NEXT: movl %esi, %ecx 493; X64-NOBMI2-NEXT: movl %edi, %eax 494; X64-NOBMI2-NEXT: shll %cl, %eax 495; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 496; X64-NOBMI2-NEXT: shrl %cl, %eax 497; X64-NOBMI2-NEXT: retq 498; 499; X64-BMI2-LABEL: clear_highbits32_c4_commutative: 500; X64-BMI2: # %bb.0: 501; X64-BMI2-NEXT: shlxl %esi, %edi, %eax 502; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 503; X64-BMI2-NEXT: retq 504 %mask = lshr i32 -1, %numhighbits 505 %masked = and i32 %val, %mask ; swapped order 506 ret i32 %masked 507} 508 509; ---------------------------------------------------------------------------- ; 510; 64-bit 511; ---------------------------------------------------------------------------- ; 512 513define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind { 514; X86-FALLBACK0-LABEL: clear_highbits64_c0: 515; X86-FALLBACK0: # %bb.0: 516; X86-FALLBACK0-NEXT: pushl %esi 517; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl 518; X86-FALLBACK0-NEXT: movl $-1, %eax 
519; X86-FALLBACK0-NEXT: movl $-1, %esi 520; X86-FALLBACK0-NEXT: shrl %cl, %esi 521; X86-FALLBACK0-NEXT: xorl %edx, %edx 522; X86-FALLBACK0-NEXT: testb $32, %cl 523; X86-FALLBACK0-NEXT: jne .LBB13_1 524; X86-FALLBACK0-NEXT: # %bb.2: 525; X86-FALLBACK0-NEXT: movl %esi, %edx 526; X86-FALLBACK0-NEXT: jmp .LBB13_3 527; X86-FALLBACK0-NEXT: .LBB13_1: 528; X86-FALLBACK0-NEXT: movl %esi, %eax 529; X86-FALLBACK0-NEXT: .LBB13_3: 530; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %eax 531; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %edx 532; X86-FALLBACK0-NEXT: popl %esi 533; X86-FALLBACK0-NEXT: retl 534; 535; X86-FALLBACK1-LABEL: clear_highbits64_c0: 536; X86-FALLBACK1: # %bb.0: 537; X86-FALLBACK1-NEXT: pushl %esi 538; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl 539; X86-FALLBACK1-NEXT: movl $-1, %esi 540; X86-FALLBACK1-NEXT: movl $-1, %eax 541; X86-FALLBACK1-NEXT: shrl %cl, %eax 542; X86-FALLBACK1-NEXT: xorl %edx, %edx 543; X86-FALLBACK1-NEXT: testb $32, %cl 544; X86-FALLBACK1-NEXT: cmovel %eax, %edx 545; X86-FALLBACK1-NEXT: cmovel %esi, %eax 546; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %eax 547; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %edx 548; X86-FALLBACK1-NEXT: popl %esi 549; X86-FALLBACK1-NEXT: retl 550; 551; X86-FALLBACK2-LABEL: clear_highbits64_c0: 552; X86-FALLBACK2: # %bb.0: 553; X86-FALLBACK2-NEXT: pushl %esi 554; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl 555; X86-FALLBACK2-NEXT: movl $-1, %esi 556; X86-FALLBACK2-NEXT: movl $-1, %eax 557; X86-FALLBACK2-NEXT: shrl %cl, %eax 558; X86-FALLBACK2-NEXT: xorl %edx, %edx 559; X86-FALLBACK2-NEXT: testb $32, %cl 560; X86-FALLBACK2-NEXT: cmovel %eax, %edx 561; X86-FALLBACK2-NEXT: cmovel %esi, %eax 562; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %eax 563; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %edx 564; X86-FALLBACK2-NEXT: popl %esi 565; X86-FALLBACK2-NEXT: retl 566; 567; X86-BMI2-LABEL: clear_highbits64_c0: 568; X86-BMI2: # %bb.0: 569; X86-BMI2-NEXT: pushl %esi 570; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 
571; X86-BMI2-NEXT: movl $-1, %eax 572; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi 573; X86-BMI2-NEXT: xorl %edx, %edx 574; X86-BMI2-NEXT: testb $32, %cl 575; X86-BMI2-NEXT: cmovel %esi, %edx 576; X86-BMI2-NEXT: cmovnel %esi, %eax 577; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 578; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 579; X86-BMI2-NEXT: popl %esi 580; X86-BMI2-NEXT: retl 581; 582; X64-NOBMI2-LABEL: clear_highbits64_c0: 583; X64-NOBMI2: # %bb.0: 584; X64-NOBMI2-NEXT: movq %rsi, %rcx 585; X64-NOBMI2-NEXT: movq %rdi, %rax 586; X64-NOBMI2-NEXT: shlq %cl, %rax 587; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx 588; X64-NOBMI2-NEXT: shrq %cl, %rax 589; X64-NOBMI2-NEXT: retq 590; 591; X64-BMI2-LABEL: clear_highbits64_c0: 592; X64-BMI2: # %bb.0: 593; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax 594; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax 595; X64-BMI2-NEXT: retq 596 %mask = lshr i64 -1, %numhighbits 597 %masked = and i64 %mask, %val 598 ret i64 %masked 599} 600 601define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind { 602; X86-FALLBACK0-LABEL: clear_highbits64_c1_indexzext: 603; X86-FALLBACK0: # %bb.0: 604; X86-FALLBACK0-NEXT: pushl %esi 605; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl 606; X86-FALLBACK0-NEXT: movl $-1, %eax 607; X86-FALLBACK0-NEXT: movl $-1, %esi 608; X86-FALLBACK0-NEXT: shrl %cl, %esi 609; X86-FALLBACK0-NEXT: xorl %edx, %edx 610; X86-FALLBACK0-NEXT: testb $32, %cl 611; X86-FALLBACK0-NEXT: jne .LBB14_1 612; X86-FALLBACK0-NEXT: # %bb.2: 613; X86-FALLBACK0-NEXT: movl %esi, %edx 614; X86-FALLBACK0-NEXT: jmp .LBB14_3 615; X86-FALLBACK0-NEXT: .LBB14_1: 616; X86-FALLBACK0-NEXT: movl %esi, %eax 617; X86-FALLBACK0-NEXT: .LBB14_3: 618; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %eax 619; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %edx 620; X86-FALLBACK0-NEXT: popl %esi 621; X86-FALLBACK0-NEXT: retl 622; 623; X86-FALLBACK1-LABEL: clear_highbits64_c1_indexzext: 624; X86-FALLBACK1: # %bb.0: 625; X86-FALLBACK1-NEXT: pushl %esi 626; 
X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl 627; X86-FALLBACK1-NEXT: movl $-1, %esi 628; X86-FALLBACK1-NEXT: movl $-1, %eax 629; X86-FALLBACK1-NEXT: shrl %cl, %eax 630; X86-FALLBACK1-NEXT: xorl %edx, %edx 631; X86-FALLBACK1-NEXT: testb $32, %cl 632; X86-FALLBACK1-NEXT: cmovel %eax, %edx 633; X86-FALLBACK1-NEXT: cmovel %esi, %eax 634; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %eax 635; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %edx 636; X86-FALLBACK1-NEXT: popl %esi 637; X86-FALLBACK1-NEXT: retl 638; 639; X86-FALLBACK2-LABEL: clear_highbits64_c1_indexzext: 640; X86-FALLBACK2: # %bb.0: 641; X86-FALLBACK2-NEXT: pushl %esi 642; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl 643; X86-FALLBACK2-NEXT: movl $-1, %esi 644; X86-FALLBACK2-NEXT: movl $-1, %eax 645; X86-FALLBACK2-NEXT: shrl %cl, %eax 646; X86-FALLBACK2-NEXT: xorl %edx, %edx 647; X86-FALLBACK2-NEXT: testb $32, %cl 648; X86-FALLBACK2-NEXT: cmovel %eax, %edx 649; X86-FALLBACK2-NEXT: cmovel %esi, %eax 650; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %eax 651; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %edx 652; X86-FALLBACK2-NEXT: popl %esi 653; X86-FALLBACK2-NEXT: retl 654; 655; X86-BMI2-LABEL: clear_highbits64_c1_indexzext: 656; X86-BMI2: # %bb.0: 657; X86-BMI2-NEXT: pushl %esi 658; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl 659; X86-BMI2-NEXT: movl $-1, %eax 660; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi 661; X86-BMI2-NEXT: xorl %edx, %edx 662; X86-BMI2-NEXT: testb $32, %cl 663; X86-BMI2-NEXT: cmovel %esi, %edx 664; X86-BMI2-NEXT: cmovnel %esi, %eax 665; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 666; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 667; X86-BMI2-NEXT: popl %esi 668; X86-BMI2-NEXT: retl 669; 670; X64-NOBMI2-LABEL: clear_highbits64_c1_indexzext: 671; X64-NOBMI2: # %bb.0: 672; X64-NOBMI2-NEXT: movl %esi, %ecx 673; X64-NOBMI2-NEXT: movq %rdi, %rax 674; X64-NOBMI2-NEXT: shlq %cl, %rax 675; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 676; X64-NOBMI2-NEXT: shrq %cl, %rax 677; X64-NOBMI2-NEXT: 
retq 678; 679; X64-BMI2-LABEL: clear_highbits64_c1_indexzext: 680; X64-BMI2: # %bb.0: 681; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 682; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax 683; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax 684; X64-BMI2-NEXT: retq 685 %sh_prom = zext i8 %numhighbits to i64 686 %mask = lshr i64 -1, %sh_prom 687 %masked = and i64 %mask, %val 688 ret i64 %masked 689} 690 691define i64 @clear_highbits64_c2_load(i64* %w, i64 %numhighbits) nounwind { 692; X86-FALLBACK0-LABEL: clear_highbits64_c2_load: 693; X86-FALLBACK0: # %bb.0: 694; X86-FALLBACK0-NEXT: pushl %edi 695; X86-FALLBACK0-NEXT: pushl %esi 696; X86-FALLBACK0-NEXT: movl {{[0-9]+}}(%esp), %esi 697; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl 698; X86-FALLBACK0-NEXT: movl $-1, %eax 699; X86-FALLBACK0-NEXT: movl $-1, %edi 700; X86-FALLBACK0-NEXT: shrl %cl, %edi 701; X86-FALLBACK0-NEXT: xorl %edx, %edx 702; X86-FALLBACK0-NEXT: testb $32, %cl 703; X86-FALLBACK0-NEXT: jne .LBB15_1 704; X86-FALLBACK0-NEXT: # %bb.2: 705; X86-FALLBACK0-NEXT: movl %edi, %edx 706; X86-FALLBACK0-NEXT: jmp .LBB15_3 707; X86-FALLBACK0-NEXT: .LBB15_1: 708; X86-FALLBACK0-NEXT: movl %edi, %eax 709; X86-FALLBACK0-NEXT: .LBB15_3: 710; X86-FALLBACK0-NEXT: andl (%esi), %eax 711; X86-FALLBACK0-NEXT: andl 4(%esi), %edx 712; X86-FALLBACK0-NEXT: popl %esi 713; X86-FALLBACK0-NEXT: popl %edi 714; X86-FALLBACK0-NEXT: retl 715; 716; X86-FALLBACK1-LABEL: clear_highbits64_c2_load: 717; X86-FALLBACK1: # %bb.0: 718; X86-FALLBACK1-NEXT: pushl %edi 719; X86-FALLBACK1-NEXT: pushl %esi 720; X86-FALLBACK1-NEXT: movl {{[0-9]+}}(%esp), %esi 721; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl 722; X86-FALLBACK1-NEXT: movl $-1, %edi 723; X86-FALLBACK1-NEXT: movl $-1, %eax 724; X86-FALLBACK1-NEXT: shrl %cl, %eax 725; X86-FALLBACK1-NEXT: xorl %edx, %edx 726; X86-FALLBACK1-NEXT: testb $32, %cl 727; X86-FALLBACK1-NEXT: cmovel %eax, %edx 728; X86-FALLBACK1-NEXT: cmovel %edi, %eax 729; X86-FALLBACK1-NEXT: andl (%esi), %eax 730; X86-FALLBACK1-NEXT: 
andl 4(%esi), %edx 731; X86-FALLBACK1-NEXT: popl %esi 732; X86-FALLBACK1-NEXT: popl %edi 733; X86-FALLBACK1-NEXT: retl 734; 735; X86-FALLBACK2-LABEL: clear_highbits64_c2_load: 736; X86-FALLBACK2: # %bb.0: 737; X86-FALLBACK2-NEXT: pushl %edi 738; X86-FALLBACK2-NEXT: pushl %esi 739; X86-FALLBACK2-NEXT: movl {{[0-9]+}}(%esp), %esi 740; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl 741; X86-FALLBACK2-NEXT: movl $-1, %edi 742; X86-FALLBACK2-NEXT: movl $-1, %eax 743; X86-FALLBACK2-NEXT: shrl %cl, %eax 744; X86-FALLBACK2-NEXT: xorl %edx, %edx 745; X86-FALLBACK2-NEXT: testb $32, %cl 746; X86-FALLBACK2-NEXT: cmovel %eax, %edx 747; X86-FALLBACK2-NEXT: cmovel %edi, %eax 748; X86-FALLBACK2-NEXT: andl (%esi), %eax 749; X86-FALLBACK2-NEXT: andl 4(%esi), %edx 750; X86-FALLBACK2-NEXT: popl %esi 751; X86-FALLBACK2-NEXT: popl %edi 752; X86-FALLBACK2-NEXT: retl 753; 754; X86-BMI2-LABEL: clear_highbits64_c2_load: 755; X86-BMI2: # %bb.0: 756; X86-BMI2-NEXT: pushl %ebx 757; X86-BMI2-NEXT: pushl %esi 758; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 759; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl 760; X86-BMI2-NEXT: movl $-1, %eax 761; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi 762; X86-BMI2-NEXT: xorl %edx, %edx 763; X86-BMI2-NEXT: testb $32, %bl 764; X86-BMI2-NEXT: cmovel %esi, %edx 765; X86-BMI2-NEXT: cmovnel %esi, %eax 766; X86-BMI2-NEXT: andl (%ecx), %eax 767; X86-BMI2-NEXT: andl 4(%ecx), %edx 768; X86-BMI2-NEXT: popl %esi 769; X86-BMI2-NEXT: popl %ebx 770; X86-BMI2-NEXT: retl 771; 772; X64-NOBMI2-LABEL: clear_highbits64_c2_load: 773; X64-NOBMI2: # %bb.0: 774; X64-NOBMI2-NEXT: movq %rsi, %rcx 775; X64-NOBMI2-NEXT: movq (%rdi), %rax 776; X64-NOBMI2-NEXT: shlq %cl, %rax 777; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx 778; X64-NOBMI2-NEXT: shrq %cl, %rax 779; X64-NOBMI2-NEXT: retq 780; 781; X64-BMI2-LABEL: clear_highbits64_c2_load: 782; X64-BMI2: # %bb.0: 783; X64-BMI2-NEXT: shlxq %rsi, (%rdi), %rax 784; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax 785; X64-BMI2-NEXT: retq 786 %val = 
load i64, i64* %w
  %mask = lshr i64 -1, %numhighbits
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern c where the value is loaded from %w and the bit count is an i8 that
; is zero-extended to i64 before being used as the shift amount.
define i64 @clear_highbits64_c3_load_indexzext(i64* %w, i8 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: clear_highbits64_c3_load_indexzext:
; X86-FALLBACK0: # %bb.0:
; X86-FALLBACK0-NEXT: pushl %edi
; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %eax
; X86-FALLBACK0-NEXT: movl $-1, %edi
; X86-FALLBACK0-NEXT: shrl %cl, %edi
; X86-FALLBACK0-NEXT: xorl %edx, %edx
; X86-FALLBACK0-NEXT: testb $32, %cl
; X86-FALLBACK0-NEXT: jne .LBB16_1
; X86-FALLBACK0-NEXT: # %bb.2:
; X86-FALLBACK0-NEXT: movl %edi, %edx
; X86-FALLBACK0-NEXT: jmp .LBB16_3
; X86-FALLBACK0-NEXT: .LBB16_1:
; X86-FALLBACK0-NEXT: movl %edi, %eax
; X86-FALLBACK0-NEXT: .LBB16_3:
; X86-FALLBACK0-NEXT: andl (%esi), %eax
; X86-FALLBACK0-NEXT: andl 4(%esi), %edx
; X86-FALLBACK0-NEXT: popl %esi
; X86-FALLBACK0-NEXT: popl %edi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: clear_highbits64_c3_load_indexzext:
; X86-FALLBACK1: # %bb.0:
; X86-FALLBACK1-NEXT: pushl %edi
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK1-NEXT: movl $-1, %edi
; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edx, %edx
; X86-FALLBACK1-NEXT: testb $32, %cl
; X86-FALLBACK1-NEXT: cmovel %eax, %edx
; X86-FALLBACK1-NEXT: cmovel %edi, %eax
; X86-FALLBACK1-NEXT: andl (%esi), %eax
; X86-FALLBACK1-NEXT: andl 4(%esi), %edx
; X86-FALLBACK1-NEXT: popl %esi
; X86-FALLBACK1-NEXT: popl %edi
; X86-FALLBACK1-NEXT: retl
;
; X86-FALLBACK2-LABEL: clear_highbits64_c3_load_indexzext:
; X86-FALLBACK2: # %bb.0:
; X86-FALLBACK2-NEXT: pushl %edi
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK2-NEXT: movl $-1, %edi
; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edx, %edx
; X86-FALLBACK2-NEXT: testb $32, %cl
; X86-FALLBACK2-NEXT: cmovel %eax, %edx
; X86-FALLBACK2-NEXT: cmovel %edi, %eax
; X86-FALLBACK2-NEXT: andl (%esi), %eax
; X86-FALLBACK2-NEXT: andl 4(%esi), %edx
; X86-FALLBACK2-NEXT: popl %esi
; X86-FALLBACK2-NEXT: popl %edi
; X86-FALLBACK2-NEXT: retl
;
; X86-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: cmovel %esi, %edx
; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: andl (%ecx), %eax
; X86-BMI2-NEXT: andl 4(%ecx), %edx
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_highbits64_c3_load_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movq (%rdi), %rax
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI2-NEXT: shlxq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
  %val = load i64, i64* %w
  %sh_prom = zext i8 %numhighbits to i64
  %mask = lshr i64 -1, %sh_prom
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern c applied directly to the function arguments (no load), with the
; operands of the 'and' swapped: the value comes first and the mask second.
define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: clear_highbits64_c4_commutative:
; X86-FALLBACK0: # %bb.0:
; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %eax
; X86-FALLBACK0-NEXT: movl $-1, %esi
; X86-FALLBACK0-NEXT: shrl %cl, %esi
; X86-FALLBACK0-NEXT: xorl %edx, %edx
; X86-FALLBACK0-NEXT: testb $32, %cl
; X86-FALLBACK0-NEXT: jne .LBB17_1
; X86-FALLBACK0-NEXT: # %bb.2:
; X86-FALLBACK0-NEXT: movl %esi, %edx
; X86-FALLBACK0-NEXT: jmp .LBB17_3
; X86-FALLBACK0-NEXT: .LBB17_1:
; X86-FALLBACK0-NEXT: movl %esi, %eax
; X86-FALLBACK0-NEXT: .LBB17_3:
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK0-NEXT: popl %esi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: clear_highbits64_c4_commutative:
; X86-FALLBACK1: # %bb.0:
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK1-NEXT: movl $-1, %esi
; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edx, %edx
; X86-FALLBACK1-NEXT: testb $32, %cl
; X86-FALLBACK1-NEXT: cmovel %eax, %edx
; X86-FALLBACK1-NEXT: cmovel %esi, %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK1-NEXT: popl %esi
; X86-FALLBACK1-NEXT: retl
;
; X86-FALLBACK2-LABEL: clear_highbits64_c4_commutative:
; X86-FALLBACK2: # %bb.0:
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK2-NEXT: movl $-1, %esi
; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edx, %edx
; X86-FALLBACK2-NEXT: testb $32, %cl
; X86-FALLBACK2-NEXT: cmovel %eax, %edx
; X86-FALLBACK2-NEXT: cmovel %esi, %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-FALLBACK2-NEXT: popl %esi
; X86-FALLBACK2-NEXT: retl
;
; X86-BMI2-LABEL: clear_highbits64_c4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: cmovel %esi, %edx
; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_highbits64_c4_commutative:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_highbits64_c4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
  %mask = lshr i64 -1, %numhighbits
  %masked = and i64 %val, %mask ; swapped order
  ret i64 %masked
}

; ---------------------------------------------------------------------------- ;
; Multi-use tests
; ---------------------------------------------------------------------------- ;

; External functions (declared only, no body here) that the tests below call
; with the mask, giving the mask a second use besides the 'and'.
declare void @use32(i32)
declare void @use64(i64)

; 32-bit variant: the mask is passed to @use32 and also feeds the 'and'.
; In the expectations below, each configuration builds the mask with a right
; shift of -1 and then applies an 'and' instruction; none of them emits a
; left-shift/right-shift pair on the value.
define i32 @oneuse32(i32 %val, i32 %numhighbits) nounwind {
; X86-NOBMI2-LABEL: oneuse32:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: subl $8, %esp
; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %esi
; X86-NOBMI2-NEXT: shrl %cl, %esi
; X86-NOBMI2-NEXT: movl %esi, (%esp)
; X86-NOBMI2-NEXT: calll use32
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-NOBMI2-NEXT: movl %esi, %eax
; X86-NOBMI2-NEXT: addl $8, %esp
; X86-NOBMI2-NEXT: popl %esi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: oneuse32:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $8, %esp
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $-1, %ecx
; X86-BMI2-NEXT: shrxl %eax, %ecx, %esi
; X86-BMI2-NEXT: movl %esi, (%esp)
; X86-BMI2-NEXT: calll use32
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl %esi, %eax
; X86-BMI2-NEXT: addl $8, %esp
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: oneuse32:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: pushq %rbp
; X64-NOBMI2-NEXT: pushq %rbx
; X64-NOBMI2-NEXT: pushq %rax
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movl %edi, %ebx
; X64-NOBMI2-NEXT: movl $-1, %ebp
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shrl %cl, %ebp
; X64-NOBMI2-NEXT: movl %ebp, %edi
; X64-NOBMI2-NEXT: callq use32
; X64-NOBMI2-NEXT: andl %ebx, %ebp
; X64-NOBMI2-NEXT: movl %ebp, %eax
; X64-NOBMI2-NEXT: addq $8, %rsp
; X64-NOBMI2-NEXT: popq %rbx
; X64-NOBMI2-NEXT: popq %rbp
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: oneuse32:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: pushq %rbp
; X64-BMI2-NEXT: pushq %rbx
; X64-BMI2-NEXT: pushq %rax
; X64-BMI2-NEXT: movl %edi, %ebx
; X64-BMI2-NEXT: movl $-1, %eax
; X64-BMI2-NEXT: shrxl %esi, %eax, %ebp
; X64-BMI2-NEXT: movl %ebp, %edi
; X64-BMI2-NEXT: callq use32
; X64-BMI2-NEXT: andl %ebx, %ebp
; X64-BMI2-NEXT: movl %ebp, %eax
; X64-BMI2-NEXT: addq $8, %rsp
; X64-BMI2-NEXT: popq %rbx
; X64-BMI2-NEXT: popq %rbp
; X64-BMI2-NEXT: retq
  %mask = lshr i32 -1, %numhighbits
  call void @use32(i32 %mask)
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; 64-bit variant: the mask is passed to @use64 and also feeds the 'and'.
; In the expectations below, each configuration builds the mask with a right
; shift of -1 and then applies 'and' instructions. The i686 configurations
; assemble the 64-bit mask from two 32-bit halves, selected by testing bit 5
; of the shift amount (testb $32).
define i64 @oneuse64(i64 %val, i64 %numhighbits) nounwind {
; X86-FALLBACK0-LABEL: oneuse64:
; X86-FALLBACK0: # %bb.0:
; X86-FALLBACK0-NEXT: pushl %edi
; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: pushl %eax
; X86-FALLBACK0-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK0-NEXT: movl $-1, %esi
; X86-FALLBACK0-NEXT: movl $-1, %edi
; X86-FALLBACK0-NEXT: shrl %cl, %edi
; X86-FALLBACK0-NEXT: testb $32, %cl
; X86-FALLBACK0-NEXT: je .LBB19_2
; X86-FALLBACK0-NEXT: # %bb.1:
; X86-FALLBACK0-NEXT: movl %edi, %esi
; X86-FALLBACK0-NEXT: xorl %edi, %edi
; X86-FALLBACK0-NEXT: .LBB19_2:
; X86-FALLBACK0-NEXT: subl $8, %esp
; X86-FALLBACK0-NEXT: pushl %edi
; X86-FALLBACK0-NEXT: pushl %esi
; X86-FALLBACK0-NEXT: calll use64
; X86-FALLBACK0-NEXT: addl $16, %esp
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK0-NEXT: andl {{[0-9]+}}(%esp), %edi
; X86-FALLBACK0-NEXT: movl %esi, %eax
; X86-FALLBACK0-NEXT: movl %edi, %edx
; X86-FALLBACK0-NEXT: addl $4, %esp
; X86-FALLBACK0-NEXT: popl %esi
; X86-FALLBACK0-NEXT: popl %edi
; X86-FALLBACK0-NEXT: retl
;
; X86-FALLBACK1-LABEL: oneuse64:
; X86-FALLBACK1: # %bb.0:
; X86-FALLBACK1-NEXT: pushl %edi
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: pushl %eax
; X86-FALLBACK1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK1-NEXT: movl $-1, %esi
; X86-FALLBACK1-NEXT: movl $-1, %eax
; X86-FALLBACK1-NEXT: shrl %cl, %eax
; X86-FALLBACK1-NEXT: xorl %edi, %edi
; X86-FALLBACK1-NEXT: testb $32, %cl
; X86-FALLBACK1-NEXT: cmovnel %eax, %esi
; X86-FALLBACK1-NEXT: cmovel %eax, %edi
; X86-FALLBACK1-NEXT: subl $8, %esp
; X86-FALLBACK1-NEXT: pushl %edi
; X86-FALLBACK1-NEXT: pushl %esi
; X86-FALLBACK1-NEXT: calll use64
; X86-FALLBACK1-NEXT: addl $16, %esp
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK1-NEXT: andl {{[0-9]+}}(%esp), %edi
; X86-FALLBACK1-NEXT: movl %esi, %eax
; X86-FALLBACK1-NEXT: movl %edi, %edx
; X86-FALLBACK1-NEXT: addl $4, %esp
; X86-FALLBACK1-NEXT: popl %esi
; X86-FALLBACK1-NEXT: popl %edi
; X86-FALLBACK1-NEXT: retl
;
; X86-FALLBACK2-LABEL: oneuse64:
; X86-FALLBACK2: # %bb.0:
; X86-FALLBACK2-NEXT: pushl %edi
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: pushl %eax
; X86-FALLBACK2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FALLBACK2-NEXT: movl $-1, %esi
; X86-FALLBACK2-NEXT: movl $-1, %eax
; X86-FALLBACK2-NEXT: shrl %cl, %eax
; X86-FALLBACK2-NEXT: xorl %edi, %edi
; X86-FALLBACK2-NEXT: testb $32, %cl
; X86-FALLBACK2-NEXT: cmovnel %eax, %esi
; X86-FALLBACK2-NEXT: cmovel %eax, %edi
; X86-FALLBACK2-NEXT: subl $8, %esp
; X86-FALLBACK2-NEXT: pushl %edi
; X86-FALLBACK2-NEXT: pushl %esi
; X86-FALLBACK2-NEXT: calll use64
; X86-FALLBACK2-NEXT: addl $16, %esp
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-FALLBACK2-NEXT: andl {{[0-9]+}}(%esp), %edi
; X86-FALLBACK2-NEXT: movl %esi, %eax
; X86-FALLBACK2-NEXT: movl %edi, %edx
; X86-FALLBACK2-NEXT: addl $4, %esp
; X86-FALLBACK2-NEXT: popl %esi
; X86-FALLBACK2-NEXT: popl %edi
; X86-FALLBACK2-NEXT: retl
;
; X86-BMI2-LABEL: oneuse64:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %eax
; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $-1, %esi
; X86-BMI2-NEXT: shrxl %eax, %esi, %ecx
; X86-BMI2-NEXT: xorl %edi, %edi
; X86-BMI2-NEXT: testb $32, %al
; X86-BMI2-NEXT: cmovnel %ecx, %esi
; X86-BMI2-NEXT: cmovel %ecx, %edi
; X86-BMI2-NEXT: subl $8, %esp
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: calll use64
; X86-BMI2-NEXT: addl $16, %esp
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
; X86-BMI2-NEXT: movl %esi, %eax
; X86-BMI2-NEXT: movl %edi, %edx
; X86-BMI2-NEXT: addl $4, %esp
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: popl %edi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: oneuse64:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: pushq %r14
; X64-NOBMI2-NEXT: pushq %rbx
; X64-NOBMI2-NEXT: pushq %rax
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: movq %rdi, %r14
; X64-NOBMI2-NEXT: movq $-1, %rbx
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shrq %cl, %rbx
; X64-NOBMI2-NEXT: movq %rbx, %rdi
; X64-NOBMI2-NEXT: callq use64
; X64-NOBMI2-NEXT: andq %r14, %rbx
; X64-NOBMI2-NEXT: movq %rbx, %rax
; X64-NOBMI2-NEXT: addq $8, %rsp
; X64-NOBMI2-NEXT: popq %rbx
; X64-NOBMI2-NEXT: popq %r14
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: oneuse64:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: pushq %r14
; X64-BMI2-NEXT: pushq %rbx
; X64-BMI2-NEXT: pushq %rax
; X64-BMI2-NEXT: movq %rdi, %r14
; X64-BMI2-NEXT: movq $-1, %rax
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rbx
; X64-BMI2-NEXT: movq %rbx, %rdi
; X64-BMI2-NEXT: callq use64
; X64-BMI2-NEXT: andq %r14, %rbx
; X64-BMI2-NEXT: movq %rbx, %rax
; X64-BMI2-NEXT: addq $8, %rsp
; X64-BMI2-NEXT: popq %rbx
; X64-BMI2-NEXT: popq %r14
; X64-BMI2-NEXT: retq
  %mask = lshr i64 -1, %numhighbits
  call void @use64(i64 %mask)
  %masked = and i64 %mask, %val
  ret i64 %masked
}