; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/bmi-builtins.c

; Each function below feeds one BMI-related IR pattern through llc with
; -fast-isel, once for a 32-bit and once for a 64-bit x86 triple (see the two
; run lines above). The expected assembly for each triple is pinned by the
; autogenerated directives inside the function; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

;
; AMD Intrinsics
;

; 16-bit trailing-zero count. The select returns the constant 16 instead of
; the cttz result when the zero-extended input compares equal to 0, so the
; `i1 true` cttz call is never relied on for a zero input.
define i16 @test__tzcnt_u16(i16 %a0) {
; X32-LABEL: test__tzcnt_u16:
; X32:       # BB#0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzwl %ax, %ecx
; X32-NEXT:    cmpl $0, %ecx
; X32-NEXT:    jne .LBB0_1
; X32-NEXT:  # BB#2:
; X32-NEXT:    movw $16, %ax
; X32-NEXT:    retl
; X32-NEXT:  .LBB0_1:
; X32-NEXT:    tzcntw %ax, %ax
; X32-NEXT:    retl
;
; X64-LABEL: test__tzcnt_u16:
; X64:       # BB#0:
; X64-NEXT:    movw $16, %cx
; X64-NEXT:    movzwl %di, %edx
; X64-NEXT:    tzcntw %dx, %ax
; X64-NEXT:    cmpl $0, %edx
; X64-NEXT:    cmovew %cx, %ax
; X64-NEXT:    retq
  %zext = zext i16 %a0 to i32
  %cmp = icmp ne i32 %zext, 0
  %cttz = call i16 @llvm.cttz.i16(i16 %a0, i1 true)
  %res = select i1 %cmp, i16 %cttz, i16 16
  ret i16 %res
}

; (~a0) & a1, written as an xor with -1 followed by an and.
define i32 @test__andn_u32(i32 %a0, i32 %a1) {
; X32-LABEL: test__andn_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl $-1, %eax
; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    retl
;
; X64-LABEL: test__andn_u32:
; X64:       # BB#0:
; X64-NEXT:    xorl $-1, %edi
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %xor = xor i32 %a0, -1
  %res = and i32 %xor, %a1
  ret i32 %res
}

; Forwards both arguments unchanged to the llvm.x86.bmi.bextr.32 intrinsic.
define i32 @test__bextr_u32(i32 %a0, i32 %a1) {
; X32-LABEL: test__bextr_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X32-NEXT:    retl
;
; X64-LABEL: test__bextr_u32:
; X64:       # BB#0:
; X64-NEXT:    bextrl %esi, %edi, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %a1)
  ret i32 %res
}

; a0 & (0 - a0).
define i32 @test__blsi_u32(i32 %a0) {
; X32-LABEL: test__blsi_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    subl %ecx, %eax
; X32-NEXT:    andl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test__blsi_u32:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    subl %edi, %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  %neg = sub i32 0, %a0
  %res = and i32 %a0, %neg
  ret i32 %res
}

; a0 ^ (a0 - 1).
define i32 @test__blsmsk_u32(i32 %a0) {
; X32-LABEL: test__blsmsk_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    subl $1, %eax
; X32-NEXT:    xorl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test__blsmsk_u32:
; X64:       # BB#0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    subl $1, %eax
; X64-NEXT:    xorl %edi, %eax
; X64-NEXT:    retq
  %dec = sub i32 %a0, 1
  %res = xor i32 %a0, %dec
  ret i32 %res
}

; a0 & (a0 - 1).
define i32 @test__blsr_u32(i32 %a0) {
; X32-LABEL: test__blsr_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    subl $1, %eax
; X32-NEXT:    andl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test__blsr_u32:
; X64:       # BB#0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    subl $1, %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  %dec = sub i32 %a0, 1
  %res = and i32 %a0, %dec
  ret i32 %res
}

; 32-bit trailing-zero count. The select returns the constant 32 instead of
; the cttz result when the input compares equal to 0.
define i32 @test__tzcnt_u32(i32 %a0) {
; X32-LABEL: test__tzcnt_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    cmpl $0, %eax
; X32-NEXT:    jne .LBB6_1
; X32-NEXT:  # BB#2:
; X32-NEXT:    movl $32, %eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB6_1:
; X32-NEXT:    tzcntl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test__tzcnt_u32:
; X64:       # BB#0:
; X64-NEXT:    movl $32, %ecx
; X64-NEXT:    tzcntl %edi, %eax
; X64-NEXT:    cmovbl %ecx, %eax
; X64-NEXT:    retq
  %cmp = icmp ne i32 %a0, 0
  %cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 true)
  %res = select i1 %cmp, i32 %cttz, i32 32
  ret i32 %res
}

;
; Intel intrinsics
;

; Same IR pattern as test__tzcnt_u16, under the single-underscore name.
define i16 @test_tzcnt_u16(i16 %a0) {
; X32-LABEL: test_tzcnt_u16:
; X32:       # BB#0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzwl %ax, %ecx
; X32-NEXT:    cmpl $0, %ecx
; X32-NEXT:    jne .LBB7_1
; X32-NEXT:  # BB#2:
; X32-NEXT:    movw $16, %ax
; X32-NEXT:    retl
; X32-NEXT:  .LBB7_1:
; X32-NEXT:    tzcntw %ax, %ax
; X32-NEXT:    retl
;
; X64-LABEL: test_tzcnt_u16:
; X64:       # BB#0:
; X64-NEXT:    movw $16, %cx
; X64-NEXT:    movzwl %di, %edx
; X64-NEXT:    tzcntw %dx, %ax
; X64-NEXT:    cmpl $0, %edx
; X64-NEXT:    cmovew %cx, %ax
; X64-NEXT:    retq
  %zext = zext i16 %a0 to i32
  %cmp = icmp ne i32 %zext, 0
  %cttz = call i16 @llvm.cttz.i16(i16 %a0, i1 true)
  %res = select i1 %cmp, i16 %cttz, i16 16
  ret i16 %res
}

; (~a0) & a1, written as an xor with -1 followed by an and.
define i32 @test_andn_u32(i32 %a0, i32 %a1) {
; X32-LABEL: test_andn_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorl $-1, %eax
; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_andn_u32:
; X64:       # BB#0:
; X64-NEXT:    xorl $-1, %edi
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %xor = xor i32 %a0, -1
  %res = and i32 %xor, %a1
  ret i32 %res
}

; Three-argument form: the second intrinsic operand is assembled as
; (a1 & 255) | ((a2 & 255) << 8) before calling llvm.x86.bmi.bextr.32.
define i32 @test_bextr_u32(i32 %a0, i32 %a1, i32 %a2) {
; X32-LABEL: test_bextr_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    andl $255, %ecx
; X32-NEXT:    andl $255, %eax
; X32-NEXT:    shll $8, %eax
; X32-NEXT:    orl %ecx, %eax
; X32-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_bextr_u32:
; X64:       # BB#0:
; X64-NEXT:    andl $255, %esi
; X64-NEXT:    andl $255, %edx
; X64-NEXT:    shll $8, %edx
; X64-NEXT:    orl %esi, %edx
; X64-NEXT:    bextrl %edx, %edi, %eax
; X64-NEXT:    retq
  %and1 = and i32 %a1, 255
  %and2 = and i32 %a2, 255
  %shl = shl i32 %and2, 8
  %or = or i32 %and1, %shl
  %res = call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %or)
  ret i32 %res
}

; a0 & (0 - a0).
define i32 @test_blsi_u32(i32 %a0) {
; X32-LABEL: test_blsi_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    subl %ecx, %eax
; X32-NEXT:    andl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_blsi_u32:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    subl %edi, %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  %neg = sub i32 0, %a0
  %res = and i32 %a0, %neg
  ret i32 %res
}

; a0 ^ (a0 - 1).
define i32 @test_blsmsk_u32(i32 %a0) {
; X32-LABEL: test_blsmsk_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    subl $1, %eax
; X32-NEXT:    xorl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_blsmsk_u32:
; X64:       # BB#0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    subl $1, %eax
; X64-NEXT:    xorl %edi, %eax
; X64-NEXT:    retq
  %dec = sub i32 %a0, 1
  %res = xor i32 %a0, %dec
  ret i32 %res
}

; a0 & (a0 - 1).
define i32 @test_blsr_u32(i32 %a0) {
; X32-LABEL: test_blsr_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    subl $1, %eax
; X32-NEXT:    andl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_blsr_u32:
; X64:       # BB#0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    subl $1, %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  %dec = sub i32 %a0, 1
  %res = and i32 %a0, %dec
  ret i32 %res
}

; Same IR pattern as test__tzcnt_u32, under the single-underscore name.
define i32 @test_tzcnt_u32(i32 %a0) {
; X32-LABEL: test_tzcnt_u32:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    cmpl $0, %eax
; X32-NEXT:    jne .LBB13_1
; X32-NEXT:  # BB#2:
; X32-NEXT:    movl $32, %eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB13_1:
; X32-NEXT:    tzcntl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_tzcnt_u32:
; X64:       # BB#0:
; X64-NEXT:    movl $32, %ecx
; X64-NEXT:    tzcntl %edi, %eax
; X64-NEXT:    cmovbl %ecx, %eax
; X64-NEXT:    retq
  %cmp = icmp ne i32 %a0, 0
  %cttz = call i32 @llvm.cttz.i32(i32 %a0, i1 true)
  %res = select i1 %cmp, i32 %cttz, i32 32
  ret i32 %res
}

; Intrinsics referenced by the tests above.
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i32 @llvm.x86.bmi.bextr.32(i32, i32)