; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64

; Each function below calls a single MMX intrinsic and returns (or stores) its
; result. The X86 / X64 check lines pin the instruction sequence emitted for the
; i686 and x86_64 RUN lines above.
; NOTE(review): both RUN lines pass -disable-peephole, presumably so that the
; memory operands checked below are not produced by that pass - confirm.

; t0-t7: pslli/psrli/psrai intrinsics whose i32 shift amount is loaded from %b.
; The checks expect the amount to be loaded into %mm1 with movd and the
; register-register form of the shift to be emitted.
define i64 @t0(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t0:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psllq %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t0:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psllq %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)

define i64 @t1(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psrlq %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psrlq %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32)

define i64 @t2(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psllw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psllw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32)

define i64 @t3(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t3:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psrlw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t3:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psrlw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32)

define i64 @t4(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t4:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    pslld %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    pslld %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32)

define i64 @t5(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t5:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psrld %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t5:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psrld %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32)

define i64 @t6(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t6:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psraw %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t6:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psraw %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32)

define i64 @t7(<1 x i64>* %a, i32* %b) nounwind {
; X86-LABEL: t7:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movq (%ecx), %mm0
; X86-NEXT:    movd (%eax), %mm1
; X86-NEXT:    psrad %mm1, %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: t7:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movd (%rsi), %mm1
; X64-NEXT:    psrad %mm1, %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32)

; tt0-tt8: two-operand intrinsics whose second operand is loaded from %q.
; The checks expect that load to appear as the memory operand of the MMX
; instruction itself, and emms to be emitted after the result has been moved
; out of %mm0.
define i64 @tt0(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt0:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    paddb (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt0:
; X64:       # %bb.0: # %entry
; X64-NEXT:    paddb (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()

define i64 @tt1(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    paddw (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    paddw (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)

define i64 @tt2(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    paddd (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    paddd (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)

define i64 @tt3(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt3:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    paddq (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt3:
; X64:       # %bb.0: # %entry
; X64-NEXT:    paddq (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)

define i64 @tt4(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt4:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    paddusb (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    paddusb (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)

define i64 @tt5(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt5:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    paddusw (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt5:
; X64:       # %bb.0: # %entry
; X64-NEXT:    paddusw (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)

define i64 @tt6(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt6:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    psrlw (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt6:
; X64:       # %bb.0: # %entry
; X64-NEXT:    psrlw (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx)

define i64 @tt7(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt7:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    psrld (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt7:
; X64:       # %bb.0: # %entry
; X64-NEXT:    psrld (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx)

define i64 @tt8(x86_mmx %t, x86_mmx* %q) nounwind {
; X86-LABEL: tt8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    psrlq (%eax), %mm0
; X86-NEXT:    movq %mm0, (%esp)
; X86-NEXT:    movl (%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    emms
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: tt8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    psrlq (%rdi), %mm0
; X64-NEXT:    movq %mm0, %rax
; X64-NEXT:    emms
; X64-NEXT:    retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)

; The shift amount is written to and read back from a stack slot with volatile
; accesses. The checks expect both the store (movl $1 to the slot) and the
; reload (movd from the slot into %mm1) to remain in the output, with the
; constant 255 materialised through a GPR into %mm0.
define void @test_psrlq_by_volatile_shift_amount(x86_mmx* %t) nounwind {
; X86-LABEL: test_psrlq_by_volatile_shift_amount:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $1, (%esp)
; X86-NEXT:    movl $255, %ecx
; X86-NEXT:    movd %ecx, %mm0
; X86-NEXT:    movd (%esp), %mm1
; X86-NEXT:    psrlq %mm1, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    popl %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_psrlq_by_volatile_shift_amount:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl $1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movl $255, %eax
; X64-NEXT:    movd %eax, %mm0
; X64-NEXT:    movd -{{[0-9]+}}(%rsp), %mm1
; X64-NEXT:    psrlq %mm1, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
entry:
  %0 = alloca i32, align 4
  %1 = bitcast i32* %0 to i8*
  call void @llvm.lifetime.start(i64 4, i8* nonnull %1)
  store volatile i32 1, i32* %0, align 4
  %2 = load volatile i32, i32* %0, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx bitcast (<1 x i64> <i64 255> to x86_mmx), i32 %2)
  store x86_mmx %3, x86_mmx* %t, align 8
  call void @llvm.lifetime.end(i64 4, i8* nonnull %1)
  ret void
}

declare void @llvm.lifetime.start(i64, i8* nocapture)
declare void @llvm.lifetime.end(i64, i8* nocapture)

; Make sure we shrink this vector load and fold it.
define x86_mmx @vec_load(<4 x float>* %x) {
; X86-LABEL: vec_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    pshufw $68, (%eax), %mm0 # mm0 = mem[0,1,0,1]
; X86-NEXT:    paddsb %mm0, %mm0
; X86-NEXT:    retl
;
; X64-LABEL: vec_load:
; X64:       # %bb.0:
; X64-NEXT:    pshufw $68, (%rdi), %mm0 # mm0 = mem[0,1,0,1]
; X64-NEXT:    paddsb %mm0, %mm0
; X64-NEXT:    movq2dq %mm0, %xmm0
; X64-NEXT:    retq
  %z = load <4 x float>, <4 x float>* %x
  %y = extractelement <4 x float> %z, i32 0
  %a = insertelement <2 x float> undef, float %y, i32 0
  %b = insertelement <2 x float> %a, float %y, i32 1
  %c = bitcast <2 x float> %b to x86_mmx
  %d = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %c, x86_mmx %c)
  ret x86_mmx %d
}

declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx)
