; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2,-sse | FileCheck %s --check-prefix=X64

; Test scalarization of vector FP and integer operations on x86 targets
; without SSE.

define void @fadd_2f64_mem(<2 x double>* %p0, <2 x double>* %p1, <2 x double>* %p2) nounwind {
; X32-LABEL: fadd_2f64_mem:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    fldl 8(%edx)
; X32-NEXT:    fldl (%edx)
; X32-NEXT:    faddl (%ecx)
; X32-NEXT:    fxch %st(1)
; X32-NEXT:    faddl 8(%ecx)
; X32-NEXT:    fstpl 8(%eax)
; X32-NEXT:    fstpl (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: fadd_2f64_mem:
; X64:       # %bb.0:
; X64-NEXT:    fldl 8(%rdi)
; X64-NEXT:    fldl (%rdi)
; X64-NEXT:    faddl (%rsi)
; X64-NEXT:    fxch %st(1)
; X64-NEXT:    faddl 8(%rsi)
; X64-NEXT:    fstpl 8(%rdx)
; X64-NEXT:    fstpl (%rdx)
; X64-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %p0
  %2 = load <2 x double>, <2 x double>* %p1
  %3 = fadd <2 x double> %1, %2
  store <2 x double> %3, <2 x double>* %p2
  ret void
}

define void @fadd_4f32_mem(<4 x float>* %p0, <4 x float>* %p1, <4 x float>* %p2) nounwind {
; X32-LABEL: fadd_4f32_mem:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    flds 12(%edx)
; X32-NEXT:    flds 8(%edx)
; X32-NEXT:    flds 4(%edx)
; X32-NEXT:    flds (%edx)
; X32-NEXT:    fadds (%ecx)
; X32-NEXT:    fxch %st(1)
; X32-NEXT:    fadds 4(%ecx)
; X32-NEXT:    fxch %st(2)
; X32-NEXT:    fadds 8(%ecx)
; X32-NEXT:    fxch %st(3)
; X32-NEXT:    fadds 12(%ecx)
; X32-NEXT:    fstps 12(%eax)
; X32-NEXT:    fxch %st(2)
; X32-NEXT:    fstps 8(%eax)
; X32-NEXT:    fstps 4(%eax)
; X32-NEXT:    fstps (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: fadd_4f32_mem:
; X64:       # %bb.0:
; X64-NEXT:    flds 12(%rdi)
; X64-NEXT:    flds 8(%rdi)
; X64-NEXT:    flds 4(%rdi)
; X64-NEXT:    flds (%rdi)
; X64-NEXT:    fadds (%rsi)
; X64-NEXT:    fxch %st(1)
; X64-NEXT:    fadds 4(%rsi)
; X64-NEXT:    fxch %st(2)
; X64-NEXT:    fadds 8(%rsi)
; X64-NEXT:    fxch %st(3)
; X64-NEXT:    fadds 12(%rsi)
; X64-NEXT:    fstps 12(%rdx)
; X64-NEXT:    fxch %st(2)
; X64-NEXT:    fstps 8(%rdx)
; X64-NEXT:    fstps 4(%rdx)
; X64-NEXT:    fstps (%rdx)
; X64-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %p0
  %2 = load <4 x float>, <4 x float>* %p1
  %3 = fadd <4 x float> %1, %2
  store <4 x float> %3, <4 x float>* %p2
  ret void
}

define void @fdiv_4f32_mem(<4 x float>* %p0, <4 x float>* %p1, <4 x float>* %p2) nounwind {
; X32-LABEL: fdiv_4f32_mem:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    flds 12(%edx)
; X32-NEXT:    flds 8(%edx)
; X32-NEXT:    flds 4(%edx)
; X32-NEXT:    flds (%edx)
; X32-NEXT:    fdivs (%ecx)
; X32-NEXT:    fxch %st(1)
; X32-NEXT:    fdivs 4(%ecx)
; X32-NEXT:    fxch %st(2)
; X32-NEXT:    fdivs 8(%ecx)
; X32-NEXT:    fxch %st(3)
; X32-NEXT:    fdivs 12(%ecx)
; X32-NEXT:    fstps 12(%eax)
; X32-NEXT:    fxch %st(2)
; X32-NEXT:    fstps 8(%eax)
; X32-NEXT:    fstps 4(%eax)
; X32-NEXT:    fstps (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: fdiv_4f32_mem:
; X64:       # %bb.0:
; X64-NEXT:    flds 12(%rdi)
; X64-NEXT:    flds 8(%rdi)
; X64-NEXT:    flds 4(%rdi)
; X64-NEXT:    flds (%rdi)
; X64-NEXT:    fdivs (%rsi)
; X64-NEXT:    fxch %st(1)
; X64-NEXT:    fdivs 4(%rsi)
; X64-NEXT:    fxch %st(2)
; X64-NEXT:    fdivs 8(%rsi)
; X64-NEXT:    fxch %st(3)
; X64-NEXT:    fdivs 12(%rsi)
; X64-NEXT:    fstps 12(%rdx)
; X64-NEXT:    fxch %st(2)
; X64-NEXT:    fstps 8(%rdx)
; X64-NEXT:    fstps 4(%rdx)
; X64-NEXT:    fstps (%rdx)
; X64-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %p0
  %2 = load <4 x float>, <4 x float>* %p1
  %3 = fdiv <4 x float> %1, %2
  store <4 x float> %3, <4 x float>* %p2
  ret void
}

define void @sitofp_4i64_4f32_mem(<4 x i64>* %p0, <4 x float>* %p1) nounwind {
; X32-LABEL: sitofp_4i64_4f32_mem:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    movl 24(%eax), %ecx
; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT:    movl 28(%eax), %ecx
; X32-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X32-NEXT:    movl 16(%eax), %esi
; X32-NEXT:    movl 20(%eax), %edi
; X32-NEXT:    movl 8(%eax), %ebx
; X32-NEXT:    movl 12(%eax), %edx
; X32-NEXT:    movl (%eax), %ecx
; X32-NEXT:    movl 4(%eax), %eax
; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X32-NEXT:    movl (%esp), %eax # 4-byte Reload
; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl 12(%ebp), %eax
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fstps 12(%eax)
; X32-NEXT:    fstps 8(%eax)
; X32-NEXT:    fstps 4(%eax)
; X32-NEXT:    fstps (%eax)
; X32-NEXT:    leal -12(%ebp), %esp
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: sitofp_4i64_4f32_mem:
; X64:       # %bb.0:
; X64-NEXT:    movq 24(%rdi), %rax
; X64-NEXT:    movq 16(%rdi), %rcx
; X64-NEXT:    movq (%rdi), %rdx
; X64-NEXT:    movq 8(%rdi), %rdi
; X64-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildll -{{[0-9]+}}(%rsp)
; X64-NEXT:    fstps 12(%rsi)
; X64-NEXT:    fstps 8(%rsi)
; X64-NEXT:    fstps 4(%rsi)
; X64-NEXT:    fstps (%rsi)
; X64-NEXT:    retq
  %1 = load <4 x i64>, <4 x i64>* %p0
  %2 = sitofp <4 x i64> %1 to <4 x float>
  store <4 x float> %2, <4 x float>* %p1
  ret void
}

define void @sitofp_4i32_4f32_mem(<4 x i32>* %p0, <4 x float>* %p1) nounwind {
; X32-LABEL: sitofp_4i32_4f32_mem:
; X32:       # %bb.0:
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl 12(%ecx), %edx
; X32-NEXT:    movl 8(%ecx), %esi
; X32-NEXT:    movl (%ecx), %edi
; X32-NEXT:    movl 4(%ecx), %ecx
; X32-NEXT:    movl %edi, (%esp)
; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT:    fildl (%esp)
; X32-NEXT:    fildl {{[0-9]+}}(%esp)
; X32-NEXT:    fildl {{[0-9]+}}(%esp)
; X32-NEXT:    fildl {{[0-9]+}}(%esp)
; X32-NEXT:    fstps 12(%eax)
; X32-NEXT:    fstps 8(%eax)
; X32-NEXT:    fstps 4(%eax)
; X32-NEXT:    fstps (%eax)
; X32-NEXT:    addl $16, %esp
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    retl
;
; X64-LABEL: sitofp_4i32_4f32_mem:
; X64:       # %bb.0:
; X64-NEXT:    movl 12(%rdi), %eax
; X64-NEXT:    movl 8(%rdi), %ecx
; X64-NEXT:    movl (%rdi), %edx
; X64-NEXT:    movl 4(%rdi), %edi
; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
; X64-NEXT:    fildl -{{[0-9]+}}(%rsp)
; X64-NEXT:    fstps 12(%rsi)
; X64-NEXT:    fstps 8(%rsi)
; X64-NEXT:    fstps 4(%rsi)
; X64-NEXT:    fstps (%rsi)
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %p0
  %2 = sitofp <4 x i32> %1 to <4 x float>
  store <4 x float> %2, <4 x float>* %p1
  ret void
}

define void @add_2i64_mem(<2 x i64>* %p0, <2 x i64>* %p1, <2 x i64>* %p2) nounwind {
; X32-LABEL: add_2i64_mem:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl 12(%edx), %esi
; X32-NEXT:    movl 8(%edx), %edi
; X32-NEXT:    movl (%edx), %ebx
; X32-NEXT:    movl 4(%edx), %edx
; X32-NEXT:    addl (%ecx), %ebx
; X32-NEXT:    adcl 4(%ecx), %edx
; X32-NEXT:    addl 8(%ecx), %edi
; X32-NEXT:    adcl 12(%ecx), %esi
; X32-NEXT:    movl %edi, 8(%eax)
; X32-NEXT:    movl %ebx, (%eax)
; X32-NEXT:    movl %esi, 12(%eax)
; X32-NEXT:    movl %edx, 4(%eax)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: add_2i64_mem:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq 8(%rdi), %rcx
; X64-NEXT:    addq (%rsi), %rax
; X64-NEXT:    addq 8(%rsi), %rcx
; X64-NEXT:    movq %rcx, 8(%rdx)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64>* %p0
  %2 = load <2 x i64>, <2 x i64>* %p1
  %3 = add <2 x i64> %1, %2
  store <2 x i64> %3, <2 x i64>* %p2
  ret void
}

define void @add_4i32_mem(<4 x i32>* %p0, <4 x i32>* %p1, <4 x i32>* %p2) nounwind {
; X32-LABEL: add_4i32_mem:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl 12(%edx), %esi
; X32-NEXT:    movl 8(%edx), %edi
; X32-NEXT:    movl (%edx), %ebx
; X32-NEXT:    movl 4(%edx), %edx
; X32-NEXT:    addl (%ecx), %ebx
; X32-NEXT:    addl 4(%ecx), %edx
; X32-NEXT:    addl 8(%ecx), %edi
; X32-NEXT:    addl 12(%ecx), %esi
; X32-NEXT:    movl %esi, 12(%eax)
; X32-NEXT:    movl %edi, 8(%eax)
; X32-NEXT:    movl %edx, 4(%eax)
; X32-NEXT:    movl %ebx, (%eax)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: add_4i32_mem:
; X64:       # %bb.0:
; X64-NEXT:    movl 12(%rdi), %eax
; X64-NEXT:    movl 8(%rdi), %ecx
; X64-NEXT:    movl (%rdi), %r8d
; X64-NEXT:    movl 4(%rdi), %edi
; X64-NEXT:    addl (%rsi), %r8d
; X64-NEXT:    addl 4(%rsi), %edi
; X64-NEXT:    addl 8(%rsi), %ecx
; X64-NEXT:    addl 12(%rsi), %eax
; X64-NEXT:    movl %eax, 12(%rdx)
; X64-NEXT:    movl %ecx, 8(%rdx)
; X64-NEXT:    movl %edi, 4(%rdx)
; X64-NEXT:    movl %r8d, (%rdx)
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %p0
  %2 = load <4 x i32>, <4 x i32>* %p1
  %3 = add <4 x i32> %1, %2
  store <4 x i32> %3, <4 x i32>* %p2
  ret void
}