1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X86-NOSSE 3; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse -verify-machineinstrs | FileCheck %s --check-prefix=X86-SSE1 4; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse2 -verify-machineinstrs | FileCheck %s --check-prefix=X86-SSE2 5; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix=X86-AVX 6; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix=X86-AVX 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X64-SSE 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix=X64-AVX 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix=X64-AVX 10 11; ----- FADD ----- 12 13define void @fadd_32r(float* %loc, float %val) nounwind { 14; X86-NOSSE-LABEL: fadd_32r: 15; X86-NOSSE: # %bb.0: 16; X86-NOSSE-NEXT: subl $8, %esp 17; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 18; X86-NOSSE-NEXT: movl (%eax), %ecx 19; X86-NOSSE-NEXT: movl %ecx, (%esp) 20; X86-NOSSE-NEXT: flds (%esp) 21; X86-NOSSE-NEXT: fadds {{[0-9]+}}(%esp) 22; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) 23; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 24; X86-NOSSE-NEXT: movl %ecx, (%eax) 25; X86-NOSSE-NEXT: addl $8, %esp 26; X86-NOSSE-NEXT: retl 27; 28; X86-SSE1-LABEL: fadd_32r: 29; X86-SSE1: # %bb.0: 30; X86-SSE1-NEXT: subl $8, %esp 31; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 32; X86-SSE1-NEXT: movl (%eax), %ecx 33; X86-SSE1-NEXT: movl %ecx, (%esp) 34; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 35; X86-SSE1-NEXT: addss {{[0-9]+}}(%esp), %xmm0 36; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 37; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx 38; X86-SSE1-NEXT: movl %ecx, (%eax) 39; X86-SSE1-NEXT: addl $8, %esp 40; X86-SSE1-NEXT: retl 41; 42; X86-SSE2-LABEL: fadd_32r: 43; X86-SSE2: # %bb.0: 44; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 45; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 46; X86-SSE2-NEXT: addss (%eax), %xmm0 47; X86-SSE2-NEXT: movss %xmm0, (%eax) 48; X86-SSE2-NEXT: retl 49; 50; X86-AVX-LABEL: fadd_32r: 51; X86-AVX: # %bb.0: 52; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax 53; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 54; X86-AVX-NEXT: vaddss (%eax), %xmm0, %xmm0 55; X86-AVX-NEXT: vmovss %xmm0, (%eax) 56; X86-AVX-NEXT: retl 57; 58; X64-SSE-LABEL: fadd_32r: 59; X64-SSE: # %bb.0: 60; X64-SSE-NEXT: addss (%rdi), %xmm0 61; X64-SSE-NEXT: movss %xmm0, (%rdi) 62; X64-SSE-NEXT: retq 63; 64; X64-AVX-LABEL: fadd_32r: 65; X64-AVX: # %bb.0: 66; X64-AVX-NEXT: vaddss (%rdi), %xmm0, %xmm0 67; X64-AVX-NEXT: vmovss %xmm0, (%rdi) 68; X64-AVX-NEXT: retq 69 %floc = bitcast float* %loc to i32* 70 %1 = load atomic i32, i32* %floc seq_cst, align 4 71 %2 = bitcast i32 %1 to float 72 %add = fadd float %2, %val 73 %3 = bitcast float %add to i32 74 store atomic i32 %3, i32* %floc release, align 4 75 ret void 76} 77 78define void @fadd_64r(double* %loc, double %val) nounwind { 79; X86-NOSSE-LABEL: fadd_64r: 80; X86-NOSSE: # %bb.0: 81; X86-NOSSE-NEXT: pushl %ebp 82; X86-NOSSE-NEXT: movl %esp, %ebp 83; X86-NOSSE-NEXT: andl $-8, %esp 84; X86-NOSSE-NEXT: subl $32, %esp 85; X86-NOSSE-NEXT: movl 8(%ebp), %eax 86; X86-NOSSE-NEXT: fildll (%eax) 87; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 88; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 89; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 90; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) 91; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 92; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) 93; X86-NOSSE-NEXT: faddl 12(%ebp) 94; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 95; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 96; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 97; X86-NOSSE-NEXT: movl %ecx, (%esp) 98; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) 99; X86-NOSSE-NEXT: fildll (%esp) 100; X86-NOSSE-NEXT: fistpll (%eax) 101; X86-NOSSE-NEXT: movl %ebp, %esp 102; X86-NOSSE-NEXT: popl %ebp 103; X86-NOSSE-NEXT: retl 104; 105; X86-SSE1-LABEL: fadd_64r: 106; X86-SSE1: # %bb.0: 107; X86-SSE1-NEXT: pushl %ebp 108; X86-SSE1-NEXT: movl %esp, %ebp 109; X86-SSE1-NEXT: andl $-8, %esp 110; X86-SSE1-NEXT: subl $16, %esp 111; X86-SSE1-NEXT: movl 8(%ebp), %eax 112; X86-SSE1-NEXT: xorps %xmm0, %xmm0 113; X86-SSE1-NEXT: xorps %xmm1, %xmm1 114; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 115; X86-SSE1-NEXT: movss %xmm1, (%esp) 116; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 117; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 118; X86-SSE1-NEXT: fldl (%esp) 119; X86-SSE1-NEXT: faddl 12(%ebp) 120; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 121; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 122; X86-SSE1-NEXT: movlps %xmm0, (%eax) 123; X86-SSE1-NEXT: movl %ebp, %esp 124; X86-SSE1-NEXT: popl %ebp 125; X86-SSE1-NEXT: retl 126; 127; X86-SSE2-LABEL: fadd_64r: 128; X86-SSE2: # %bb.0: 129; X86-SSE2-NEXT: pushl %ebp 130; X86-SSE2-NEXT: movl %esp, %ebp 131; X86-SSE2-NEXT: andl $-8, %esp 132; X86-SSE2-NEXT: subl $8, %esp 133; X86-SSE2-NEXT: movl 8(%ebp), %eax 134; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 135; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0 136; X86-SSE2-NEXT: movsd %xmm0, (%esp) 137; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 138; X86-SSE2-NEXT: movlps %xmm0, (%eax) 139; X86-SSE2-NEXT: movl %ebp, %esp 140; X86-SSE2-NEXT: popl %ebp 141; X86-SSE2-NEXT: retl 142; 143; X86-AVX-LABEL: fadd_64r: 144; X86-AVX: # %bb.0: 145; X86-AVX-NEXT: pushl %ebp 146; X86-AVX-NEXT: movl %esp, %ebp 147; X86-AVX-NEXT: andl $-8, %esp 148; X86-AVX-NEXT: subl $8, %esp 149; X86-AVX-NEXT: movl 8(%ebp), %eax 150; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 151; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0 152; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 153; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 154; X86-AVX-NEXT: vmovlps %xmm0, (%eax) 155; X86-AVX-NEXT: movl %ebp, %esp 156; X86-AVX-NEXT: popl %ebp 157; X86-AVX-NEXT: retl 158; 159; X64-SSE-LABEL: fadd_64r: 160; X64-SSE: # %bb.0: 161; X64-SSE-NEXT: addsd (%rdi), %xmm0 162; X64-SSE-NEXT: movsd %xmm0, (%rdi) 163; X64-SSE-NEXT: retq 164; 165; X64-AVX-LABEL: fadd_64r: 166; X64-AVX: # %bb.0: 167; X64-AVX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 168; X64-AVX-NEXT: vmovsd %xmm0, (%rdi) 169; X64-AVX-NEXT: retq 170 %floc = bitcast double* %loc to i64* 171 %1 = load atomic i64, i64* %floc seq_cst, align 8 172 %2 = bitcast i64 %1 to double 173 %add = fadd double %2, %val 174 %3 = bitcast double %add to i64 175 store atomic i64 %3, i64* %floc release, align 8 176 ret void 177} 178 179@glob32 = global float 0.000000e+00, align 4 180@glob64 = global double 0.000000e+00, align 8 181 182; Floating-point add to a global using an immediate. 183define void @fadd_32g() nounwind { 184; X86-NOSSE-LABEL: fadd_32g: 185; X86-NOSSE: # %bb.0: 186; X86-NOSSE-NEXT: subl $8, %esp 187; X86-NOSSE-NEXT: movl glob32, %eax 188; X86-NOSSE-NEXT: movl %eax, (%esp) 189; X86-NOSSE-NEXT: fld1 190; X86-NOSSE-NEXT: fadds (%esp) 191; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) 192; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 193; X86-NOSSE-NEXT: movl %eax, glob32 194; X86-NOSSE-NEXT: addl $8, %esp 195; X86-NOSSE-NEXT: retl 196; 197; X86-SSE1-LABEL: fadd_32g: 198; X86-SSE1: # %bb.0: 199; X86-SSE1-NEXT: subl $8, %esp 200; X86-SSE1-NEXT: movl glob32, %eax 201; X86-SSE1-NEXT: movl %eax, (%esp) 202; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 203; X86-SSE1-NEXT: addss {{\.LCPI.*}}, %xmm0 204; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 205; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 206; X86-SSE1-NEXT: movl %eax, glob32 207; X86-SSE1-NEXT: addl $8, %esp 208; X86-SSE1-NEXT: retl 209; 210; X86-SSE2-LABEL: fadd_32g: 211; X86-SSE2: # %bb.0: 212; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 213; X86-SSE2-NEXT: addss glob32, %xmm0 214; X86-SSE2-NEXT: movss %xmm0, glob32 215; X86-SSE2-NEXT: retl 216; 217; X86-AVX-LABEL: fadd_32g: 218; X86-AVX: # %bb.0: 219; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 220; X86-AVX-NEXT: vaddss glob32, %xmm0, %xmm0 221; X86-AVX-NEXT: vmovss %xmm0, glob32 222; X86-AVX-NEXT: retl 223; 224; X64-SSE-LABEL: fadd_32g: 225; X64-SSE: # %bb.0: 226; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 227; X64-SSE-NEXT: addss {{.*}}(%rip), %xmm0 228; X64-SSE-NEXT: movss %xmm0, {{.*}}(%rip) 229; X64-SSE-NEXT: retq 230; 231; X64-AVX-LABEL: fadd_32g: 232; X64-AVX: # %bb.0: 233; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 234; X64-AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 235; X64-AVX-NEXT: vmovss %xmm0, {{.*}}(%rip) 236; X64-AVX-NEXT: retq 237 %i = load atomic i32, i32* bitcast (float* @glob32 to i32*) monotonic, align 4 238 %f = bitcast i32 %i to float 239 %add = fadd float %f, 1.000000e+00 240 %s = bitcast float %add to i32 241 store atomic i32 %s, i32* bitcast (float* @glob32 to i32*) monotonic, align 4 242 ret void 243} 244 245define void @fadd_64g() nounwind { 246; X86-NOSSE-LABEL: fadd_64g: 247; X86-NOSSE: # %bb.0: 248; X86-NOSSE-NEXT: pushl %ebp 249; X86-NOSSE-NEXT: movl %esp, %ebp 250; X86-NOSSE-NEXT: andl $-8, %esp 251; X86-NOSSE-NEXT: subl $32, %esp 252; X86-NOSSE-NEXT: fildll glob64 253; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 254; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 255; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 256; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 257; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) 258; X86-NOSSE-NEXT: fld1 259; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) 260; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 261; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 262; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 263; X86-NOSSE-NEXT: movl %eax, (%esp) 264; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 265; X86-NOSSE-NEXT: fildll (%esp) 266; X86-NOSSE-NEXT: fistpll glob64 267; X86-NOSSE-NEXT: movl %ebp, %esp 268; X86-NOSSE-NEXT: popl %ebp 269; X86-NOSSE-NEXT: retl 270; 271; X86-SSE1-LABEL: fadd_64g: 272; X86-SSE1: # %bb.0: 273; X86-SSE1-NEXT: pushl %ebp 274; X86-SSE1-NEXT: movl %esp, %ebp 275; X86-SSE1-NEXT: andl $-8, %esp 276; X86-SSE1-NEXT: subl $16, %esp 277; X86-SSE1-NEXT: xorps %xmm0, %xmm0 278; X86-SSE1-NEXT: xorps %xmm1, %xmm1 279; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 280; X86-SSE1-NEXT: movss %xmm1, (%esp) 281; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 282; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 283; X86-SSE1-NEXT: fld1 284; X86-SSE1-NEXT: faddl (%esp) 285; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 286; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 287; X86-SSE1-NEXT: movlps %xmm0, glob64 288; X86-SSE1-NEXT: movl %ebp, %esp 289; X86-SSE1-NEXT: popl %ebp 290; X86-SSE1-NEXT: retl 291; 292; X86-SSE2-LABEL: fadd_64g: 293; X86-SSE2: # %bb.0: 294; X86-SSE2-NEXT: pushl %ebp 295; X86-SSE2-NEXT: movl %esp, %ebp 296; X86-SSE2-NEXT: andl $-8, %esp 297; X86-SSE2-NEXT: subl $8, %esp 298; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 299; X86-SSE2-NEXT: addsd {{\.LCPI.*}}, %xmm0 300; X86-SSE2-NEXT: movsd %xmm0, (%esp) 301; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 302; X86-SSE2-NEXT: movlps %xmm0, glob64 303; X86-SSE2-NEXT: movl %ebp, %esp 304; X86-SSE2-NEXT: popl %ebp 305; X86-SSE2-NEXT: retl 306; 307; X86-AVX-LABEL: fadd_64g: 308; X86-AVX: # %bb.0: 309; X86-AVX-NEXT: pushl %ebp 310; X86-AVX-NEXT: movl %esp, %ebp 311; X86-AVX-NEXT: andl $-8, %esp 312; X86-AVX-NEXT: subl $8, %esp 313; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 314; X86-AVX-NEXT: vaddsd {{\.LCPI.*}}, %xmm0, %xmm0 315; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 316; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 317; X86-AVX-NEXT: vmovlps %xmm0, glob64 318; X86-AVX-NEXT: movl %ebp, %esp 319; X86-AVX-NEXT: popl %ebp 320; X86-AVX-NEXT: retl 321; 322; X64-SSE-LABEL: fadd_64g: 323; X64-SSE: # %bb.0: 324; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 325; X64-SSE-NEXT: addsd {{.*}}(%rip), %xmm0 326; X64-SSE-NEXT: movsd %xmm0, {{.*}}(%rip) 327; X64-SSE-NEXT: retq 328; 329; X64-AVX-LABEL: fadd_64g: 330; X64-AVX: # %bb.0: 331; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 332; X64-AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 333; X64-AVX-NEXT: vmovsd %xmm0, {{.*}}(%rip) 334; X64-AVX-NEXT: retq 335 %i = load atomic i64, i64* bitcast (double* @glob64 to i64*) monotonic, align 8 336 %f = bitcast i64 %i to double 337 %add = fadd double %f, 1.000000e+00 338 %s = bitcast double %add to i64 339 store atomic i64 %s, i64* bitcast (double* @glob64 to i64*) monotonic, align 8 340 ret void 341} 342 343; Floating-point add to a hard-coded immediate location using an immediate. 344define void @fadd_32imm() nounwind { 345; X86-NOSSE-LABEL: fadd_32imm: 346; X86-NOSSE: # %bb.0: 347; X86-NOSSE-NEXT: subl $8, %esp 348; X86-NOSSE-NEXT: movl -559038737, %eax 349; X86-NOSSE-NEXT: movl %eax, (%esp) 350; X86-NOSSE-NEXT: fld1 351; X86-NOSSE-NEXT: fadds (%esp) 352; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) 353; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 354; X86-NOSSE-NEXT: movl %eax, -559038737 355; X86-NOSSE-NEXT: addl $8, %esp 356; X86-NOSSE-NEXT: retl 357; 358; X86-SSE1-LABEL: fadd_32imm: 359; X86-SSE1: # %bb.0: 360; X86-SSE1-NEXT: subl $8, %esp 361; X86-SSE1-NEXT: movl -559038737, %eax 362; X86-SSE1-NEXT: movl %eax, (%esp) 363; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 364; X86-SSE1-NEXT: addss {{\.LCPI.*}}, %xmm0 365; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 366; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 367; X86-SSE1-NEXT: movl %eax, -559038737 368; X86-SSE1-NEXT: addl $8, %esp 369; X86-SSE1-NEXT: retl 370; 371; X86-SSE2-LABEL: fadd_32imm: 372; X86-SSE2: # %bb.0: 373; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 374; X86-SSE2-NEXT: addss -559038737, %xmm0 375; X86-SSE2-NEXT: movss %xmm0, -559038737 376; X86-SSE2-NEXT: retl 377; 378; X86-AVX-LABEL: fadd_32imm: 379; X86-AVX: # %bb.0: 380; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 381; X86-AVX-NEXT: vaddss -559038737, %xmm0, %xmm0 382; X86-AVX-NEXT: vmovss %xmm0, -559038737 383; X86-AVX-NEXT: retl 384; 385; X64-SSE-LABEL: fadd_32imm: 386; X64-SSE: # %bb.0: 387; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF 388; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 389; X64-SSE-NEXT: addss (%rax), %xmm0 390; X64-SSE-NEXT: movss %xmm0, (%rax) 391; X64-SSE-NEXT: retq 392; 393; X64-AVX-LABEL: fadd_32imm: 394; X64-AVX: # %bb.0: 395; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF 396; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 397; X64-AVX-NEXT: vaddss (%rax), %xmm0, %xmm0 398; X64-AVX-NEXT: vmovss %xmm0, (%rax) 399; X64-AVX-NEXT: retq 400 %i = load atomic i32, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4 401 %f = bitcast i32 %i to float 402 %add = fadd float %f, 1.000000e+00 403 %s = bitcast float %add to i32 404 store atomic i32 %s, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4 405 ret void 406} 407 408define void @fadd_64imm() nounwind { 409; X86-NOSSE-LABEL: fadd_64imm: 410; X86-NOSSE: # %bb.0: 411; X86-NOSSE-NEXT: pushl %ebp 412; X86-NOSSE-NEXT: movl %esp, %ebp 413; X86-NOSSE-NEXT: andl $-8, %esp 414; X86-NOSSE-NEXT: subl $32, %esp 415; X86-NOSSE-NEXT: fildll -559038737 416; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 417; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 418; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 419; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 420; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) 421; X86-NOSSE-NEXT: fld1 422; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) 423; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 424; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 425; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 426; X86-NOSSE-NEXT: movl %eax, (%esp) 427; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 428; X86-NOSSE-NEXT: fildll (%esp) 429; X86-NOSSE-NEXT: fistpll -559038737 430; X86-NOSSE-NEXT: movl %ebp, %esp 431; X86-NOSSE-NEXT: popl %ebp 432; X86-NOSSE-NEXT: retl 433; 434; X86-SSE1-LABEL: fadd_64imm: 435; X86-SSE1: # %bb.0: 436; X86-SSE1-NEXT: pushl %ebp 437; X86-SSE1-NEXT: movl %esp, %ebp 438; X86-SSE1-NEXT: andl $-8, %esp 439; X86-SSE1-NEXT: subl $16, %esp 440; X86-SSE1-NEXT: xorps %xmm0, %xmm0 441; X86-SSE1-NEXT: xorps %xmm1, %xmm1 442; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 443; X86-SSE1-NEXT: movss %xmm1, (%esp) 444; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 445; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 446; X86-SSE1-NEXT: fld1 447; X86-SSE1-NEXT: faddl (%esp) 448; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 449; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 450; X86-SSE1-NEXT: movlps %xmm0, -559038737 451; X86-SSE1-NEXT: movl %ebp, %esp 452; X86-SSE1-NEXT: popl %ebp 453; X86-SSE1-NEXT: retl 454; 455; X86-SSE2-LABEL: fadd_64imm: 456; X86-SSE2: # %bb.0: 457; X86-SSE2-NEXT: pushl %ebp 458; X86-SSE2-NEXT: movl %esp, %ebp 459; X86-SSE2-NEXT: andl $-8, %esp 460; X86-SSE2-NEXT: subl $8, %esp 461; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 462; X86-SSE2-NEXT: addsd {{\.LCPI.*}}, %xmm0 463; X86-SSE2-NEXT: movsd %xmm0, (%esp) 464; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 465; X86-SSE2-NEXT: movlps %xmm0, -559038737 466; X86-SSE2-NEXT: movl %ebp, %esp 467; X86-SSE2-NEXT: popl %ebp 468; X86-SSE2-NEXT: retl 469; 470; X86-AVX-LABEL: fadd_64imm: 471; X86-AVX: # %bb.0: 472; X86-AVX-NEXT: pushl %ebp 473; X86-AVX-NEXT: movl %esp, %ebp 474; X86-AVX-NEXT: andl $-8, %esp 475; X86-AVX-NEXT: subl $8, %esp 476; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 477; X86-AVX-NEXT: vaddsd {{\.LCPI.*}}, %xmm0, %xmm0 478; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 479; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 480; X86-AVX-NEXT: vmovlps %xmm0, -559038737 481; X86-AVX-NEXT: movl %ebp, %esp 482; X86-AVX-NEXT: popl %ebp 483; X86-AVX-NEXT: retl 484; 485; X64-SSE-LABEL: fadd_64imm: 486; X64-SSE: # %bb.0: 487; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF 488; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 489; X64-SSE-NEXT: addsd (%rax), %xmm0 490; X64-SSE-NEXT: movsd %xmm0, (%rax) 491; X64-SSE-NEXT: retq 492; 493; X64-AVX-LABEL: fadd_64imm: 494; X64-AVX: # %bb.0: 495; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF 496; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 497; X64-AVX-NEXT: vaddsd (%rax), %xmm0, %xmm0 498; X64-AVX-NEXT: vmovsd %xmm0, (%rax) 499; X64-AVX-NEXT: retq 500 %i = load atomic i64, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8 501 %f = bitcast i64 %i to double 502 %add = fadd double %f, 1.000000e+00 503 %s = bitcast double %add to i64 504 store atomic i64 %s, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8 505 ret void 506} 507 508; Floating-point add to a stack location. 509define void @fadd_32stack() nounwind { 510; X86-NOSSE-LABEL: fadd_32stack: 511; X86-NOSSE: # %bb.0: 512; X86-NOSSE-NEXT: subl $12, %esp 513; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 514; X86-NOSSE-NEXT: movl %eax, (%esp) 515; X86-NOSSE-NEXT: fld1 516; X86-NOSSE-NEXT: fadds (%esp) 517; X86-NOSSE-NEXT: fstps {{[0-9]+}}(%esp) 518; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 519; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) 520; X86-NOSSE-NEXT: addl $12, %esp 521; X86-NOSSE-NEXT: retl 522; 523; X86-SSE1-LABEL: fadd_32stack: 524; X86-SSE1: # %bb.0: 525; X86-SSE1-NEXT: subl $12, %esp 526; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 527; X86-SSE1-NEXT: movl %eax, (%esp) 528; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 529; X86-SSE1-NEXT: addss {{\.LCPI.*}}, %xmm0 530; X86-SSE1-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 531; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax 532; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) 533; X86-SSE1-NEXT: addl $12, %esp 534; X86-SSE1-NEXT: retl 535; 536; X86-SSE2-LABEL: fadd_32stack: 537; X86-SSE2: # %bb.0: 538; X86-SSE2-NEXT: pushl %eax 539; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 540; X86-SSE2-NEXT: addss (%esp), %xmm0 541; X86-SSE2-NEXT: movss %xmm0, (%esp) 542; X86-SSE2-NEXT: popl %eax 543; X86-SSE2-NEXT: retl 544; 545; X86-AVX-LABEL: fadd_32stack: 546; X86-AVX: # %bb.0: 547; X86-AVX-NEXT: pushl %eax 548; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 549; X86-AVX-NEXT: vaddss (%esp), %xmm0, %xmm0 550; X86-AVX-NEXT: vmovss %xmm0, (%esp) 551; X86-AVX-NEXT: popl %eax 552; X86-AVX-NEXT: retl 553; 554; X64-SSE-LABEL: fadd_32stack: 555; X64-SSE: # %bb.0: 556; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 557; X64-SSE-NEXT: addss -{{[0-9]+}}(%rsp), %xmm0 558; X64-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) 559; X64-SSE-NEXT: retq 560; 561; X64-AVX-LABEL: fadd_32stack: 562; X64-AVX: # %bb.0: 563; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 564; X64-AVX-NEXT: vaddss -{{[0-9]+}}(%rsp), %xmm0, %xmm0 565; X64-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) 566; X64-AVX-NEXT: retq 567 %ptr = alloca i32, align 4 568 %bc3 = bitcast i32* %ptr to float* 569 %load = load atomic i32, i32* %ptr acquire, align 4 570 %bc0 = bitcast i32 %load to float 571 %fadd = fadd float 1.000000e+00, %bc0 572 %bc1 = bitcast float %fadd to i32 573 store atomic i32 %bc1, i32* %ptr release, align 4 574 ret void 575} 576 577define void @fadd_64stack() nounwind { 578; X86-NOSSE-LABEL: fadd_64stack: 579; X86-NOSSE: # %bb.0: 580; X86-NOSSE-NEXT: pushl %ebp 581; X86-NOSSE-NEXT: movl %esp, %ebp 582; X86-NOSSE-NEXT: andl $-8, %esp 583; X86-NOSSE-NEXT: subl $40, %esp 584; X86-NOSSE-NEXT: fildll {{[0-9]+}}(%esp) 585; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 586; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 587; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 588; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 589; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) 590; X86-NOSSE-NEXT: fld1 591; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) 592; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 593; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax 594; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx 595; X86-NOSSE-NEXT: movl %eax, (%esp) 596; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) 597; X86-NOSSE-NEXT: fildll (%esp) 598; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 599; X86-NOSSE-NEXT: movl %ebp, %esp 600; X86-NOSSE-NEXT: popl %ebp 601; X86-NOSSE-NEXT: retl 602; 603; X86-SSE1-LABEL: fadd_64stack: 604; X86-SSE1: # %bb.0: 605; X86-SSE1-NEXT: pushl %ebp 606; X86-SSE1-NEXT: movl %esp, %ebp 607; X86-SSE1-NEXT: andl $-8, %esp 608; X86-SSE1-NEXT: subl $24, %esp 609; X86-SSE1-NEXT: xorps %xmm0, %xmm0 610; X86-SSE1-NEXT: xorps %xmm1, %xmm1 611; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 612; X86-SSE1-NEXT: movss %xmm1, (%esp) 613; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 614; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 615; X86-SSE1-NEXT: fld1 616; X86-SSE1-NEXT: faddl (%esp) 617; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 618; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 619; X86-SSE1-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) 620; X86-SSE1-NEXT: movl %ebp, %esp 621; X86-SSE1-NEXT: popl %ebp 622; X86-SSE1-NEXT: retl 623; 624; X86-SSE2-LABEL: fadd_64stack: 625; X86-SSE2: # %bb.0: 626; X86-SSE2-NEXT: pushl %ebp 627; X86-SSE2-NEXT: movl %esp, %ebp 628; X86-SSE2-NEXT: andl $-8, %esp 629; X86-SSE2-NEXT: subl $16, %esp 630; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 631; X86-SSE2-NEXT: addsd {{\.LCPI.*}}, %xmm0 632; X86-SSE2-NEXT: movsd %xmm0, (%esp) 633; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 634; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) 635; X86-SSE2-NEXT: movl %ebp, %esp 636; X86-SSE2-NEXT: popl %ebp 637; X86-SSE2-NEXT: retl 638; 639; X86-AVX-LABEL: fadd_64stack: 640; X86-AVX: # %bb.0: 641; X86-AVX-NEXT: pushl %ebp 642; X86-AVX-NEXT: movl %esp, %ebp 643; X86-AVX-NEXT: andl $-8, %esp 644; X86-AVX-NEXT: subl $16, %esp 645; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 646; X86-AVX-NEXT: vaddsd {{\.LCPI.*}}, %xmm0, %xmm0 647; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 648; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 649; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 650; X86-AVX-NEXT: movl %ebp, %esp 651; X86-AVX-NEXT: popl %ebp 652; X86-AVX-NEXT: retl 653; 654; X64-SSE-LABEL: fadd_64stack: 655; X64-SSE: # %bb.0: 656; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 657; X64-SSE-NEXT: addsd -{{[0-9]+}}(%rsp), %xmm0 658; X64-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) 659; X64-SSE-NEXT: retq 660; 661; X64-AVX-LABEL: fadd_64stack: 662; X64-AVX: # %bb.0: 663; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 664; X64-AVX-NEXT: vaddsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0 665; X64-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) 666; X64-AVX-NEXT: retq 667 %ptr = alloca i64, align 8 668 %bc3 = bitcast i64* %ptr to double* 669 %load = load atomic i64, i64* %ptr acquire, align 8 670 %bc0 = bitcast i64 %load to double 671 %fadd = fadd double 1.000000e+00, %bc0 672 %bc1 = bitcast double %fadd to i64 673 store atomic i64 %bc1, i64* %ptr release, align 8 674 ret void 675} 676 677define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind { 678; X86-NOSSE-LABEL: fadd_array: 679; X86-NOSSE: # %bb.0: # %bb 680; X86-NOSSE-NEXT: pushl %ebp 681; X86-NOSSE-NEXT: movl %esp, %ebp 682; X86-NOSSE-NEXT: pushl %esi 683; X86-NOSSE-NEXT: andl $-8, %esp 684; X86-NOSSE-NEXT: subl $40, %esp 685; X86-NOSSE-NEXT: movl 20(%ebp), %eax 686; X86-NOSSE-NEXT: movl 8(%ebp), %ecx 687; X86-NOSSE-NEXT: fildll (%ecx,%eax,8) 688; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) 689; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 690; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi 691; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) 692; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) 693; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) 694; X86-NOSSE-NEXT: faddl 12(%ebp) 695; X86-NOSSE-NEXT: fstpl {{[0-9]+}}(%esp) 696; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx 697; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi 698; X86-NOSSE-NEXT: movl %edx, (%esp) 699; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) 700; X86-NOSSE-NEXT: fildll (%esp) 701; X86-NOSSE-NEXT: fistpll (%ecx,%eax,8) 702; X86-NOSSE-NEXT: leal -4(%ebp), %esp 703; X86-NOSSE-NEXT: popl %esi 704; X86-NOSSE-NEXT: popl %ebp 705; X86-NOSSE-NEXT: retl 706; 707; X86-SSE1-LABEL: fadd_array: 708; X86-SSE1: # %bb.0: # %bb 709; X86-SSE1-NEXT: pushl %ebp 710; X86-SSE1-NEXT: movl %esp, %ebp 711; X86-SSE1-NEXT: andl $-8, %esp 712; X86-SSE1-NEXT: subl $16, %esp 713; X86-SSE1-NEXT: movl 20(%ebp), %eax 714; X86-SSE1-NEXT: movl 8(%ebp), %ecx 715; X86-SSE1-NEXT: xorps %xmm0, %xmm0 716; X86-SSE1-NEXT: xorps %xmm1, %xmm1 717; X86-SSE1-NEXT: movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3] 718; X86-SSE1-NEXT: movss %xmm1, (%esp) 719; X86-SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 720; X86-SSE1-NEXT: movss %xmm1, {{[0-9]+}}(%esp) 721; X86-SSE1-NEXT: fldl (%esp) 722; X86-SSE1-NEXT: faddl 12(%ebp) 723; X86-SSE1-NEXT: fstpl {{[0-9]+}}(%esp) 724; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] 725; X86-SSE1-NEXT: movlps %xmm0, (%ecx,%eax,8) 726; X86-SSE1-NEXT: movl %ebp, %esp 727; X86-SSE1-NEXT: popl %ebp 728; X86-SSE1-NEXT: retl 729; 730; X86-SSE2-LABEL: fadd_array: 731; X86-SSE2: # %bb.0: # %bb 732; X86-SSE2-NEXT: pushl %ebp 733; X86-SSE2-NEXT: movl %esp, %ebp 734; X86-SSE2-NEXT: andl $-8, %esp 735; X86-SSE2-NEXT: subl $8, %esp 736; X86-SSE2-NEXT: movl 20(%ebp), %eax 737; X86-SSE2-NEXT: movl 8(%ebp), %ecx 738; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 739; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0 740; X86-SSE2-NEXT: movsd %xmm0, (%esp) 741; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 742; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8) 743; X86-SSE2-NEXT: movl %ebp, %esp 744; X86-SSE2-NEXT: popl %ebp 745; X86-SSE2-NEXT: retl 746; 747; X86-AVX-LABEL: fadd_array: 748; X86-AVX: # %bb.0: # %bb 749; X86-AVX-NEXT: pushl %ebp 750; X86-AVX-NEXT: movl %esp, %ebp 751; X86-AVX-NEXT: andl $-8, %esp 752; X86-AVX-NEXT: subl $8, %esp 753; X86-AVX-NEXT: movl 20(%ebp), %eax 754; X86-AVX-NEXT: movl 8(%ebp), %ecx 755; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 756; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0 757; X86-AVX-NEXT: vmovsd %xmm0, (%esp) 758; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 759; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8) 760; X86-AVX-NEXT: movl %ebp, %esp 761; X86-AVX-NEXT: popl %ebp 762; X86-AVX-NEXT: retl 763; 764; X64-SSE-LABEL: fadd_array: 765; X64-SSE: # %bb.0: # %bb 766; X64-SSE-NEXT: addsd (%rdi,%rsi,8), %xmm0 767; X64-SSE-NEXT: movsd %xmm0, (%rdi,%rsi,8) 768; X64-SSE-NEXT: retq 769; 770; X64-AVX-LABEL: fadd_array: 771; X64-AVX: # %bb.0: # %bb 772; X64-AVX-NEXT: vaddsd (%rdi,%rsi,8), %xmm0, %xmm0 773; X64-AVX-NEXT: vmovsd %xmm0, (%rdi,%rsi,8) 774; X64-AVX-NEXT: retq 775bb: 776 %tmp4 = getelementptr inbounds i64, i64* %arg, i64 %arg2 777 %tmp6 = load atomic i64, i64* %tmp4 monotonic, align 8 778 %tmp7 = bitcast i64 %tmp6 to double 779 %tmp8 = fadd double %tmp7, %arg1 780 %tmp9 = bitcast double %tmp8 to i64 781 store atomic i64 %tmp9, i64* %tmp4 monotonic, align 8 782 ret void 783} 784