; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+fma -O3 | FileCheck %s --check-prefixes=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma -O3 | FileCheck %s --check-prefixes=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=X87

declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)

define double @fadd_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fadd_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %ebp
; SSE-X86-NEXT:    movl %esp, %ebp
; SSE-X86-NEXT:    andl $-8, %esp
; SSE-X86-NEXT:    subl $8, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    addsd 16(%ebp), %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    fldl (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    movl %ebp, %esp
; SSE-X86-NEXT:    popl %ebp
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fadd_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    addsd %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fadd_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vaddsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fadd_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fadd_f64:
; X87:       # %bb.0:
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    faddl {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret double %ret
}

define float @fadd_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fadd_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    flds (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    popl %eax
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fadd_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    addss %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fadd_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fadd_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fadd_f32:
; X87:       # %bb.0:
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fadds {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  ret float %ret
}

define double @fsub_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fsub_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %ebp
; SSE-X86-NEXT:    movl %esp, %ebp
; SSE-X86-NEXT:    andl $-8, %esp
; SSE-X86-NEXT:    subl $8, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    subsd 16(%ebp), %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    fldl (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    movl %ebp, %esp
; SSE-X86-NEXT:    popl %ebp
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fsub_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    subsd %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fsub_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vsubsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fsub_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fsub_f64:
; X87:       # %bb.0:
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fsubl {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret double %ret
}

define float @fsub_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fsub_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    subss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    flds (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    popl %eax
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fsub_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    subss %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fsub_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fsub_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fsub_f32:
; X87:       # %bb.0:
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fsubs {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call float @llvm.experimental.constrained.fsub.f32(float %a, float %b,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  ret float %ret
}

define double @fmul_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fmul_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %ebp
; SSE-X86-NEXT:    movl %esp, %ebp
; SSE-X86-NEXT:    andl $-8, %esp
; SSE-X86-NEXT:    subl $8, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    mulsd 16(%ebp), %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    fldl (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    movl %ebp, %esp
; SSE-X86-NEXT:    popl %ebp
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fmul_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    mulsd %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fmul_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vmulsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fmul_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fmul_f64:
; X87:       # %bb.0:
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fmull {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret double %ret
}

define float @fmul_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fmul_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    mulss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    flds (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    popl %eax
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fmul_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    mulss %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fmul_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fmul_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fmul_f32:
; X87:       # %bb.0:
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fmuls {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call float @llvm.experimental.constrained.fmul.f32(float %a, float %b,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  ret float %ret
}

define double @fdiv_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fdiv_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %ebp
; SSE-X86-NEXT:    movl %esp, %ebp
; SSE-X86-NEXT:    andl $-8, %esp
; SSE-X86-NEXT:    subl $8, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    divsd 16(%ebp), %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    fldl (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    movl %ebp, %esp
; SSE-X86-NEXT:    popl %ebp
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fdiv_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    divsd %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fdiv_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vdivsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fdiv_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fdiv_f64:
; X87:       # %bb.0:
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fdivl {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret double %ret
}

define float @fdiv_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fdiv_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    divss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    flds (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    popl %eax
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fdiv_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    divss %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fdiv_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vdivss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fdiv_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fdiv_f32:
; X87:       # %bb.0:
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fdivs {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call float @llvm.experimental.constrained.fdiv.f32(float %a, float %b,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  ret float %ret
}

define void @fpext_f32_to_f64(float* %val, double* %ret) nounwind strictfp {
; SSE-X86-LABEL: fpext_f32_to_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    cvtss2sd %xmm0, %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%eax)
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fpext_f32_to_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X64-NEXT:    cvtss2sd %xmm0, %xmm0
; SSE-X64-NEXT:    movsd %xmm0, (%rsi)
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fpext_f32_to_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%eax)
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fpext_f32_to_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X64-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    vmovsd %xmm0, (%rsi)
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fpext_f32_to_f64:
; X87:       # %bb.0:
; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT:    flds (%ecx)
; X87-NEXT:    fstpl (%eax)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %1 = load float, float* %val, align 4
  %res = call double @llvm.experimental.constrained.fpext.f64.f32(float %1,
                                                                  metadata !"fpexcept.strict") #0
  store double %res, double* %ret, align 8
  ret void
}

define void @fptrunc_double_to_f32(double* %val, float *%ret) nounwind strictfp {
; SSE-X86-LABEL: fptrunc_double_to_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    cvtsd2ss %xmm0, %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%eax)
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fptrunc_double_to_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X64-NEXT:    cvtsd2ss %xmm0, %xmm0
; SSE-X64-NEXT:    movss %xmm0, (%rsi)
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fptrunc_double_to_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%eax)
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fptrunc_double_to_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X64-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    vmovss %xmm0, (%rsi)
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fptrunc_double_to_f32:
; X87:       # %bb.0:
; X87-NEXT:    pushl %eax
; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT:    fldl (%ecx)
; X87-NEXT:    fstps (%esp)
; X87-NEXT:    flds (%esp)
; X87-NEXT:    fstps (%eax)
; X87-NEXT:    wait
; X87-NEXT:    popl %eax
; X87-NEXT:    retl
  %1 = load double, double* %val, align 8
  %res = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %1,
                                                                   metadata !"round.dynamic",
                                                                   metadata !"fpexcept.strict") #0
  store float %res, float* %ret, align 4
  ret void
}

define void @fsqrt_f64(double* %a) nounwind strictfp {
; SSE-X86-LABEL: fsqrt_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%eax)
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fsqrt_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X64-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-X64-NEXT:    movsd %xmm0, (%rdi)
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fsqrt_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%eax)
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fsqrt_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X64-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    vmovsd %xmm0, (%rdi)
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fsqrt_f64:
; X87:       # %bb.0:
; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X87-NEXT:    fldl (%eax)
; X87-NEXT:    fsqrt
; X87-NEXT:    fstpl (%eax)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %1 = load double, double* %a, align 8
  %res = call double @llvm.experimental.constrained.sqrt.f64(double %1,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  store double %res, double* %a, align 8
  ret void
}

define void @fsqrt_f32(float* %a) nounwind strictfp {
; SSE-X86-LABEL: fsqrt_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    sqrtss %xmm0, %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%eax)
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fsqrt_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X64-NEXT:    sqrtss %xmm0, %xmm0
; SSE-X64-NEXT:    movss %xmm0, (%rdi)
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fsqrt_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%eax)
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fsqrt_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X64-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    vmovss %xmm0, (%rdi)
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fsqrt_f32:
; X87:       # %bb.0:
; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X87-NEXT:    flds (%eax)
; X87-NEXT:    fsqrt
; X87-NEXT:    fstps (%eax)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %1 = load float, float* %a, align 4
  %res = call float @llvm.experimental.constrained.sqrt.f32(float %1,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  store float %res, float* %a, align 4
  ret void
}

define double @fma_f64(double %a, double %b, double %c) nounwind strictfp {
; SSE-X86-LABEL: fma_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    subl $24, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-X86-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-X86-NEXT:    movsd %xmm2, {{[0-9]+}}(%esp)
; SSE-X86-NEXT:    movsd %xmm1, {{[0-9]+}}(%esp)
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    calll fma
; SSE-X86-NEXT:    addl $24, %esp
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fma_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    pushq %rax
; SSE-X64-NEXT:    callq fma
; SSE-X64-NEXT:    popq %rax
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fma_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-X86-NEXT:    vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX-X86-NEXT:    vmovsd %xmm1, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fma_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fma_f64:
; X87:       # %bb.0:
; X87-NEXT:    subl $24, %esp
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
; X87-NEXT:    fstpl (%esp)
; X87-NEXT:    wait
; X87-NEXT:    calll fma
; X87-NEXT:    addl $24, %esp
; X87-NEXT:    retl
  %res = call double @llvm.experimental.constrained.fma.f64(double %a, double %b, double %c,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  ret double %res
}

define float @fma_f32(float %a, float %b, float %c) nounwind strictfp {
; SSE-X86-LABEL: fma_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    subl $12, %esp
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    movss %xmm2, {{[0-9]+}}(%esp)
; SSE-X86-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    calll fmaf
; SSE-X86-NEXT:    addl $12, %esp
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fma_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    pushq %rax
; SSE-X64-NEXT:    callq fmaf
; SSE-X64-NEXT:    popq %rax
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fma_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX-X86-NEXT:    vmovss %xmm1, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fma_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fma_f32:
; X87:       # %bb.0:
; X87-NEXT:    subl $12, %esp
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fstps {{[0-9]+}}(%esp)
; X87-NEXT:    fstps {{[0-9]+}}(%esp)
; X87-NEXT:    fstps (%esp)
; X87-NEXT:    wait
; X87-NEXT:    calll fmaf
; X87-NEXT:    addl $12, %esp
; X87-NEXT:    retl
  %res = call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c,
                                                           metadata !"round.dynamic",
                                                           metadata !"fpexcept.strict") #0
  ret float %res
}

attributes #0 = { strictfp }