; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+fma -O3 | FileCheck %s --check-prefixes=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma -O3 | FileCheck %s --check-prefixes=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=X87

declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)

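; Constrained f64 add: addsd (SSE), vaddsd (AVX/AVX-512), faddl (x87); on 32-bit SSE/AVX the result round-trips through memory so it can be returned in st(0).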
define double @fadd_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fadd_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %ebp
; SSE-X86-NEXT:    movl %esp, %ebp
; SSE-X86-NEXT:    andl $-8, %esp
; SSE-X86-NEXT:    subl $8, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    addsd 16(%ebp), %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    fldl (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    movl %ebp, %esp
; SSE-X86-NEXT:    popl %ebp
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fadd_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    addsd %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fadd_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vaddsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fadd_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fadd_f64:
; X87:       # %bb.0:
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    faddl {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret double %ret
}

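; Constrained f32 add: addss (SSE), vaddss (AVX/AVX-512), fadds (x87).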
define float @fadd_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fadd_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    flds (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    popl %eax
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fadd_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    addss %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fadd_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fadd_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fadd_f32:
; X87:       # %bb.0:
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fadds {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  ret float %ret
}

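; Constrained f64 sub: subsd (SSE), vsubsd (AVX/AVX-512), fsubl (x87).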
define double @fsub_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fsub_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %ebp
; SSE-X86-NEXT:    movl %esp, %ebp
; SSE-X86-NEXT:    andl $-8, %esp
; SSE-X86-NEXT:    subl $8, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    subsd 16(%ebp), %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    fldl (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    movl %ebp, %esp
; SSE-X86-NEXT:    popl %ebp
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fsub_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    subsd %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fsub_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vsubsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fsub_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fsub_f64:
; X87:       # %bb.0:
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fsubl {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret double %ret
}

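; Constrained f32 sub: subss (SSE), vsubss (AVX/AVX-512), fsubs (x87).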
define float @fsub_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fsub_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    subss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    flds (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    popl %eax
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fsub_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    subss %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fsub_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fsub_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vsubss %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fsub_f32:
; X87:       # %bb.0:
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fsubs {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call float @llvm.experimental.constrained.fsub.f32(float %a, float %b,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  ret float %ret
}

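; Constrained f64 mul: mulsd (SSE), vmulsd (AVX/AVX-512), fmull (x87).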
define double @fmul_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fmul_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %ebp
; SSE-X86-NEXT:    movl %esp, %ebp
; SSE-X86-NEXT:    andl $-8, %esp
; SSE-X86-NEXT:    subl $8, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    mulsd 16(%ebp), %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    fldl (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    movl %ebp, %esp
; SSE-X86-NEXT:    popl %ebp
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fmul_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    mulsd %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fmul_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vmulsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fmul_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fmul_f64:
; X87:       # %bb.0:
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fmull {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret double %ret
}

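; Constrained f32 mul: mulss (SSE), vmulss (AVX/AVX-512), fmuls (x87).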
define float @fmul_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fmul_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    mulss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    flds (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    popl %eax
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fmul_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    mulss %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fmul_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fmul_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fmul_f32:
; X87:       # %bb.0:
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fmuls {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call float @llvm.experimental.constrained.fmul.f32(float %a, float %b,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  ret float %ret
}

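; Constrained f64 div: divsd (SSE), vdivsd (AVX/AVX-512), fdivl (x87).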
define double @fdiv_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fdiv_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %ebp
; SSE-X86-NEXT:    movl %esp, %ebp
; SSE-X86-NEXT:    andl $-8, %esp
; SSE-X86-NEXT:    subl $8, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    divsd 16(%ebp), %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    fldl (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    movl %ebp, %esp
; SSE-X86-NEXT:    popl %ebp
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fdiv_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    divsd %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fdiv_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vdivsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fdiv_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fdiv_f64:
; X87:       # %bb.0:
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fdivl {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  ret double %ret
}

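; Constrained f32 div: divss (SSE), vdivss (AVX/AVX-512), fdivs (x87).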
define float @fdiv_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fdiv_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    pushl %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    divss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    flds (%esp)
; SSE-X86-NEXT:    wait
; SSE-X86-NEXT:    popl %eax
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fdiv_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    divss %xmm1, %xmm0
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fdiv_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vdivss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fdiv_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fdiv_f32:
; X87:       # %bb.0:
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fdivs {{[0-9]+}}(%esp)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %ret = call float @llvm.experimental.constrained.fdiv.f32(float %a, float %b,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  ret float %ret
}

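; Constrained fpext f32 -> f64: cvtss2sd (SSE), vcvtss2sd (AVX/AVX-512); x87 uses flds + fstpl.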
define void @fpext_f32_to_f64(float* %val, double* %ret) nounwind strictfp {
; SSE-X86-LABEL: fpext_f32_to_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    cvtss2sd %xmm0, %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%eax)
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fpext_f32_to_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X64-NEXT:    cvtss2sd %xmm0, %xmm0
; SSE-X64-NEXT:    movsd %xmm0, (%rsi)
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fpext_f32_to_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%eax)
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fpext_f32_to_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X64-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    vmovsd %xmm0, (%rsi)
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fpext_f32_to_f64:
; X87:       # %bb.0:
; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT:    flds (%ecx)
; X87-NEXT:    fstpl (%eax)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %1 = load float, float* %val, align 4
  %res = call double @llvm.experimental.constrained.fpext.f64.f32(float %1,
                                                                  metadata !"fpexcept.strict") #0
  store double %res, double* %ret, align 8
  ret void
}

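; Constrained fptrunc f64 -> f32: cvtsd2ss (SSE), vcvtsd2ss (AVX/AVX-512); x87 rounds through a stack temporary.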
define void @fptrunc_double_to_f32(double* %val, float* %ret) nounwind strictfp {
; SSE-X86-LABEL: fptrunc_double_to_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    cvtsd2ss %xmm0, %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%eax)
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fptrunc_double_to_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X64-NEXT:    cvtsd2ss %xmm0, %xmm0
; SSE-X64-NEXT:    movss %xmm0, (%rsi)
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fptrunc_double_to_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%eax)
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fptrunc_double_to_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X64-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    vmovss %xmm0, (%rsi)
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fptrunc_double_to_f32:
; X87:       # %bb.0:
; X87-NEXT:    pushl %eax
; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT:    fldl (%ecx)
; X87-NEXT:    fstps (%esp)
; X87-NEXT:    flds (%esp)
; X87-NEXT:    fstps (%eax)
; X87-NEXT:    wait
; X87-NEXT:    popl %eax
; X87-NEXT:    retl
  %1 = load double, double* %val, align 8
  %res = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %1,
                                                                   metadata !"round.dynamic",
                                                                   metadata !"fpexcept.strict") #0
  store float %res, float* %ret, align 4
  ret void
}

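; Constrained f64 sqrt: sqrtsd (SSE), vsqrtsd (AVX/AVX-512), fsqrt (x87).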
define void @fsqrt_f64(double* %a) nounwind strictfp {
; SSE-X86-LABEL: fsqrt_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%eax)
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fsqrt_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X64-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-X64-NEXT:    movsd %xmm0, (%rdi)
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fsqrt_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%eax)
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fsqrt_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X64-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    vmovsd %xmm0, (%rdi)
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fsqrt_f64:
; X87:       # %bb.0:
; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X87-NEXT:    fldl (%eax)
; X87-NEXT:    fsqrt
; X87-NEXT:    fstpl (%eax)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %1 = load double, double* %a, align 8
  %res = call double @llvm.experimental.constrained.sqrt.f64(double %1,
                                                             metadata !"round.dynamic",
                                                             metadata !"fpexcept.strict") #0
  store double %res, double* %a, align 8
  ret void
}

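; Constrained f32 sqrt: sqrtss (SSE), vsqrtss (AVX/AVX-512), fsqrt (x87).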
define void @fsqrt_f32(float* %a) nounwind strictfp {
; SSE-X86-LABEL: fsqrt_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    sqrtss %xmm0, %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%eax)
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fsqrt_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X64-NEXT:    sqrtss %xmm0, %xmm0
; SSE-X64-NEXT:    movss %xmm0, (%rdi)
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fsqrt_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%eax)
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fsqrt_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X64-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    vmovss %xmm0, (%rdi)
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fsqrt_f32:
; X87:       # %bb.0:
; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X87-NEXT:    flds (%eax)
; X87-NEXT:    fsqrt
; X87-NEXT:    fstps (%eax)
; X87-NEXT:    wait
; X87-NEXT:    retl
  %1 = load float, float* %a, align 4
  %res = call float @llvm.experimental.constrained.sqrt.f32(float %1,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  store float %res, float* %a, align 4
  ret void
}

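; Constrained f64 fma: vfmadd213sd when FMA/AVX-512 is available; SSE-only and x87 configurations fall back to a call to fma.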
define double @fma_f64(double %a, double %b, double %c) nounwind strictfp {
; SSE-X86-LABEL: fma_f64:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    subl $24, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-X86-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-X86-NEXT:    movsd %xmm2, {{[0-9]+}}(%esp)
; SSE-X86-NEXT:    movsd %xmm1, {{[0-9]+}}(%esp)
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    calll fma
; SSE-X86-NEXT:    addl $24, %esp
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fma_f64:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    pushq %rax
; SSE-X64-NEXT:    callq fma
; SSE-X64-NEXT:    popq %rax
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fma_f64:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-X86-NEXT:    vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX-X86-NEXT:    vmovsd %xmm1, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fma_f64:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fma_f64:
; X87:       # %bb.0:
; X87-NEXT:    subl $24, %esp
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
; X87-NEXT:    fstpl (%esp)
; X87-NEXT:    wait
; X87-NEXT:    calll fma
; X87-NEXT:    addl $24, %esp
; X87-NEXT:    retl
  %res = call double @llvm.experimental.constrained.fma.f64(double %a, double %b, double %c,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  ret double %res
}

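; Constrained f32 fma: vfmadd213ss when FMA/AVX-512 is available; SSE-only and x87 configurations fall back to a call to fmaf.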
define float @fma_f32(float %a, float %b, float %c) nounwind strictfp {
; SSE-X86-LABEL: fma_f32:
; SSE-X86:       # %bb.0:
; SSE-X86-NEXT:    subl $12, %esp
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    movss %xmm2, {{[0-9]+}}(%esp)
; SSE-X86-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    calll fmaf
; SSE-X86-NEXT:    addl $12, %esp
; SSE-X86-NEXT:    retl
;
; SSE-X64-LABEL: fma_f32:
; SSE-X64:       # %bb.0:
; SSE-X64-NEXT:    pushq %rax
; SSE-X64-NEXT:    callq fmaf
; SSE-X64-NEXT:    popq %rax
; SSE-X64-NEXT:    retq
;
; AVX-X86-LABEL: fma_f32:
; AVX-X86:       # %bb.0:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX-X86-NEXT:    vmovss %xmm1, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    wait
; AVX-X86-NEXT:    popl %eax
; AVX-X86-NEXT:    retl
;
; AVX-X64-LABEL: fma_f32:
; AVX-X64:       # %bb.0:
; AVX-X64-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX-X64-NEXT:    retq
;
; X87-LABEL: fma_f32:
; X87:       # %bb.0:
; X87-NEXT:    subl $12, %esp
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fstps {{[0-9]+}}(%esp)
; X87-NEXT:    fstps {{[0-9]+}}(%esp)
; X87-NEXT:    fstps (%esp)
; X87-NEXT:    wait
; X87-NEXT:    calll fmaf
; X87-NEXT:    addl $12, %esp
; X87-NEXT:    retl
  %res = call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c,
                                                           metadata !"round.dynamic",
                                                           metadata !"fpexcept.strict") #0
  ret float %res
}

attributes #0 = { strictfp }