1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -instcombine -S | FileCheck %s
3
4; testing-case "float fold(float a) { return 1.2f * a * 2.3f; }"
5; 1.2f and 2.3f are supposed to be folded into a single constant.
; 0x3FF3333340000000 = 1.2f and 0x4002666660000000 = 2.3f. Both fmuls are
; 'fast', and the CHECK lines expect one fmul by the pre-multiplied constant.
6define float @fold(float %a) {
7; CHECK-LABEL: @fold(
8; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
9; CHECK-NEXT:    ret float [[MUL1]]
10;
11  %mul = fmul fast float %a, 0x3FF3333340000000
12  %mul1 = fmul fast float %mul, 0x4002666660000000
13  ret float %mul1
14}
15
16; Same testing-case as the one used in fold() except that the second fmul
17; carries no fast-math flags, so the two constants must not be combined.
; The CHECK lines expect both multiplies to survive unchanged.
18define float @notfold(float %a) {
19; CHECK-LABEL: @notfold(
20; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
21; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
22; CHECK-NEXT:    ret float [[MUL1]]
23;
24  %mul = fmul fast float %a, 0x3FF3333340000000
25  %mul1 = fmul float %mul, 0x4002666660000000
26  ret float %mul1
27}
28
; Same as fold(), but only the second (outer) fmul is 'fast'; the first one
; has no fast-math flags. The CHECK lines expect the constants to be combined
; anyway, producing a single 'fast' fmul.
29define float @fold2(float %a) {
30; CHECK-LABEL: @fold2(
31; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
32; CHECK-NEXT:    ret float [[MUL1]]
33;
34  %mul = fmul float %a, 0x3FF3333340000000
35  %mul1 = fmul fast float %mul, 0x4002666660000000
36  ret float %mul1
37}
38
39; C * f1 + f1 = (C+1) * f1
40; TODO: The particular case where C is 2 (so the folded result is 3.0*f1) is
41; always safe, and so doesn't need any FMF.
42; That is, (x + x + x) and (3*x) each have only a single rounding.
43define double @fold3(double %f1) {
44; CHECK-LABEL: @fold3(
45; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[F1:%.*]], 6.000000e+00
46; CHECK-NEXT:    ret double [[TMP1]]
47;
48  %t1 = fmul fast double 5.000000e+00, %f1
49  %t2 = fadd fast double %f1, %t1
50  ret double %t2
51}
52
53; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
54define double @fold3_reassoc_nsz(double %f1) {
55; CHECK-LABEL: @fold3_reassoc_nsz(
56; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz double [[F1:%.*]], 6.000000e+00
57; CHECK-NEXT:    ret double [[TMP1]]
58;
59  %t1 = fmul reassoc nsz double 5.000000e+00, %f1
60  %t2 = fadd reassoc nsz double %f1, %t1
61  ret double %t2
62}
63
64; TODO: This doesn't require 'nsz'.  It should fold to f1 * 6.0.
65define double @fold3_reassoc(double %f1) {
66; CHECK-LABEL: @fold3_reassoc(
67; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc double [[F1:%.*]], 5.000000e+00
68; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc double [[TMP1]], [[F1]]
69; CHECK-NEXT:    ret double [[TMP2]]
70;
71  %t1 = fmul reassoc double 5.000000e+00, %f1
72  %t2 = fadd reassoc double %f1, %t1
73  ret double %t2
74}
75
76; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
77define float @fold4(float %f1, float %f2) {
78; CHECK-LABEL: @fold4(
79; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
80; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast float 9.000000e+00, [[TMP1]]
81; CHECK-NEXT:    ret float [[TMP2]]
82;
83  %sub = fsub float 4.000000e+00, %f1
84  %sub1 = fsub float 5.000000e+00, %f2
85  %add = fadd fast float %sub, %sub1
86  ret float %add
87}
88
89; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
90define float @fold4_reassoc_nsz(float %f1, float %f2) {
91; CHECK-LABEL: @fold4_reassoc_nsz(
92; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
93; CHECK-NEXT:    [[TMP2:%.*]] = fsub reassoc nsz float 9.000000e+00, [[TMP1]]
94; CHECK-NEXT:    ret float [[TMP2]]
95;
96  %sub = fsub float 4.000000e+00, %f1
97  %sub1 = fsub float 5.000000e+00, %f2
98  %add = fadd reassoc nsz float %sub, %sub1
99  ret float %add
100}
101
102; TODO: This doesn't require 'nsz'.  It should fold to (9.0 - (f1 + f2)).
103define float @fold4_reassoc(float %f1, float %f2) {
104; CHECK-LABEL: @fold4_reassoc(
105; CHECK-NEXT:    [[TMP1:%.*]] = fsub float 4.000000e+00, [[F1:%.*]]
106; CHECK-NEXT:    [[TMP2:%.*]] = fsub float 5.000000e+00, [[F2:%.*]]
107; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
108; CHECK-NEXT:    ret float [[TMP3]]
109;
110  %sub = fsub float 4.000000e+00, %f1
111  %sub1 = fsub float 5.000000e+00, %f2
112  %add = fadd reassoc float %sub, %sub1
113  ret float %add
114}
115
116; (X + C1) + C2 => X + (C1 + C2)
117define float @fold5(float %f1) {
118; CHECK-LABEL: @fold5(
119; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float [[F1:%.*]], 9.000000e+00
120; CHECK-NEXT:    ret float [[ADD1]]
121;
122  %add = fadd float %f1, 4.000000e+00
123  %add1 = fadd fast float %add, 5.000000e+00
124  ret float %add1
125}
126
127; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
128define float @fold5_reassoc_nsz(float %f1) {
129; CHECK-LABEL: @fold5_reassoc_nsz(
130; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc nsz float [[F1:%.*]], 9.000000e+00
131; CHECK-NEXT:    ret float [[ADD1]]
132;
133  %add = fadd float %f1, 4.000000e+00
134  %add1 = fadd reassoc nsz float %add, 5.000000e+00
135  ret float %add1
136}
137
138; TODO: This doesn't require 'nsz'.  It should fold to f1 + 9.0
139define float @fold5_reassoc(float %f1) {
140; CHECK-LABEL: @fold5_reassoc(
141; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[F1:%.*]], 4.000000e+00
142; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc float [[ADD]], 5.000000e+00
143; CHECK-NEXT:    ret float [[ADD1]]
144;
145  %add = fadd float %f1, 4.000000e+00
146  %add1 = fadd reassoc float %add, 5.000000e+00
147  ret float %add1
148}
149
150; (X + X) + X + X => 4.0 * X
151define float @fold6(float %f1) {
152; CHECK-LABEL: @fold6(
153; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 4.000000e+00
154; CHECK-NEXT:    ret float [[TMP1]]
155;
156  %t1 = fadd fast float %f1, %f1
157  %t2 = fadd fast float %f1, %t1
158  %t3 = fadd fast float %t2, %f1
159  ret float %t3
160}
161
162; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
163define float @fold6_reassoc_nsz(float %f1) {
164; CHECK-LABEL: @fold6_reassoc_nsz(
165; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 4.000000e+00
166; CHECK-NEXT:    ret float [[TMP1]]
167;
168  %t1 = fadd reassoc nsz float %f1, %f1
169  %t2 = fadd reassoc nsz float %f1, %t1
170  %t3 = fadd reassoc nsz float %t2, %f1
171  ret float %t3
172}
173
174; TODO: This doesn't require 'nsz'.  It should fold to f1 * 4.0.
175define float @fold6_reassoc(float %f1) {
176; CHECK-LABEL: @fold6_reassoc(
177; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
178; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc float [[TMP1]], [[F1]]
179; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP2]], [[F1]]
180; CHECK-NEXT:    ret float [[TMP3]]
181;
182  %t1 = fadd reassoc float %f1, %f1
183  %t2 = fadd reassoc float %f1, %t1
184  %t3 = fadd reassoc float %t2, %f1
185  ret float %t3
186}
187
188; C1 * X + (X + X) = (C1 + 2) * X
189define float @fold7(float %f1) {
190; CHECK-LABEL: @fold7(
191; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 7.000000e+00
192; CHECK-NEXT:    ret float [[TMP1]]
193;
194  %t1 = fmul fast float %f1, 5.000000e+00
195  %t2 = fadd fast float %f1, %f1
196  %t3 = fadd fast float %t1, %t2
197  ret float %t3
198}
199
200; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
201define float @fold7_reassoc_nsz(float %f1) {
202; CHECK-LABEL: @fold7_reassoc_nsz(
203; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 7.000000e+00
204; CHECK-NEXT:    ret float [[TMP1]]
205;
206  %t1 = fmul reassoc nsz float %f1, 5.000000e+00
207  %t2 = fadd reassoc nsz float %f1, %f1
208  %t3 = fadd reassoc nsz float %t1, %t2
209  ret float %t3
210}
211
212; TODO: This doesn't require 'nsz'.  It should fold to f1 * 7.0.
213define float @fold7_reassoc(float %f1) {
214; CHECK-LABEL: @fold7_reassoc(
215; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[F1:%.*]], 5.000000e+00
216; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc float [[F1]], [[F1]]
217; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
218; CHECK-NEXT:    ret float [[TMP3]]
219;
220  %t1 = fmul reassoc float %f1, 5.000000e+00
221  %t2 = fadd reassoc float %f1, %f1
222  %t3 = fadd reassoc float %t1, %t2
223  ret float %t3
224}
225
226; (X + X) + (X + X) + X => 5.0 * X
227define float @fold8(float %f1) {
228; CHECK-LABEL: @fold8(
229; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 5.000000e+00
230; CHECK-NEXT:    ret float [[TMP1]]
231;
232  %t1 = fadd fast float %f1, %f1
233  %t2 = fadd fast float %f1, %f1
234  %t3 = fadd fast float %t1, %t2
235  %t4 = fadd fast float %t3, %f1
236  ret float %t4
237}
238
239; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
240define float @fold8_reassoc_nsz(float %f1) {
241; CHECK-LABEL: @fold8_reassoc_nsz(
242; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 5.000000e+00
243; CHECK-NEXT:    ret float [[TMP1]]
244;
245  %t1 = fadd reassoc nsz float %f1, %f1
246  %t2 = fadd reassoc nsz float %f1, %f1
247  %t3 = fadd reassoc nsz float %t1, %t2
248  %t4 = fadd reassoc nsz float %t3, %f1
249  ret float %t4
250}
251
252; TODO: This doesn't require 'nsz'.  It should fold to f1 * 5.0.
253define float @fold8_reassoc(float %f1) {
254; CHECK-LABEL: @fold8_reassoc(
255; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
256; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc float [[F1]], [[F1]]
257; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
258; CHECK-NEXT:    [[TMP4:%.*]] = fadd reassoc float [[TMP3]], [[F1]]
259; CHECK-NEXT:    ret float [[TMP4]]
260;
261  %t1 = fadd reassoc float %f1, %f1
262  %t2 = fadd reassoc float %f1, %f1
263  %t3 = fadd reassoc float %t1, %t2
264  %t4 = fadd reassoc float %t3, %f1
265  ret float %t4
266}
267
268; X - (X + Y) => 0 - Y
269define float @fold9(float %f1, float %f2) {
270; CHECK-LABEL: @fold9(
271; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float -0.000000e+00, [[F2:%.*]]
272; CHECK-NEXT:    ret float [[TMP1]]
273;
274  %t1 = fadd float %f1, %f2
275  %t3 = fsub fast float %f1, %t1
276  ret float %t3
277}
278
279; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
280define float @fold9_reassoc_nsz(float %f1, float %f2) {
281; CHECK-LABEL: @fold9_reassoc_nsz(
282; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float -0.000000e+00, [[F2:%.*]]
283; CHECK-NEXT:    ret float [[TMP1]]
284;
285  %t1 = fadd float %f1, %f2
286  %t3 = fsub reassoc nsz float %f1, %t1
287  ret float %t3
288}
289
290; TODO: This doesn't require 'nsz'.  It should fold to 0 - f2
291define float @fold9_reassoc(float %f1, float %f2) {
292; CHECK-LABEL: @fold9_reassoc(
293; CHECK-NEXT:    [[TMP1:%.*]] = fadd float [[F1:%.*]], [[F2:%.*]]
294; CHECK-NEXT:    [[TMP2:%.*]] = fsub reassoc float [[F1]], [[TMP1]]
295; CHECK-NEXT:    ret float [[TMP2]]
296;
297  %t1 = fadd float %f1, %f2
298  %t3 = fsub reassoc float %f1, %t1
299  ret float %t3
300}
301
302; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
303; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the
304; top of resulting simplified expression tree may potentially reveal some
305; optimization opportunities in the super-expression trees.
306;
307define float @fold10(float %f1, float %f2) {
308; CHECK-LABEL: @fold10(
309; CHECK-NEXT:    [[T2:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
310; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T2]], -1.000000e+00
311; CHECK-NEXT:    ret float [[T3]]
312;
313  %t1 = fadd fast float 2.000000e+00, %f1
314  %t2 = fsub fast float %f2, 3.000000e+00
315  %t3 = fadd fast float %t1, %t2
316  ret float %t3
317}
318
319; Check again with 'reassoc' and 'nsz'.
320; TODO: We may be able to remove the 'nsz' requirement.
321define float @fold10_reassoc_nsz(float %f1, float %f2) {
322; CHECK-LABEL: @fold10_reassoc_nsz(
323; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
324; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc nsz float [[T2]], -1.000000e+00
325; CHECK-NEXT:    ret float [[T3]]
326;
327  %t1 = fadd reassoc nsz float 2.000000e+00, %f1
328  %t2 = fsub reassoc nsz float %f2, 3.000000e+00
329  %t3 = fadd reassoc nsz float %t1, %t2
330  ret float %t3
331}
332
333; Observe that the fold is not done with only reassoc (the instructions are
334; canonicalized, but not folded).
335; TODO: As noted above, 'nsz' may not be required for this to be fully folded.
336define float @fold10_reassoc(float %f1, float %f2) {
337; CHECK-LABEL: @fold10_reassoc(
338; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], 2.000000e+00
339; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc float [[F2:%.*]], -3.000000e+00
340; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
341; CHECK-NEXT:    ret float [[TMP3]]
342;
343  %t1 = fadd reassoc float 2.000000e+00, %f1
344  %t2 = fsub reassoc float %f2, 3.000000e+00
345  %t3 = fadd reassoc float %t1, %t2
346  ret float %t3
347}
348
349; This used to crash/miscompile.
350
; ((f1 - 1) + (f1 - 1)) + (f1 - 1) is expected to become 3*f1 - 3.
; The %f2 argument is not used by the body.
351define float @fail1(float %f1, float %f2) {
352; CHECK-LABEL: @fail1(
353; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 3.000000e+00
354; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast float [[TMP1]], -3.000000e+00
355; CHECK-NEXT:    ret float [[TMP2]]
356;
357  %conv3 = fadd fast float %f1, -1.000000e+00
358  %add = fadd fast float %conv3, %conv3
359  %add2 = fadd fast float %add, %conv3
360  ret float %add2
361}
362
; (f1 - f2) - (f1 + f2) is expected to become -(f2 + f2), with the negation
; written as (-0.0 - (f2 + f2)); %f1 cancels out entirely.
363define double @fail2(double %f1, double %f2) {
364; CHECK-LABEL: @fail2(
365; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast double [[F2:%.*]], [[F2]]
366; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast double -0.000000e+00, [[TMP1]]
367; CHECK-NEXT:    ret double [[TMP2]]
368;
369  %t1 = fsub fast double %f1, %f2
370  %t2 = fadd fast double %f1, %f2
371  %t3 = fsub fast double %t1, %t2
372  ret double %t3
373}
374
375; c1 * x - x => (c1 - 1.0) * x
376define float @fold13(float %x) {
377; CHECK-LABEL: @fold13(
378; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], 6.000000e+00
379; CHECK-NEXT:    ret float [[TMP1]]
380;
381  %mul = fmul fast float %x, 7.000000e+00
382  %sub = fsub fast float %mul, %x
383  ret float %sub
384}
385
386; Check again using the minimal subset of FMF.
387define float @fold13_reassoc_nsz(float %x) {
388; CHECK-LABEL: @fold13_reassoc_nsz(
389; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[X:%.*]], 6.000000e+00
390; CHECK-NEXT:    ret float [[TMP1]]
391;
392  %mul = fmul reassoc nsz float %x, 7.000000e+00
393  %sub = fsub reassoc nsz float %mul, %x
394  ret float %sub
395}
396
397; Verify the fold is not done with only 'reassoc' ('nsz' is required).
398define float @fold13_reassoc(float %x) {
399; CHECK-LABEL: @fold13_reassoc(
400; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 7.000000e+00
401; CHECK-NEXT:    [[TMP2:%.*]] = fsub reassoc float [[TMP1]], [[X]]
402; CHECK-NEXT:    ret float [[TMP2]]
403;
404  %mul = fmul reassoc float %x, 7.000000e+00
405  %sub = fsub reassoc float %mul, %x
406  ret float %sub
407}
408
409; (select X+Y, X-Y) => X + (select Y, -Y)
410; This is always safe.  No FMF required.
411define float @fold16(float %x, float %y) {
412; CHECK-LABEL: @fold16(
413; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
414; CHECK-NEXT:    [[TMP1:%.*]] = fsub float -0.000000e+00, [[Y]]
415; CHECK-NEXT:    [[R_P:%.*]] = select i1 [[CMP]], float [[Y]], float [[TMP1]]
416; CHECK-NEXT:    [[R:%.*]] = fadd float [[R_P]], [[X]]
417; CHECK-NEXT:    ret float [[R]]
418;
419  %cmp = fcmp ogt float %x, %y
420  %plus = fadd float %x, %y
421  %minus = fsub float %x, %y
422  %r = select i1 %cmp, float %plus, float %minus
423  ret float %r
424}
425
426; =========================================================================
427;
428;   Testing-cases about negation
429;
430; =========================================================================
; (-f1) * (-f2) => f1 * f2
; The first negation is written as (-0.0 - f1); the second as an 'nsz'
; (0.0 - f2). The fmul itself carries no fast-math flags.
431define float @fneg1(float %f1, float %f2) {
432; CHECK-LABEL: @fneg1(
433; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[F1:%.*]], [[F2:%.*]]
434; CHECK-NEXT:    ret float [[MUL]]
435;
436  %sub = fsub float -0.000000e+00, %f1
437  %sub1 = fsub nsz float 0.000000e+00, %f2
438  %mul = fmul float %sub, %sub1
439  ret float %mul
440}
441
; With 'nsz', (0.0 - x) is expected to be rewritten as (-0.0 - x).
442define float @fneg2(float %x) {
443; CHECK-LABEL: @fneg2(
444; CHECK-NEXT:    [[SUB:%.*]] = fsub nsz float -0.000000e+00, [[X:%.*]]
445; CHECK-NEXT:    ret float [[SUB]]
446;
447  %sub = fsub nsz float 0.0, %x
448  ret float %sub
449}
450
; Vector version of fneg2 where one lane of the zero constant is undef.
; The expected output uses -0.0 in both lanes of the constant.
451define <2 x float> @fneg2_vec_undef(<2 x float> %x) {
452; CHECK-LABEL: @fneg2_vec_undef(
453; CHECK-NEXT:    [[SUB:%.*]] = fsub nsz <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
454; CHECK-NEXT:    ret <2 x float> [[SUB]]
455;
456  %sub = fsub nsz <2 x float> <float undef, float 0.0>, %x
457  ret <2 x float> %sub
458}
459
460; =========================================================================
461;
462;   Testing-cases about div
463;
464; =========================================================================
465
466; X/C1 / C2 => X * (1/(C2*C1))
467define float @fdiv1(float %x) {
468; CHECK-LABEL: @fdiv1(
469; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FD7303B60000000
470; CHECK-NEXT:    ret float [[DIV1]]
471;
472  %div = fdiv float %x, 0x3FF3333340000000
473  %div1 = fdiv fast float %div, 0x4002666660000000
474  ret float %div1
475; 0x3FF3333340000000 = 1.2f
476; 0x4002666660000000 = 2.3f
477; 0x3FD7303B60000000 = 0.36231884057971014492
478}
479
480; X*C1 / C2 => X * (C1/C2)
481define float @fdiv2(float %x) {
482; CHECK-LABEL: @fdiv2(
483; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FE0B21660000000
484; CHECK-NEXT:    ret float [[DIV1]]
485;
486  %mul = fmul float %x, 0x3FF3333340000000
487  %div1 = fdiv fast float %mul, 0x4002666660000000
488  ret float %div1
489
490; 0x3FF3333340000000 = 1.2f
491; 0x4002666660000000 = 2.3f
492; 0x3FE0B21660000000 = 0.52173918485641479492
493}
494
; Vector version of fdiv2 with non-splat constants:
; (X * <6.0, 9.0>) / <2.0, 3.0> is expected to become X * <3.0, 3.0>.
; Only the fdiv is 'fast'; the fmul has no fast-math flags.
495define <2 x float> @fdiv2_vec(<2 x float> %x) {
496; CHECK-LABEL: @fdiv2_vec(
497; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], <float 3.000000e+00, float 3.000000e+00>
498; CHECK-NEXT:    ret <2 x float> [[DIV1]]
499;
500  %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
501  %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
502  ret <2 x float> %div1
503}
504
505; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2/C1 is a denormal
506;
; The CHECK lines expect the first fdiv to be kept as-is and only the second
; ('fast') fdiv to be turned into a multiply by a constant (presumably the
; reciprocal of 2.3f; 0x4002666660000000 = 2.3f).
; NOTE(review): the rule above is stated in terms of 1/(C2*C1) while the
; condition mentions C2/C1 -- confirm which quantity the guard really checks.
507define float @fdiv3(float %x) {
508; CHECK-LABEL: @fdiv3(
509; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[X:%.*]], 0x47EFFFFFE0000000
510; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[DIV]], 0x3FDBD37A80000000
511; CHECK-NEXT:    ret float [[DIV1]]
512;
513  %div = fdiv float %x, 0x47EFFFFFE0000000
514  %div1 = fdiv fast float %div, 0x4002666660000000
515  ret float %div1
516}
517
518; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
; The CHECK lines expect the fmul and the fdiv to be left unchanged.
; NOTE(review): unlike fdiv2, the fdiv below carries no fast-math flags, so
; the output may be unchanged regardless of the constants -- confirm that this
; test still exercises the denormal guard described above.
519define float @fdiv4(float %x) {
520; CHECK-LABEL: @fdiv4(
521; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[X:%.*]], 0x47EFFFFFE0000000
522; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[MUL]], 0x3FC99999A0000000
523; CHECK-NEXT:    ret float [[DIV]]
524;
525  %mul = fmul float %x, 0x47EFFFFFE0000000
526  %div = fdiv float %mul, 0x3FC99999A0000000
527  ret float %div
528}
529
530; =========================================================================
531;
532;   Testing-cases about factorization
533;
534; =========================================================================
535; x*z + y*z => (x+y) * z
536define float @fact_mul1(float %x, float %y, float %z) {
537; CHECK-LABEL: @fact_mul1(
538; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[X:%.*]], [[Y:%.*]]
539; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
540; CHECK-NEXT:    ret float [[TMP2]]
541;
542  %t1 = fmul fast float %x, %z
543  %t2 = fmul fast float %y, %z
544  %t3 = fadd fast float %t1, %t2
545  ret float %t3
546}
547
548; Check again using the minimal subset of FMF.
549define float @fact_mul1_reassoc_nsz(float %x, float %y, float %z) {
550; CHECK-LABEL: @fact_mul1_reassoc_nsz(
551; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[X:%.*]], [[Y:%.*]]
552; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
553; CHECK-NEXT:    ret float [[TMP2]]
554;
555  %t1 = fmul reassoc nsz float %x, %z
556  %t2 = fmul reassoc nsz float %y, %z
557  %t3 = fadd reassoc nsz float %t1, %t2
558  ret float %t3
559}
560
561; Verify the fold is not done with only 'reassoc' ('nsz' is required).
; The expected output keeps both fmuls and the fadd exactly as written.
562define float @fact_mul1_reassoc(float %x, float %y, float %z) {
563; CHECK-LABEL: @fact_mul1_reassoc(
564; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], [[Z:%.*]]
565; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc float [[Y:%.*]], [[Z]]
566; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
567; CHECK-NEXT:    ret float [[TMP3]]
568;
569  %t1 = fmul reassoc float %x, %z
570  %t2 = fmul reassoc float %y, %z
571  %t3 = fadd reassoc float %t1, %t2
572  ret float %t3
573}
574
575; z*x - y*z => (x-y) * z
576define float @fact_mul2(float %x, float %y, float %z) {
577; CHECK-LABEL: @fact_mul2(
578; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
579; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
580; CHECK-NEXT:    ret float [[TMP2]]
581;
582  %t1 = fmul fast float %z, %x
583  %t2 = fmul fast float %y, %z
584  %t3 = fsub fast float %t1, %t2
585  ret float %t3
586}
587
588; Check again using the minimal subset of FMF.
589define float @fact_mul2_reassoc_nsz(float %x, float %y, float %z) {
590; CHECK-LABEL: @fact_mul2_reassoc_nsz(
591; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
592; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
593; CHECK-NEXT:    ret float [[TMP2]]
594;
595  %t1 = fmul reassoc nsz float %z, %x
596  %t2 = fmul reassoc nsz float %y, %z
597  %t3 = fsub reassoc nsz float %t1, %t2
598  ret float %t3
599}
600
601; Verify the fold is not done with only 'reassoc' ('nsz' is required).
602define float @fact_mul2_reassoc(float %x, float %y, float %z) {
603; CHECK-LABEL: @fact_mul2_reassoc(
604; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[Z:%.*]], [[X:%.*]]
605; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc float [[Y:%.*]], [[Z]]
606; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
607; CHECK-NEXT:    ret float [[TMP3]]
608;
609  %t1 = fmul reassoc float %z, %x
610  %t2 = fmul reassoc float %y, %z
611  %t3 = fsub reassoc float %t1, %t2
612  ret float %t3
613}
614
615; z*x - z*y => (x-y) * z
616define float @fact_mul3(float %x, float %y, float %z) {
617; CHECK-LABEL: @fact_mul3(
618; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
619; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
620; CHECK-NEXT:    ret float [[TMP2]]
621;
622  %t2 = fmul fast float %z, %y
623  %t1 = fmul fast float %z, %x
624  %t3 = fsub fast float %t1, %t2
625  ret float %t3
626}
627
628; Check again using the minimal subset of FMF.
629define float @fact_mul3_reassoc_nsz(float %x, float %y, float %z) {
630; CHECK-LABEL: @fact_mul3_reassoc_nsz(
631; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
632; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
633; CHECK-NEXT:    ret float [[TMP2]]
634;
635  %t2 = fmul reassoc nsz float %z, %y
636  %t1 = fmul reassoc nsz float %z, %x
637  %t3 = fsub reassoc nsz float %t1, %t2
638  ret float %t3
639}
640
641; Verify the fold is not done with only 'reassoc' ('nsz' is required).
642define float @fact_mul3_reassoc(float %x, float %y, float %z) {
643; CHECK-LABEL: @fact_mul3_reassoc(
644; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc float [[Z:%.*]], [[Y:%.*]]
645; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[Z]], [[X:%.*]]
646; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
647; CHECK-NEXT:    ret float [[TMP3]]
648;
649  %t2 = fmul reassoc float %z, %y
650  %t1 = fmul reassoc float %z, %x
651  %t3 = fsub reassoc float %t1, %t2
652  ret float %t3
653}
654
655; x*z - z*y => (x-y) * z
656define float @fact_mul4(float %x, float %y, float %z) {
657; CHECK-LABEL: @fact_mul4(
658; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
659; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
660; CHECK-NEXT:    ret float [[TMP2]]
661;
662  %t1 = fmul fast float %x, %z
663  %t2 = fmul fast float %z, %y
664  %t3 = fsub fast float %t1, %t2
665  ret float %t3
666}
667
668; Check again using the minimal subset of FMF.
669define float @fact_mul4_reassoc_nsz(float %x, float %y, float %z) {
670; CHECK-LABEL: @fact_mul4_reassoc_nsz(
671; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
672; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
673; CHECK-NEXT:    ret float [[TMP2]]
674;
675  %t1 = fmul reassoc nsz float %x, %z
676  %t2 = fmul reassoc nsz float %z, %y
677  %t3 = fsub reassoc nsz float %t1, %t2
678  ret float %t3
679}
680
681; Verify the fold is not done with only 'reassoc' ('nsz' is required).
682define float @fact_mul4_reassoc(float %x, float %y, float %z) {
683; CHECK-LABEL: @fact_mul4_reassoc(
684; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], [[Z:%.*]]
685; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc float [[Z]], [[Y:%.*]]
686; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
687; CHECK-NEXT:    ret float [[TMP3]]
688;
689  %t1 = fmul reassoc float %x, %z
690  %t2 = fmul reassoc float %z, %y
691  %t3 = fsub reassoc float %t1, %t2
692  ret float %t3
693}
694
695; x/y + x/z, no xform
696define float @fact_div1(float %x, float %y, float %z) {
697; CHECK-LABEL: @fact_div1(
698; CHECK-NEXT:    [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Y:%.*]]
699; CHECK-NEXT:    [[T2:%.*]] = fdiv fast float [[X]], [[Z:%.*]]
700; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T1]], [[T2]]
701; CHECK-NEXT:    ret float [[T3]]
702;
703  %t1 = fdiv fast float %x, %y
704  %t2 = fdiv fast float %x, %z
705  %t3 = fadd fast float %t1, %t2
706  ret float %t3
707}
708
709; x/y + z/x; no xform
710define float @fact_div2(float %x, float %y, float %z) {
711; CHECK-LABEL: @fact_div2(
712; CHECK-NEXT:    [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Y:%.*]]
713; CHECK-NEXT:    [[T2:%.*]] = fdiv fast float [[Z:%.*]], [[X]]
714; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T1]], [[T2]]
715; CHECK-NEXT:    ret float [[T3]]
716;
717  %t1 = fdiv fast float %x, %y
718  %t2 = fdiv fast float %z, %x
719  %t3 = fadd fast float %t1, %t2
720  ret float %t3
721}
722
723; y/x + z/x => (y+z)/x
724define float @fact_div3(float %x, float %y, float %z) {
725; CHECK-LABEL: @fact_div3(
726; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[Y:%.*]], [[Z:%.*]]
727; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast float [[TMP1]], [[X:%.*]]
728; CHECK-NEXT:    ret float [[TMP2]]
729;
730  %t1 = fdiv fast float %y, %x
731  %t2 = fdiv fast float %z, %x
732  %t3 = fadd fast float %t1, %t2
733  ret float %t3
734}
735
736; Check again using the minimal subset of FMF.
737define float @fact_div3_reassoc_nsz(float %x, float %y, float %z) {
738; CHECK-LABEL: @fact_div3_reassoc_nsz(
739; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[Y:%.*]], [[Z:%.*]]
740; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc nsz float [[TMP1]], [[X:%.*]]
741; CHECK-NEXT:    ret float [[TMP2]]
742;
743  %t1 = fdiv reassoc nsz float %y, %x
744  %t2 = fdiv reassoc nsz float %z, %x
745  %t3 = fadd reassoc nsz float %t1, %t2
746  ret float %t3
747}
748
749; Verify the fold is not done with only 'reassoc' ('nsz' is required).
750define float @fact_div3_reassoc(float %x, float %y, float %z) {
751; CHECK-LABEL: @fact_div3_reassoc(
752; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc float [[Y:%.*]], [[X:%.*]]
753; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc float [[Z:%.*]], [[X]]
754; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
755; CHECK-NEXT:    ret float [[TMP3]]
756;
757  %t1 = fdiv reassoc float %y, %x
758  %t2 = fdiv reassoc float %z, %x
759  %t3 = fadd reassoc float %t1, %t2
760  ret float %t3
761}
762
763; y/x - z/x => (y-z)/x
764define float @fact_div4(float %x, float %y, float %z) {
765; CHECK-LABEL: @fact_div4(
766; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[Y:%.*]], [[Z:%.*]]
767; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast float [[TMP1]], [[X:%.*]]
768; CHECK-NEXT:    ret float [[TMP2]]
769;
770  %t1 = fdiv fast float %y, %x
771  %t2 = fdiv fast float %z, %x
772  %t3 = fsub fast float %t1, %t2
773  ret float %t3
774}
775
776; Check again using the minimal subset of FMF.
777define float @fact_div4_reassoc_nsz(float %x, float %y, float %z) {
778; CHECK-LABEL: @fact_div4_reassoc_nsz(
779; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[Y:%.*]], [[Z:%.*]]
780; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc nsz float [[TMP1]], [[X:%.*]]
781; CHECK-NEXT:    ret float [[TMP2]]
782;
783  %t1 = fdiv reassoc nsz float %y, %x
784  %t2 = fdiv reassoc nsz float %z, %x
785  %t3 = fsub reassoc nsz float %t1, %t2
786  ret float %t3
787}
788
789; Verify the fold is not done with only 'reassoc' ('nsz' is required).
790define float @fact_div4_reassoc(float %x, float %y, float %z) {
791; CHECK-LABEL: @fact_div4_reassoc(
792; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc float [[Y:%.*]], [[X:%.*]]
793; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc float [[Z:%.*]], [[X]]
794; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
795; CHECK-NEXT:    ret float [[TMP3]]
796;
797  %t1 = fdiv reassoc float %y, %x
798  %t2 = fdiv reassoc float %z, %x
799  %t3 = fsub reassoc float %t1, %t2
800  ret float %t3
801}
802
803; y/x + z/x => (y+z)/x is done here because y+z is a normal value.
; y = 0x3810000000000000 (2^-126) and z = 0x3800000000000000 (2^-127); their
; sum 0x3818000000000000 (1.5 * 2^-126) is a normal float, so the CHECK lines
; expect a single fdiv. Compare with fact_div6, which uses the same constants
; with fsub.
804define float @fact_div5(float %x) {
805; CHECK-LABEL: @fact_div5(
806; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast float 0x3818000000000000, [[X:%.*]]
807; CHECK-NEXT:    ret float [[TMP1]]
808;
809  %t1 = fdiv fast float 0x3810000000000000, %x
810  %t2 = fdiv fast float 0x3800000000000000, %x
811  %t3 = fadd fast float %t1, %t2
812  ret float %t3
813}
814
815; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
; Here y = 0x3810000000000000 (2^-126) and z = 0x3800000000000000 (2^-127), so
; y-z = 2^-127, which is below the smallest normal single-precision value.
; The CHECK lines expect all three instructions to be left unchanged.
816define float @fact_div6(float %x) {
817; CHECK-LABEL: @fact_div6(
818; CHECK-NEXT:    [[T1:%.*]] = fdiv fast float 0x3810000000000000, [[X:%.*]]
819; CHECK-NEXT:    [[T2:%.*]] = fdiv fast float 0x3800000000000000, [[X]]
820; CHECK-NEXT:    [[T3:%.*]] = fsub fast float [[T1]], [[T2]]
821; CHECK-NEXT:    ret float [[T3]]
822;
823  %t1 = fdiv fast float 0x3810000000000000, %x
824  %t2 = fdiv fast float 0x3800000000000000, %x
825  %t3 = fsub fast float %t1, %t2
826  ret float %t3
827}
828
829; =========================================================================
830;
831;   Test-cases for square root
832;
833; =========================================================================
834
835; A squared factor fed into a square root intrinsic should be hoisted out
836; as a fabs() value.
837
838declare double @llvm.sqrt.f64(double)
839
; sqrt(x * x) => fabs(x)
; Both the fmul and the sqrt call are 'fast'.
840define double @sqrt_intrinsic_arg_squared(double %x) {
841; CHECK-LABEL: @sqrt_intrinsic_arg_squared(
842; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
843; CHECK-NEXT:    ret double [[FABS]]
844;
845  %mul = fmul fast double %x, %x
846  %sqrt = call fast double @llvm.sqrt.f64(double %mul)
847  ret double %sqrt
848}
849
850; Check all 6 combinations of a 3-way multiplication tree where
851; one factor is repeated.
852
; Operand arrangement: sqrt((y * x) * x) => fabs(x) * sqrt(y)
853define double @sqrt_intrinsic_three_args1(double %x, double %y) {
854; CHECK-LABEL: @sqrt_intrinsic_three_args1(
855; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
856; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
857; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
858; CHECK-NEXT:    ret double [[TMP1]]
859;
860  %mul = fmul fast double %y, %x
861  %mul2 = fmul fast double %mul, %x
862  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
863  ret double %sqrt
864}
865
; Operand arrangement: sqrt((x * y) * x) => fabs(x) * sqrt(y)
866define double @sqrt_intrinsic_three_args2(double %x, double %y) {
867; CHECK-LABEL: @sqrt_intrinsic_three_args2(
868; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
869; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
870; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
871; CHECK-NEXT:    ret double [[TMP1]]
872;
873  %mul = fmul fast double %x, %y
874  %mul2 = fmul fast double %mul, %x
875  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
876  ret double %sqrt
877}
878
; Variant 3: the sqrt argument is built as (x * x) * y.
; Expected output: fabs(x) * sqrt(y), all 'fast'.
879define double @sqrt_intrinsic_three_args3(double %x, double %y) {
880; CHECK-LABEL: @sqrt_intrinsic_three_args3(
881; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
882; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
883; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
884; CHECK-NEXT:    ret double [[TMP1]]
885;
886  %mul = fmul fast double %x, %x
887  %mul2 = fmul fast double %mul, %y
888  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
889  ret double %sqrt
890}
891
; Variant 4: the sqrt argument is built as x * (y * x).
; Expected output: fabs(x) * sqrt(y), all 'fast'.
892define double @sqrt_intrinsic_three_args4(double %x, double %y) {
893; CHECK-LABEL: @sqrt_intrinsic_three_args4(
894; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
895; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
896; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
897; CHECK-NEXT:    ret double [[TMP1]]
898;
899  %mul = fmul fast double %y, %x
900  %mul2 = fmul fast double %x, %mul
901  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
902  ret double %sqrt
903}
904
; Variant 5: the sqrt argument is built as x * (x * y).
; Expected output: fabs(x) * sqrt(y), all 'fast'.
905define double @sqrt_intrinsic_three_args5(double %x, double %y) {
906; CHECK-LABEL: @sqrt_intrinsic_three_args5(
907; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
908; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
909; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
910; CHECK-NEXT:    ret double [[TMP1]]
911;
912  %mul = fmul fast double %x, %y
913  %mul2 = fmul fast double %x, %mul
914  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
915  ret double %sqrt
916}
917
; Variant 6: the sqrt argument is built as y * (x * x).
; Expected output: fabs(x) * sqrt(y), all 'fast'.
918define double @sqrt_intrinsic_three_args6(double %x, double %y) {
919; CHECK-LABEL: @sqrt_intrinsic_three_args6(
920; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
921; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
922; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
923; CHECK-NEXT:    ret double [[TMP1]]
924;
925  %mul = fmul fast double %x, %x
926  %mul2 = fmul fast double %y, %mul
927  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
928  ret double %sqrt
929}
930
931; If any operation is not 'fast', we can't simplify.
932
; Negative test: same (x * x) * y shape as sqrt_intrinsic_three_args3, but the
; inner x*x fmul carries no fast-math flags (the outer fmul and the sqrt call
; are still 'fast').  The expected output is the input sequence unchanged.
933define double @sqrt_intrinsic_not_so_fast(double %x, double %y) {
934; CHECK-LABEL: @sqrt_intrinsic_not_so_fast(
935; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[X:%.*]], [[X]]
936; CHECK-NEXT:    [[MUL2:%.*]] = fmul fast double [[MUL]], [[Y:%.*]]
937; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[MUL2]])
938; CHECK-NEXT:    ret double [[SQRT]]
939;
940  %mul = fmul double %x, %x
941  %mul2 = fmul fast double %mul, %mul
942  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
943  ret double %sqrt
944}
945
; The sqrt argument is (x*x) * (x*x), i.e. the repeated factor is itself the
; product x*x.  The expected output returns that x*x product directly, with
; no fabs and no sqrt call.
946define double @sqrt_intrinsic_arg_4th(double %x) {
947; CHECK-LABEL: @sqrt_intrinsic_arg_4th(
948; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
949; CHECK-NEXT:    ret double [[MUL]]
950;
951  %mul = fmul fast double %x, %x
952  %mul2 = fmul fast double %mul, %mul
953  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
954  ret double %sqrt
955}
956
; The sqrt argument is ((x*x) * x) * (x*x), i.e. five factors of x.
; Expected output: the x*x product multiplied by a remaining sqrt(x) call,
; with no fabs.
957define double @sqrt_intrinsic_arg_5th(double %x) {
958; CHECK-LABEL: @sqrt_intrinsic_arg_5th(
959; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
960; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
961; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[MUL]], [[SQRT1]]
962; CHECK-NEXT:    ret double [[TMP1]]
963;
964  %mul = fmul fast double %x, %x
965  %mul2 = fmul fast double %mul, %x
966  %mul3 = fmul fast double %mul2, %mul
967  %sqrt = call fast double @llvm.sqrt.f64(double %mul3)
968  ret double %sqrt
969}
970
971; Check that square root calls have the same behavior.
972
973declare float @sqrtf(float)
974declare double @sqrt(double)
975declare fp128 @sqrtl(fp128)
976
; float variant: a 'fast' call to the declared function @sqrtf on a 'fast'
; x*x.  Expected output: a single 'fast' llvm.fabs.f32(x) call.
977define float @sqrt_call_squared_f32(float %x) {
978; CHECK-LABEL: @sqrt_call_squared_f32(
979; CHECK-NEXT:    [[FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
980; CHECK-NEXT:    ret float [[FABS]]
981;
982  %mul = fmul fast float %x, %x
983  %sqrt = call fast float @sqrtf(float %mul)
984  ret float %sqrt
985}
986
; double variant: a 'fast' call to the declared function @sqrt on a 'fast'
; x*x.  Expected output: a single 'fast' llvm.fabs.f64(x) call.
987define double @sqrt_call_squared_f64(double %x) {
988; CHECK-LABEL: @sqrt_call_squared_f64(
989; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
990; CHECK-NEXT:    ret double [[FABS]]
991;
992  %mul = fmul fast double %x, %x
993  %sqrt = call fast double @sqrt(double %mul)
994  ret double %sqrt
995}
996
; fp128 variant: a 'fast' call to the declared function @sqrtl on a 'fast'
; x*x.  Expected output: a single 'fast' llvm.fabs.f128(x) call.
997define fp128 @sqrt_call_squared_f128(fp128 %x) {
998; CHECK-LABEL: @sqrt_call_squared_f128(
999; CHECK-NEXT:    [[FABS:%.*]] = call fast fp128 @llvm.fabs.f128(fp128 [[X:%.*]])
1000; CHECK-NEXT:    ret fp128 [[FABS]]
1001;
1002  %mul = fmul fast fp128 %x, %x
1003  %sqrt = call fast fp128 @sqrtl(fp128 %mul)
1004  ret fp128 %sqrt
1005}
1006
1007; =========================================================================
1008;
1009;   Test-cases for fmin / fmax
1010;
1011; =========================================================================
1012
1013declare double @fmax(double, double)
1014declare double @fmin(double, double)
1015declare float @fmaxf(float, float)
1016declare float @fminf(float, float)
1017declare fp128 @fmaxl(fp128, fp128)
1018declare fp128 @fminl(fp128, fp128)
1019
1020; No NaNs is the minimum requirement to replace these calls.
1021; This should always be set when unsafe-fp-math is true, but
1022; alternate the attributes for additional test coverage.
1023; 'nsz' is implied by the definition of fmax or fmin itself.
1024
1025; Shrink and remove the call.
; The float arguments are extended to double, passed to a 'fast' @fmax call,
; and the result is truncated back to float.  The expected output has no
; fpext/fptrunc and no call: just 'fcmp fast ogt' plus a select on the
; original float values.
1026define float @max1(float %a, float %b) {
1027; CHECK-LABEL: @max1(
1028; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast ogt float [[A:%.*]], [[B:%.*]]
1029; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
1030; CHECK-NEXT:    ret float [[TMP2]]
1031;
1032  %c = fpext float %a to double
1033  %d = fpext float %b to double
1034  %e = call fast double @fmax(double %c, double %d)
1035  %f = fptrunc double %e to float
1036  ret float %f
1037}
1038
; @fmaxf call carrying only 'nnan'.  Expected output: 'fcmp ogt' + select;
; the fcmp carries 'nnan nsz' even though the call was marked 'nnan' only.
1039define float @max2(float %a, float %b) {
1040; CHECK-LABEL: @max2(
1041; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz ogt float [[A:%.*]], [[B:%.*]]
1042; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
1043; CHECK-NEXT:    ret float [[TMP2]]
1044;
1045  %c = call nnan float @fmaxf(float %a, float %b)
1046  ret float %c
1047}
1048
1049
; 'fast' @fmax call on doubles.  Expected output: 'fcmp fast ogt' + select.
1050define double @max3(double %a, double %b) {
1051; CHECK-LABEL: @max3(
1052; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast ogt double [[A:%.*]], [[B:%.*]]
1053; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
1054; CHECK-NEXT:    ret double [[TMP2]]
1055;
1056  %c = call fast double @fmax(double %a, double %b)
1057  ret double %c
1058}
1059
; @fmaxl call on fp128 carrying only 'nnan'.  Expected output: 'fcmp ogt' +
; select; the fcmp carries 'nnan nsz'.
1060define fp128 @max4(fp128 %a, fp128 %b) {
1061; CHECK-LABEL: @max4(
1062; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz ogt fp128 [[A:%.*]], [[B:%.*]]
1063; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
1064; CHECK-NEXT:    ret fp128 [[TMP2]]
1065;
1066  %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
1067  ret fp128 %c
1068}
1069
1070; Shrink and remove the call.
; The float arguments are extended to double, passed to an 'nnan' @fmin call,
; and the result is truncated back to float.  The expected output has no
; fpext/fptrunc and no call: just 'fcmp nnan nsz olt' plus a select on the
; original float values.
1071define float @min1(float %a, float %b) {
1072; CHECK-LABEL: @min1(
1073; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz olt float [[A:%.*]], [[B:%.*]]
1074; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
1075; CHECK-NEXT:    ret float [[TMP2]]
1076;
1077  %c = fpext float %a to double
1078  %d = fpext float %b to double
1079  %e = call nnan double @fmin(double %c, double %d)
1080  %f = fptrunc double %e to float
1081  ret float %f
1082}
1083
; 'fast' @fminf call on floats.  Expected output: 'fcmp fast olt' + select.
1084define float @min2(float %a, float %b) {
1085; CHECK-LABEL: @min2(
1086; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt float [[A:%.*]], [[B:%.*]]
1087; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
1088; CHECK-NEXT:    ret float [[TMP2]]
1089;
1090  %c = call fast float @fminf(float %a, float %b)
1091  ret float %c
1092}
1093
; @fmin call on doubles carrying only 'nnan'.  Expected output: 'fcmp olt' +
; select; the fcmp carries 'nnan nsz'.
1094define double @min3(double %a, double %b) {
1095; CHECK-LABEL: @min3(
1096; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz olt double [[A:%.*]], [[B:%.*]]
1097; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
1098; CHECK-NEXT:    ret double [[TMP2]]
1099;
1100  %c = call nnan double @fmin(double %a, double %b)
1101  ret double %c
1102}
1103
; 'fast' @fminl call on fp128.  Expected output: 'fcmp fast olt' + select.
1104define fp128 @min4(fp128 %a, fp128 %b) {
1105; CHECK-LABEL: @min4(
1106; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt fp128 [[A:%.*]], [[B:%.*]]
1107; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
1108; CHECK-NEXT:    ret fp128 [[TMP2]]
1109;
1110  %c = call fast fp128 @fminl(fp128 %a, fp128 %b)
1111  ret fp128 %c
1112}
1113
1114; ((which ? 2.0 : a) + 1.0) => (which ? 3.0 : (a + 1.0))
1115; This is always safe.  No FMF required.
; In the input the "select" is a phi in %final (2.0 from %entry, %a from
; %delay) followed by a flag-less fadd of 1.0.  The expected output moves the
; fadd into %delay and folds the constant incoming value to 3.0.
; NOTE(review): the argument %a and the phi %A both map to the FileCheck
; variable A, so A is bound in the 'delay' check and then rebound at the phi;
; the final 'ret' check refers to the phi binding.
1116define float @test55(i1 %which, float %a) {
1117; CHECK-LABEL: @test55(
1118; CHECK-NEXT:  entry:
1119; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
1120; CHECK:       delay:
1121; CHECK-NEXT:    [[PHITMP:%.*]] = fadd float [[A:%.*]], 1.000000e+00
1122; CHECK-NEXT:    br label [[FINAL]]
1123; CHECK:       final:
1124; CHECK-NEXT:    [[A:%.*]] = phi float [ 3.000000e+00, [[ENTRY:%.*]] ], [ [[PHITMP]], [[DELAY]] ]
1125; CHECK-NEXT:    ret float [[A]]
1126;
1127entry:
1128  br i1 %which, label %final, label %delay
1129
1130delay:
1131  br label %final
1132
1133final:
1134  %A = phi float [ 2.0, %entry ], [ %a, %delay ]
1135  %value = fadd float %A, 1.0
1136  ret float %value
1137}
1138