1; RUN: opt < %s -instcombine -S | FileCheck %s
2
3; testing-case "float fold(float a) { return 1.2f * a * 2.3f; }"
4; 1.2f and 2.3f are supposed to be folded.
5define float @fold(float %a) {
; Both multiplies are 'fast'; the expected output is a single fast fmul of %a
; by one merged constant.
6  %mul = fmul fast float %a, 0x3FF3333340000000
7  %mul1 = fmul fast float %mul, 0x4002666660000000
8  ret float %mul1
9; CHECK-LABEL: @fold(
10; CHECK: fmul fast float %a, 0x4006147AE0000000
11}
12
13; Same testing-case as the one used in fold() except that the second fmul is
14; not 'fast' (strict FP semantics), so the constants must not be folded.
15define float @notfold(float %a) {
; The outer fmul (%mul1) has no fast-math flags; the expected output still
; contains the first multiply by its original constant.
16; CHECK-LABEL: @notfold(
17; CHECK: %mul = fmul fast float %a, 0x3FF3333340000000
18  %mul = fmul fast float %a, 0x3FF3333340000000
19  %mul1 = fmul float %mul, 0x4002666660000000
20  ret float %mul1
21}
22
23define float @fold2(float %a) {
; Only the outer fmul (%mul1) is 'fast'; the inner one is strict. The expected
; output is nevertheless a single fast fmul by the merged constant.
24; CHECK-LABEL: @fold2(
25; CHECK: fmul fast float %a, 0x4006147AE0000000
26  %mul = fmul float %a, 0x3FF3333340000000
27  %mul1 = fmul fast float %mul, 0x4002666660000000
28  ret float %mul1
29}
30
31; C * f1 + f1 = (C+1) * f1
32define double @fold3(double %f1) {
; 2.0 * %f1 + %f1 with both operations 'fast'; expected to become one fast
; multiply of %f1 by 3.0.
33  %t1 = fmul fast double 2.000000e+00, %f1
34  %t2 = fadd fast double %f1, %t1
35  ret double %t2
36; CHECK-LABEL: @fold3(
37; CHECK: fmul fast double %f1, 3.000000e+00
38}
39
40; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
41define float @fold4(float %f1, float %f2) {
; The two fsubs are strict; only the final fadd is 'fast'. Expected output:
; a fast fadd of the two arguments subtracted from the summed constant 9.0.
42  %sub = fsub float 4.000000e+00, %f1
43  %sub1 = fsub float 5.000000e+00, %f2
44  %add = fadd fast float %sub, %sub1
45  ret float %add
46; CHECK-LABEL: @fold4(
47; CHECK: %1 = fadd fast float %f1, %f2
48; CHECK: fsub fast float 9.000000e+00, %1
49}
50
51; (X + C1) + C2 => X + (C1 + C2)
52define float @fold5(float %f1, float %f2) {
; Inner fadd is strict, outer fadd is 'fast'; the constants 4.0 and 5.0 are
; expected to merge into a single addend of 9.0. %f2 is not used.
53  %add = fadd float %f1, 4.000000e+00
54  %add1 = fadd fast float %add, 5.000000e+00
55  ret float %add1
56; CHECK-LABEL: @fold5(
57; CHECK: fadd fast float %f1, 9.000000e+00
58}
59
60; (X + X) + X => 3.0 * X
61define float @fold6(float %f1) {
; Three copies of %f1 are summed with 'fast' fadds; expected to become a
; single fast multiply by 3.0.
62  %t1 = fadd fast float %f1, %f1
63  %t2 = fadd fast float %f1, %t1
64  ret float %t2
65; CHECK-LABEL: @fold6(
66; CHECK: fmul fast float %f1, 3.000000e+00
67}
68
69; C1 * X + (X + X) = (C1 + 2) * X
70define float @fold7(float %f1) {
; 5.0 * %f1 plus (%f1 + %f1), all 'fast'; expected to become a single fast
; multiply by 7.0.
71  %t1 = fmul fast float %f1, 5.000000e+00
72  %t2 = fadd fast float %f1, %f1
73  %t3 = fadd fast float %t1, %t2
74  ret float %t3
75; CHECK-LABEL: @fold7(
76; CHECK: fmul fast float %f1, 7.000000e+00
77}
78
79; (X + X) + (X + X) => 4.0 * X
80define float @fold8(float %f1) {
; Four copies of %f1 are summed with 'fast' fadds; expected to become a
; single fast multiply by 4.0.
81  %t1 = fadd fast float %f1, %f1
82  %t2 = fadd fast float %f1, %f1
83  %t3 = fadd fast float %t1, %t2
84  ret float %t3
; Anchor on the function label (as the other tests in this file do) so the
; pattern below cannot match output that belongs to a neighbouring function.
85; CHECK-LABEL: @fold8(
86; CHECK: fmul fast float %f1, 4.000000e+00
87}
88
89; X - (X + Y) => 0 - Y
90define float @fold9(float %f1, float %f2) {
; The inner fadd is strict and the outer fsub is 'fast'. %f1 cancels, so the
; expected output is a fast negation of %f2 (written as -0.0 - %f2).
91  %t1 = fadd float %f1, %f2
92  %t3 = fsub fast float %f1, %t1
93  ret float %t3
94
95; CHECK-LABEL: @fold9(
96; CHECK: fsub fast float -0.000000e+00, %f2
97}
98
99; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
100; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the
101; top of resulting simplified expression tree may potentially reveal some
102; optimization opportunities in the super-expression trees.
103;
104define float @fold10(float %f1, float %f2) {
; Inputs are (2.0 + %f1) and (%f2 - 3.0), all 'fast'. The expected output
; applies the combined constant -1.0 in the last instruction before the ret.
105  %t1 = fadd fast float 2.000000e+00, %f1
106  %t2 = fsub fast float %f2, 3.000000e+00
107  %t3 = fadd fast float %t1, %t2
108  ret float %t3
109; CHECK-LABEL: @fold10(
110; CHECK: %t3 = fadd fast float %t2, -1.000000e+00
111; CHECK: ret float %t3
112}
113
114; Cases that once caused a crash/miscompilation
115define float @fail1(float %f1, float %f2) {
; Regression input: (%f1 - 1.0) added to itself three times with 'fast'
; fadds. Only the presence of a ret in the output is verified. %f2 is unused.
116  %conv3 = fadd fast float %f1, -1.000000e+00
117  %add = fadd fast float %conv3, %conv3
118  %add2 = fadd fast float %add, %conv3
119  ret float %add2
120; CHECK-LABEL: @fail1(
121; CHECK: ret
122}
123
124define double @fail2(double %f1, double %f2) {
; Regression input: (%f1 - %f2) - (%f1 + %f2), all 'fast'. Only the presence
; of a ret in the output is verified.
125  %t1 = fsub fast double %f1, %f2
126  %t2 = fadd fast double %f1, %f2
127  %t3 = fsub fast double %t1, %t2
128  ret double %t3
129; CHECK-LABEL: @fail2(
130; CHECK: ret
131}
132
133; c1 * x - x => (c1 - 1.0) * x
134define float @fold13(float %x) {
; 7.0 * %x minus %x, both 'fast'; expected to become one fast multiply of %x
; by 6.0.
135  %mul = fmul fast float %x, 7.000000e+00
136  %sub = fsub fast float %mul, %x
137  ret float %sub
; Anchor on the function label so the patterns below are confined to this
; function's output.
138; CHECK-LABEL: @fold13(
139; CHECK: fmul fast float %x, 6.000000e+00
140; CHECK: ret
141}
142
143; -x + y => y - x
144define float @fold14(float %x, float %y) {
; %neg is the negation of %x (-0.0 - %x); adding it to %y is expected to
; become a single fast subtraction %y - %x.
145  %neg = fsub fast float -0.0, %x
146  %add = fadd fast float %neg, %y
147  ret float %add
; Anchor on the function label so the patterns below are confined to this
; function's output.
148; CHECK-LABEL: @fold14(
149; CHECK: fsub fast float %y, %x
150; CHECK: ret
151}
152
153; x + -y => x - y
154define float @fold15(float %x, float %y) {
; %neg is the negation of %y (-0.0 - %y); adding it to %x is expected to
; become a single fast subtraction %x - %y.
155  %neg = fsub fast float -0.0, %y
156  %add = fadd fast float %x, %neg
157  ret float %add
; Anchor on the function label so the patterns below are confined to this
; function's output.
158; CHECK-LABEL: @fold15(
159; CHECK: fsub fast float %x, %y
160; CHECK: ret
161}
162
163; (select X+Y, X-Y) => X + (select Y, -Y)
164define float @fold16(float %x, float %y) {
; Selects between %x + %y and %x - %y on an ordered greater-than compare.
; The expected output contains, in order: a fast fsub, a select, a fast fadd.
165  %cmp = fcmp ogt float %x, %y
166  %plus = fadd fast float %x, %y
167  %minus = fsub fast float %x, %y
168  %r = select i1 %cmp, float %plus, float %minus
169  ret float %r
; Anchor on the function label so the loosely written patterns below are
; confined to this function's output.
170; CHECK-LABEL: @fold16(
171; CHECK: fsub fast float
172; CHECK: select
173; CHECK: fadd fast float
174; CHECK: ret
175}
176
177
178
179; =========================================================================
180;
181;   Testing-cases about fmul begin
182;
183; =========================================================================
184
185; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
186define float @fmul_distribute1(float %f1) {
; Only the final multiply by 5.0e+3 is 'fast'. Expected output: %f1 times
; 3.0e+7 (6.0e+3 * 5.0e+3) plus 1.0e+7 (2.0e+3 * 5.0e+3), both 'fast'.
187  %t1 = fmul float %f1, 6.0e+3
188  %t2 = fadd float %t1, 2.0e+3
189  %t3 = fmul fast float %t2, 5.0e+3
190  ret float %t3
191; CHECK-LABEL: @fmul_distribute1(
192; CHECK: %1 = fmul fast float %f1, 3.000000e+07
193; CHECK: %t3 = fadd fast float %1, 1.000000e+07
194}
195
196; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3
197define double @fmul_distribute2(double %f1, double %f2) {
; Only the final multiply by DBL_MIN is 'fast'. The expected output divides
; %f1 by a new constant and adds a new constant, both 'fast'. %f2 is unused.
198  %t1 = fdiv double %f1, 3.0e+0
199  %t2 = fadd double %t1, 5.0e+1
200  ; 0x10000000000000 = DBL_MIN
201  %t3 = fmul fast double %t2, 0x10000000000000
202  ret double %t3
203
204; CHECK-LABEL: @fmul_distribute2(
205; CHECK: %1 = fdiv fast double %f1, 0x7FE8000000000000
206; CHECK: fadd fast double %1, 0x69000000000000
207}
208
209; 5.0e-1 * DBL_MIN yields denormal, so "(f1/3.0 + 5.0e-1) * DBL_MIN" cannot
210; be simplified into f1/(3.0/DBL_MIN) + (5.0e-1*DBL_MIN)
211define double @fmul_distribute3(double %f1) {
; Same shape as fmul_distribute2 but with an addend of 5.0e-1. The expected
; output still contains the fast multiply of %t2 by 0x10000000000000, i.e.
; no distribution takes place.
212  %t1 = fdiv double %f1, 3.0e+0
213  %t2 = fadd double %t1, 5.0e-1
214  %t3 = fmul fast double %t2, 0x10000000000000
215  ret double %t3
216
217; CHECK-LABEL: @fmul_distribute3(
218; CHECK: fmul fast double %t2, 0x10000000000000
219}
220
221; (C2 - (X*C1)) * C3 => (C2*C3) - (X * (C1*C3)) (i.e. distribution)
222define float @fmul_distribute4(float %f1) {
; Subtraction variant: (2.0e+3 - %f1 * 6.0e+3) * 5.0e+3, with only the final
; multiply 'fast'. Expected output: 1.0e+7 minus (%f1 * 3.0e+7), both 'fast'.
223  %t1 = fmul float %f1, 6.0e+3
224  %t2 = fsub float 2.0e+3, %t1
225  %t3 = fmul fast float %t2, 5.0e+3
226  ret float %t3
227; CHECK-LABEL: @fmul_distribute4(
228; CHECK: %1 = fmul fast float %f1, 3.000000e+07
229; CHECK: %t3 = fsub fast float 1.000000e+07, %1
230}
231
232; C1/X * C2 => (C1*C2) / X
233define float @fmul2(float %f1) {
; (2.0e+3 / %f1) * 6.0e+3 with only the multiply 'fast'; expected to become
; a single fast division of 1.2e+7 by %f1.
234  %t1 = fdiv float 2.0e+3, %f1
235  %t3 = fmul fast float %t1, 6.0e+3
236  ret float %t3
237; CHECK-LABEL: @fmul2(
238; CHECK: fdiv fast float 1.200000e+07, %f1
239}
240
241; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses
; External global that receives the quotient, giving %div a second use.
242@fmul2_external = external global float
243define float @fmul2_disable(float %f1) {
; %div (1.0 / %f1) is both stored to @fmul2_external and multiplied by 2.0.
; The expected output keeps the store followed by a fast fmul.
244  %div = fdiv fast float 1.000000e+00, %f1
245  store float %div, float* @fmul2_external
246  %mul = fmul fast float %div, 2.000000e+00
247  ret float %mul
; The label includes the opening parenthesis, matching the other labels in
; this file.
248; CHECK-LABEL: @fmul2_disable(
249; CHECK: store
250; CHECK: fmul fast
251}
252
253; X/C1 * C2 => X * (C2/C1) (if C2/C1 is normal Fp)
254define float @fmul3(float %f1, float %f2) {
; (%f1 / 2.0e+3) * 6.0e+3 with only the multiply 'fast'; expected to become
; a single fast multiply of %f1 by 3.0. %f2 is unused.
255  %t1 = fdiv float %f1, 2.0e+3
256  %t3 = fmul fast float %t1, 6.0e+3
257  ret float %t3
258; CHECK-LABEL: @fmul3(
259; CHECK: fmul fast float %f1, 3.000000e+00
260}
261
262define <4 x float> @fmul3_vec(<4 x float> %f1, <4 x float> %f2) {
; Vector form of fmul3: a strict lane-wise divide by constants followed by a
; 'fast' lane-wise multiply by constants. Expected output: one fast multiply
; by the per-lane quotients <3, 2, 1, 1>. %f2 is unused.
263  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
264  %t3 = fmul fast <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>
265  ret <4 x float> %t3
266; CHECK-LABEL: @fmul3_vec(
267; CHECK: fmul fast <4 x float> %f1, <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
268}
269
270; Make sure fmul with constant expression doesn't assert.
271define <4 x float> @fmul3_vec_constexpr(<4 x float> %f1, <4 x float> %f2) {
; The multiplier is a constant expression: a <5 x float> (last lane undef)
; bitcast to i160, truncated to i128 and bitcast to <4 x float>. No output
; patterns are attached to this function. %f2 is unused.
272  %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3, float undef> to i160) to i128) to <4 x float>
273  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
274  %t3 = fmul fast <4 x float> %t1, %constExprMul
275  ret <4 x float> %t3
276}
277
278; Rule "X/C1 * C2 => X * (C2/C1)" is not applicable if C2/C1 is either a special
279; value or a denormal. The 0x3810000000000000 here takes the value FLT_MIN.
280;
281define float @fmul4(float %f1, float %f2) {
; Strict divide by 2.0e+3 followed by a 'fast' multiply by
; 0x3810000000000000. The expected output still multiplies %t1 by that
; constant, i.e. the two constants are not merged. %f2 is unused.
282  %t1 = fdiv float %f1, 2.0e+3
283  %t3 = fmul fast float %t1, 0x3810000000000000
284  ret float %t3
285; CHECK-LABEL: @fmul4(
286; CHECK: fmul fast float %t1, 0x3810000000000000
287}
288
289; X / C1 * C2 => X / (C1/C2) if C2/C1 is either a special value or a denormal,
290;  and C1/C2 is a normal value.
291;
292define float @fmul5(float %f1, float %f2) {
; Strict divide by 3.0 followed by a 'fast' multiply by 0x3810000000000000.
; The expected output is a single fast division of %f1 by
; 0x47E8000000000000. %f2 is unused.
293  %t1 = fdiv float %f1, 3.0e+0
294  %t3 = fmul fast float %t1, 0x3810000000000000
295  ret float %t3
296; CHECK-LABEL: @fmul5(
297; CHECK: fdiv fast float %f1, 0x47E8000000000000
298}
299
300; (X*Y) * X => (X*X) * Y
301define float @fmul6(float %f1, float %f2) {
; (%f1 * %f2) * %f1 with only the outer multiply 'fast'; the expected output
; contains a fast multiply of %f1 by itself.
302  %mul = fmul float %f1, %f2
303  %mul1 = fmul fast float %mul, %f1
304  ret float %mul1
305; CHECK-LABEL: @fmul6(
306; CHECK: fmul fast float %f1, %f1
307}
308
309; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
310define float @fmul7(float %f1, float %f2) {
; Same as fmul6, except %mul is used a second time by the strict fadd. The
; expected output keeps the fast multiply of %mul by %f1 as written.
311  %mul = fmul float %f1, %f2
312  %mul1 = fmul fast float %mul, %f1
313  %add = fadd float %mul1, %mul
314  ret float %add
315; CHECK-LABEL: @fmul7(
316; CHECK: fmul fast float %mul, %f1
317}
318
319; =========================================================================
320;
321;   Testing-cases about negation
322;
323; =========================================================================
324define float @fneg1(float %f1, float %f2) {
; Multiplies two negated values: %sub is -0.0 - %f1 and %sub1 is an 'nsz'
; 0.0 - %f2. The expected output multiplies %f1 and %f2 directly.
325  %sub = fsub float -0.000000e+00, %f1
326  %sub1 = fsub nsz float 0.000000e+00, %f2
327  %mul = fmul float %sub, %sub1
328  ret float %mul
329; CHECK-LABEL: @fneg1(
330; CHECK: fmul float %f1, %f2
331}
332
333define float @fneg2(float %x) {
; An 'nsz' subtraction from +0.0; the expected output rewrites it as an
; 'nsz' subtraction from -0.0, immediately followed by the ret.
334  %sub = fsub nsz float 0.0, %x
335  ret float %sub
336; CHECK-LABEL: @fneg2(
337; CHECK-NEXT: fsub nsz float -0.000000e+00, %x
338; CHECK-NEXT: ret float
339}
340
341; =========================================================================
342;
343;   Testing-cases about div
344;
345; =========================================================================
346
347; X/C1 / C2 => X * (1/(C2*C1))
348define float @fdiv1(float %x) {
; Strict divide by 1.2f followed by a 'fast' divide by 2.3f; the expected
; output is a single fast multiply of %x by the combined reciprocal.
349  %div = fdiv float %x, 0x3FF3333340000000
350  %div1 = fdiv fast float %div, 0x4002666660000000
351  ret float %div1
352; 0x3FF3333340000000 = 1.2f
353; 0x4002666660000000 = 2.3f
354; 0x3FD7303B60000000 = 0.36231884057971014492
355; CHECK-LABEL: @fdiv1(
356; CHECK: fmul fast float %x, 0x3FD7303B60000000
357}
358
359; X*C1 / C2 => X * (C1/C2)
360define float @fdiv2(float %x) {
; Strict multiply by 1.2f followed by a 'fast' divide by 2.3f; the expected
; output is a single fast multiply of %x by the quotient of the constants.
361  %mul = fmul float %x, 0x3FF3333340000000
362  %div1 = fdiv fast float %mul, 0x4002666660000000
363  ret float %div1
364
365; 0x3FF3333340000000 = 1.2f
366; 0x4002666660000000 = 2.3f
367; 0x3FE0B21660000000 = 0.52173918485641479492
368; CHECK-LABEL: @fdiv2(
369; CHECK: fmul fast float %x, 0x3FE0B21660000000
370}
371
372define <2 x float> @fdiv2_vec(<2 x float> %x) {
; Vector form of fdiv2: lanes are multiplied by <6, 9> (strict) and then
; divided by <2, 3> ('fast'). Expected output: one fast multiply by <3, 3>.
373  %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
374  %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
375  ret <2 x float> %div1
376
377; CHECK-LABEL: @fdiv2_vec(
378; CHECK: fmul fast <2 x float> %x, <float 3.000000e+00, float 3.000000e+00>
379}
380
381; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2/C1 is a denormal
382;
383define float @fdiv3(float %x) {
; Strict divide by 0x47EFFFFFE0000000 followed by a 'fast' divide by
; 0x4002666660000000. The expected output still contains the first, strict
; division unchanged.
384  %div = fdiv float %x, 0x47EFFFFFE0000000
385  %div1 = fdiv fast float %div, 0x4002666660000000
386  ret float %div1
387; CHECK-LABEL: @fdiv3(
388; CHECK: fdiv float %x, 0x47EFFFFFE0000000
389}
390
391; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
392define float @fdiv4(float %x) {
; Multiply by 0x47EFFFFFE0000000 followed by a divide by 0x3FC99999A0000000.
; The expected output still contains the original multiply.
; NOTE(review): neither instruction carries fast-math flags (compare fdiv2,
; whose fdiv is 'fast'), so the fold may be ruled out regardless of the
; constants -- confirm whether the fdiv was meant to be 'fast'.
393  %mul = fmul float %x, 0x47EFFFFFE0000000
394  %div = fdiv float %mul, 0x3FC99999A0000000
395  ret float %div
396; CHECK-LABEL: @fdiv4(
397; CHECK: fmul float %x, 0x47EFFFFFE0000000
398}
399
400; (X/Y)/Z => X/(Y*Z)
401define float @fdiv5(float %f1, float %f2, float %f3) {
; (%f1 / %f2) / %f3 with only the outer divide 'fast'; the expected output
; contains a multiply of the two divisors %f2 and %f3.
402  %t1 = fdiv float %f1, %f2
403  %t2 = fdiv fast float %t1, %f3
404  ret float %t2
405; CHECK-LABEL: @fdiv5(
406; CHECK: fmul float %f2, %f3
407}
408
409; Z/(X/Y) => (Z*Y)/X
410define float @fdiv6(float %f1, float %f2, float %f3) {
; %f3 / (%f1 / %f2) with only the outer divide 'fast'; the expected output
; contains a multiply of %f3 by %f2.
411  %t1 = fdiv float %f1, %f2
412  %t2 = fdiv fast float %f3, %t1
413  ret float %t2
414; CHECK-LABEL: @fdiv6(
415; CHECK: fmul float %f3, %f2
416}
417
418; C1/(X*C2) => (C1/C2) / X
419define float @fdiv7(float %x) {
; 15.0 / (%x * 3.0) with only the divide 'fast'; expected to become a single
; fast division of 5.0 by %x.
420  %t1 = fmul float %x, 3.0e0
421  %t2 = fdiv fast float 15.0e0, %t1
422  ret float %t2
423; CHECK-LABEL: @fdiv7(
424; CHECK: fdiv fast float 5.000000e+00, %x
425}
426
427; C1/(X/C2) => (C1*C2) / X
428define float @fdiv8(float %x) {
; 15.0 / (%x / 3.0) with only the outer divide 'fast'; expected to become a
; single fast division of 45.0 by %x.
429  %t1 = fdiv float %x, 3.0e0
430  %t2 = fdiv fast float 15.0e0, %t1
431  ret float %t2
432; CHECK-LABEL: @fdiv8(
433; CHECK: fdiv fast float 4.500000e+01, %x
434}
435
436; C1/(C2/X) => (C1/C2) * X
437define float @fdiv9(float %x) {
; 15.0 / (3.0 / %x) with only the outer divide 'fast'; expected to become a
; single fast multiply of %x by 5.0.
438  %t1 = fdiv float 3.0e0, %x
439  %t2 = fdiv fast float 15.0e0, %t1
440  ret float %t2
441; CHECK-LABEL: @fdiv9(
442; CHECK: fmul fast float %x, 5.000000e+00
443}
444
445; =========================================================================
446;
447;   Testing-cases about factorization
448;
449; =========================================================================
450; x*z + y*z => (x+y) * z
451define float @fact_mul1(float %x, float %y, float %z) {
; Sum of two 'fast' products that share the right-hand factor %z. The
; expected output contains a fast multiply of an unnamed temporary by %z.
452  %t1 = fmul fast float %x, %z
453  %t2 = fmul fast float %y, %z
454  %t3 = fadd fast float %t1, %t2
455  ret float %t3
456; CHECK-LABEL: @fact_mul1(
457; CHECK: fmul fast float %1, %z
458}
459
460; z*x - y*z => (x-y) * z
461define float @fact_mul2(float %x, float %y, float %z) {
; Difference (fsub) of two 'fast' products; %z is the left factor of the
; first and the right factor of the second. The expected output contains a
; fast multiply of an unnamed temporary by %z.
462  %t1 = fmul fast float %z, %x
463  %t2 = fmul fast float %y, %z
464  %t3 = fsub fast float %t1, %t2
465  ret float %t3
466; CHECK-LABEL: @fact_mul2(
467; CHECK: fmul fast float %1, %z
468}
469
470; z*x - z*y => (x-y) * z
471define float @fact_mul3(float %x, float %y, float %z) {
; Difference of two 'fast' products with %z as the left factor of both; the
; subtrahend %t2 is defined first. The expected output contains a fast
; multiply of an unnamed temporary by %z.
472  %t2 = fmul fast float %z, %y
473  %t1 = fmul fast float %z, %x
474  %t3 = fsub fast float %t1, %t2
475  ret float %t3
476; CHECK-LABEL: @fact_mul3(
477; CHECK: fmul fast float %1, %z
478}
479
480; x*z - z*y => (x-y) * z
481define float @fact_mul4(float %x, float %y, float %z) {
; Difference of two 'fast' products; %z is the right factor of the first and
; the left factor of the second. The expected output contains a fast
; multiply of an unnamed temporary by %z.
482  %t1 = fmul fast float %x, %z
483  %t2 = fmul fast float %z, %y
484  %t3 = fsub fast float %t1, %t2
485  ret float %t3
486; CHECK-LABEL: @fact_mul4(
487; CHECK: fmul fast float %1, %z
488}
489
490; x/y + x/z, no xform
491define float @fact_div1(float %x, float %y, float %z) {
; Two 'fast' quotients share the dividend %x but not the divisor; the
; expected output keeps the fadd of %t1 and %t2 as written.
492  %t1 = fdiv fast float %x, %y
493  %t2 = fdiv fast float %x, %z
494  %t3 = fadd fast float %t1, %t2
495  ret float %t3
; Anchor on the function label so the pattern below is confined to this
; function's output.
496; CHECK-LABEL: @fact_div1(
497; CHECK: fadd fast float %t1, %t2
498}
499
500; x/y + z/x; no xform
501define float @fact_div2(float %x, float %y, float %z) {
; %x is the dividend of one 'fast' quotient and the divisor of the other;
; the expected output keeps the fadd of %t1 and %t2 as written.
502  %t1 = fdiv fast float %x, %y
503  %t2 = fdiv fast float %z, %x
504  %t3 = fadd fast float %t1, %t2
505  ret float %t3
; Anchor on the function label so the pattern below is confined to this
; function's output.
506; CHECK-LABEL: @fact_div2(
507; CHECK: fadd fast float %t1, %t2
508}
509
510; y/x + z/x => (y+z)/x
511define float @fact_div3(float %x, float %y, float %z) {
; Sum of two 'fast' quotients with the common divisor %x; the expected
; output contains a fast division of an unnamed temporary by %x.
512  %t1 = fdiv fast float %y, %x
513  %t2 = fdiv fast float %z, %x
514  %t3 = fadd fast float %t1, %t2
515  ret float %t3
; Anchor on the function label so the pattern below is confined to this
; function's output.
516; CHECK-LABEL: @fact_div3(
517; CHECK: fdiv fast float %1, %x
518}
519
520; y/x - z/x => (y-z)/x
521define float @fact_div4(float %x, float %y, float %z) {
; Difference of two 'fast' quotients with the common divisor %x; the
; expected output contains a fast division of an unnamed temporary by %x.
522  %t1 = fdiv fast float %y, %x
523  %t2 = fdiv fast float %z, %x
524  %t3 = fsub fast float %t1, %t2
525  ret float %t3
; Anchor on the function label so the pattern below is confined to this
; function's output.
526; CHECK-LABEL: @fact_div4(
527; CHECK: fdiv fast float %1, %x
528}
529
530; y/x + z/x => (y+z)/x is still applied here because y+z is a normal value.
531define float @fact_div5(float %x) {
; Sum (fadd) of two 'fast' quotients whose dividends are the constants
; 0x3810000000000000 and 0x3800000000000000. The expected output is a single
; fast division of the combined constant 0x3818000000000000 by %x.
532  %t1 = fdiv fast float 0x3810000000000000, %x
533  %t2 = fdiv fast float 0x3800000000000000, %x
534  %t3 = fadd fast float %t1, %t2
535  ret float %t3
; Anchor on the function label so the pattern below is confined to this
; function's output.
536; CHECK-LABEL: @fact_div5(
537; CHECK: fdiv fast float 0x3818000000000000, %x
538}
539
540; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
541define float @fact_div6(float %x) {
; Same constants as fact_div5 but combined with fsub. The expected output
; keeps the fast fsub of %t1 and %t2 as written.
542  %t1 = fdiv fast float 0x3810000000000000, %x
543  %t2 = fdiv fast float 0x3800000000000000, %x
544  %t3 = fsub fast float %t1, %t2
545  ret float %t3
; Anchor on the function label so the pattern below is confined to this
; function's output.
546; CHECK-LABEL: @fact_div6(
547; CHECK: %t3 = fsub fast float %t1, %t2
548}
549
550; =========================================================================
551;
552;   Test-cases for square root
553;
554; =========================================================================
555
556; A squared factor fed into a square root intrinsic should be hoisted out
557; as a fabs() value.
558
; Declaration of the f64 square-root intrinsic called by the sqrt_intrinsic_*
; functions in this file.
559declare double @llvm.sqrt.f64(double)
560
561define double @sqrt_intrinsic_arg_squared(double %x) {
; 'fast' sqrt of %x * %x; the expected output is just a fast call to
; llvm.fabs.f64 on %x, returned directly.
562  %mul = fmul fast double %x, %x
563  %sqrt = call fast double @llvm.sqrt.f64(double %mul)
564  ret double %sqrt
565
566; CHECK-LABEL: sqrt_intrinsic_arg_squared(
567; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
568; CHECK-NEXT: ret double %fabs
569}
570
571; Check all 6 combinations of a 3-way multiplication tree where
572; one factor is repeated.
573
574define double @sqrt_intrinsic_three_args1(double %x, double %y) {
; Operand ordering (%y * %x) * %x. Expected output: fabs(%x) times sqrt(%y),
; all 'fast'.
575  %mul = fmul fast double %y, %x
576  %mul2 = fmul fast double %mul, %x
577  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
578  ret double %sqrt
579
580; CHECK-LABEL: sqrt_intrinsic_three_args1(
581; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
582; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
583; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
584; CHECK-NEXT: ret double %1
585}
586
587define double @sqrt_intrinsic_three_args2(double %x, double %y) {
; Operand ordering (%x * %y) * %x. Expected output: fabs(%x) times sqrt(%y),
; all 'fast'.
588  %mul = fmul fast double %x, %y
589  %mul2 = fmul fast double %mul, %x
590  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
591  ret double %sqrt
592
593; CHECK-LABEL: sqrt_intrinsic_three_args2(
594; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
595; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
596; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
597; CHECK-NEXT: ret double %1
598}
599
600define double @sqrt_intrinsic_three_args3(double %x, double %y) {
; Operand ordering (%x * %x) * %y. Expected output: fabs(%x) times sqrt(%y),
; all 'fast'.
601  %mul = fmul fast double %x, %x
602  %mul2 = fmul fast double %mul, %y
603  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
604  ret double %sqrt
605
606; CHECK-LABEL: sqrt_intrinsic_three_args3(
607; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
608; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
609; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
610; CHECK-NEXT: ret double %1
611}
612
613define double @sqrt_intrinsic_three_args4(double %x, double %y) {
; Operand ordering %x * (%y * %x). Expected output: fabs(%x) times sqrt(%y),
; all 'fast'.
614  %mul = fmul fast double %y, %x
615  %mul2 = fmul fast double %x, %mul
616  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
617  ret double %sqrt
618
619; CHECK-LABEL: sqrt_intrinsic_three_args4(
620; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
621; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
622; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
623; CHECK-NEXT: ret double %1
624}
625
626define double @sqrt_intrinsic_three_args5(double %x, double %y) {
; Operand ordering %x * (%x * %y). Expected output: fabs(%x) times sqrt(%y),
; all 'fast'.
627  %mul = fmul fast double %x, %y
628  %mul2 = fmul fast double %x, %mul
629  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
630  ret double %sqrt
631
632; CHECK-LABEL: sqrt_intrinsic_three_args5(
633; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
634; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
635; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
636; CHECK-NEXT: ret double %1
637}
638
639define double @sqrt_intrinsic_three_args6(double %x, double %y) {
; Operand ordering %y * (%x * %x). Expected output: fabs(%x) times sqrt(%y),
; all 'fast'.
640  %mul = fmul fast double %x, %x
641  %mul2 = fmul fast double %y, %mul
642  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
643  ret double %sqrt
644
645; CHECK-LABEL: sqrt_intrinsic_three_args6(
646; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
647; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
648; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
649; CHECK-NEXT: ret double %1
650}
651
652; If any operation is not 'fast', we can't simplify.
653
654define double @sqrt_intrinsic_not_so_fast(double %x, double %y) {
; Same shape as sqrt_intrinsic_three_args3, but the first multiply has no
; fast-math flags. The expected output is the input, instruction for
; instruction.
655  %mul = fmul double %x, %x
656  %mul2 = fmul fast double %mul, %y
657  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
658  ret double %sqrt
659
660; CHECK-LABEL: sqrt_intrinsic_not_so_fast(
661; CHECK-NEXT:  %mul = fmul double %x, %x
662; CHECK-NEXT:  %mul2 = fmul fast double %mul, %y
663; CHECK-NEXT:  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
664; CHECK-NEXT:  ret double %sqrt
665}
666
667define double @sqrt_intrinsic_arg_4th(double %x) {
; 'fast' sqrt of (%x * %x) * (%x * %x); the expected output returns the
; first product %mul directly, with no sqrt or fabs call left.
668  %mul = fmul fast double %x, %x
669  %mul2 = fmul fast double %mul, %mul
670  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
671  ret double %sqrt
672
673; CHECK-LABEL: sqrt_intrinsic_arg_4th(
674; CHECK-NEXT: %mul = fmul fast double %x, %x
675; CHECK-NEXT: ret double %mul
676}
677
678define double @sqrt_intrinsic_arg_5th(double %x) {
; 'fast' sqrt of the fifth power of %x, built as ((%x*%x)*%x) * (%x*%x).
; Expected output: %x * %x multiplied by sqrt(%x), all 'fast'.
679  %mul = fmul fast double %x, %x
680  %mul2 = fmul fast double %mul, %x
681  %mul3 = fmul fast double %mul2, %mul
682  %sqrt = call fast double @llvm.sqrt.f64(double %mul3)
683  ret double %sqrt
684
685; CHECK-LABEL: sqrt_intrinsic_arg_5th(
686; CHECK-NEXT: %mul = fmul fast double %x, %x
687; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
688; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
689; CHECK-NEXT: ret double %1
690}
691
692; Check that square root calls have the same behavior.
693
; Declarations of the square-root functions (float, double and fp128
; signatures) called by the sqrt_call_squared_* functions in this file.
694declare float @sqrtf(float)
695declare double @sqrt(double)
696declare fp128 @sqrtl(fp128)
697
698define float @sqrt_call_squared_f32(float %x) {
; 'fast' call to sqrtf on %x * %x; the expected output is a fast call to
; llvm.fabs.f32 on %x, returned directly.
699  %mul = fmul fast float %x, %x
700  %sqrt = call fast float @sqrtf(float %mul)
701  ret float %sqrt
702
703; CHECK-LABEL: sqrt_call_squared_f32(
704; CHECK-NEXT: %fabs = call fast float @llvm.fabs.f32(float %x)
705; CHECK-NEXT: ret float %fabs
706}
707
708define double @sqrt_call_squared_f64(double %x) {
; 'fast' call to sqrt on %x * %x; the expected output is a fast call to
; llvm.fabs.f64 on %x, returned directly.
709  %mul = fmul fast double %x, %x
710  %sqrt = call fast double @sqrt(double %mul)
711  ret double %sqrt
712
713; CHECK-LABEL: sqrt_call_squared_f64(
714; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
715; CHECK-NEXT: ret double %fabs
716}
717
718define fp128 @sqrt_call_squared_f128(fp128 %x) {
; 'fast' call to sqrtl on %x * %x; the expected output is a fast call to
; llvm.fabs.f128 on %x, returned directly.
719  %mul = fmul fast fp128 %x, %x
720  %sqrt = call fast fp128 @sqrtl(fp128 %mul)
721  ret fp128 %sqrt
722
723; CHECK-LABEL: sqrt_call_squared_f128(
724; CHECK-NEXT: %fabs = call fast fp128 @llvm.fabs.f128(fp128 %x)
725; CHECK-NEXT: ret fp128 %fabs
726}
727
728; =========================================================================
729;
730;   Test-cases for fmin / fmax
731;
732; =========================================================================
733
; Declarations of the fmax/fmin functions (double, float and fp128
; signatures) called by the max* and min* functions in this file.
734declare double @fmax(double, double)
735declare double @fmin(double, double)
736declare float @fmaxf(float, float)
737declare float @fminf(float, float)
738declare fp128 @fmaxl(fp128, fp128)
739declare fp128 @fminl(fp128, fp128)
740
741; No NaNs is the minimum requirement to replace these calls.
742; This should always be set when unsafe-fp-math is true, but
743; alternate the attributes for additional test coverage.
744; 'nsz' is implied by the definition of fmax or fmin itself.
745
746; Shrink and remove the call.
747define float @max1(float %a, float %b) {
; Both floats are extended to double, passed to a 'fast' fmax call, and the
; result truncated back to float. The expected output compares and selects
; the original float arguments directly.
748  %c = fpext float %a to double
749  %d = fpext float %b to double
750  %e = call fast double @fmax(double %c, double %d)
751  %f = fptrunc double %e to float
752  ret float %f
753
754; CHECK-LABEL: max1(
755; CHECK-NEXT:  fcmp fast ogt float %a, %b
756; CHECK-NEXT:  select {{.*}} float %a, float %b
757; CHECK-NEXT:  ret
758}
759
760define float @max2(float %a, float %b) {
; The fmaxf call carries only 'nnan'. The expected output is an ordered
; greater-than compare flagged 'nnan nsz', followed by a select.
761  %c = call nnan float @fmaxf(float %a, float %b)
762  ret float %c
763
764; CHECK-LABEL: max2(
765; CHECK-NEXT:  fcmp nnan nsz ogt float %a, %b
766; CHECK-NEXT:  select {{.*}} float %a, float %b
767; CHECK-NEXT:  ret
768}
769
770
771define double @max3(double %a, double %b) {
; 'fast' fmax call on doubles; the expected output is a fast ordered
; greater-than compare followed by a select.
772  %c = call fast double @fmax(double %a, double %b)
773  ret double %c
774
775; CHECK-LABEL: max3(
776; CHECK-NEXT:  fcmp fast ogt double %a, %b
777; CHECK-NEXT:  select {{.*}} double %a, double %b
778; CHECK-NEXT:  ret
779}
780
781define fp128 @max4(fp128 %a, fp128 %b) {
; fmaxl call on fp128 carrying only 'nnan'. The expected output is an
; ordered greater-than compare flagged 'nnan nsz', followed by a select.
782  %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
783  ret fp128 %c
784
785; CHECK-LABEL: max4(
786; CHECK-NEXT:  fcmp nnan nsz ogt fp128 %a, %b
787; CHECK-NEXT:  select {{.*}} fp128 %a, fp128 %b
788; CHECK-NEXT:  ret
789}
790
791; Shrink and remove the call.
792define float @min1(float %a, float %b) {
; Both floats are extended to double, passed to an 'nnan' fmin call, and the
; result truncated back to float. The expected output compares ('nnan nsz',
; ordered less-than) and selects the original float arguments directly.
793  %c = fpext float %a to double
794  %d = fpext float %b to double
795  %e = call nnan double @fmin(double %c, double %d)
796  %f = fptrunc double %e to float
797  ret float %f
798
799; CHECK-LABEL: min1(
800; CHECK-NEXT:  fcmp nnan nsz olt float %a, %b
801; CHECK-NEXT:  select {{.*}} float %a, float %b
802; CHECK-NEXT:  ret
803}
804
805define float @min2(float %a, float %b) {
; 'fast' fminf call; the expected output is a fast ordered less-than compare
; followed by a select.
806  %c = call fast float @fminf(float %a, float %b)
807  ret float %c
808
809; CHECK-LABEL: min2(
810; CHECK-NEXT:  fcmp fast olt float %a, %b
811; CHECK-NEXT:  select {{.*}} float %a, float %b
812; CHECK-NEXT:  ret
813}
814
815define double @min3(double %a, double %b) {
; fmin call on doubles carrying only 'nnan'. The expected output is an
; ordered less-than compare flagged 'nnan nsz', followed by a select.
816  %c = call nnan double @fmin(double %a, double %b)
817  ret double %c
818
819; CHECK-LABEL: min3(
820; CHECK-NEXT:  fcmp nnan nsz olt double %a, %b
821; CHECK-NEXT:  select {{.*}} double %a, double %b
822; CHECK-NEXT:  ret
823}
824
825define fp128 @min4(fp128 %a, fp128 %b) {
; 'fast' fminl call on fp128; the expected output is a fast ordered
; less-than compare followed by a select.
826  %c = call fast fp128 @fminl(fp128 %a, fp128 %b)
827  ret fp128 %c
828
829; CHECK-LABEL: min4(
830; CHECK-NEXT:  fcmp fast olt fp128 %a, %b
831; CHECK-NEXT:  select {{.*}} fp128 %a, fp128 %b
832; CHECK-NEXT:  ret
833}
834