; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Source-level case: "float fold(float a) { return 1.2f * a * 2.3f; }"
; With 'fast' on both multiplies, the constants 1.2f and 2.3f are expected
; to be folded into a single constant.
define float @fold(float %a) {
; CHECK-LABEL: @fold(
; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
; CHECK-NEXT:    ret float [[MUL1]]
;
  %mul = fmul fast float %a, 0x3FF3333340000000
  %mul1 = fmul fast float %mul, 0x4002666660000000
  ret float %mul1
}

; Same expression as in fold(), except that the final multiply carries no
; fast-math flags, so both multiplies must be kept.
define float @notfold(float %a) {
; CHECK-LABEL: @notfold(
; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
; CHECK-NEXT:    ret float [[MUL1]]
;
  %mul = fmul fast float %a, 0x3FF3333340000000
  %mul1 = fmul float %mul, 0x4002666660000000
  ret float %mul1
}

; Only the final multiply is 'fast'; the two constants are still folded.
define float @fold2(float %a) {
; CHECK-LABEL: @fold2(
; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
; CHECK-NEXT:    ret float [[MUL1]]
;
  %mul = fmul float %a, 0x3FF3333340000000
  %mul1 = fmul fast float %mul, 0x4002666660000000
  ret float %mul1
}

; C * f1 + f1 = (C+1) * f1
; TODO: The particular case where C is 2 (so the folded result is 3.0*f1) is
; always safe, and so doesn't need any FMF.
; That is, (x + x + x) and (3*x) each have only a single rounding.
define double @fold3(double %f1) {
; CHECK-LABEL: @fold3(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[F1:%.*]], 6.000000e+00
; CHECK-NEXT:    ret double [[TMP1]]
;
  %t1 = fmul fast double 5.000000e+00, %f1
  %t2 = fadd fast double %f1, %t1
  ret double %t2
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define double @fold3_reassoc_nsz(double %f1) {
; CHECK-LABEL: @fold3_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz double [[F1:%.*]], 6.000000e+00
; CHECK-NEXT:    ret double [[TMP1]]
;
  %t1 = fmul reassoc nsz double 5.000000e+00, %f1
  %t2 = fadd reassoc nsz double %f1, %t1
  ret double %t2
}

; With 'reassoc' alone the fold is currently not performed.
; TODO: This doesn't require 'nsz'.  It should fold to f1 * 6.0.
define double @fold3_reassoc(double %f1) {
; CHECK-LABEL: @fold3_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc double [[F1:%.*]], 5.000000e+00
; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc double [[TMP1]], [[F1]]
; CHECK-NEXT:    ret double [[TMP2]]
;
  %t1 = fmul reassoc double 5.000000e+00, %f1
  %t2 = fadd reassoc double %f1, %t1
  ret double %t2
}

; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
define float @fold4(float %f1, float %f2) {
; CHECK-LABEL: @fold4(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast float 9.000000e+00, [[TMP1]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %sub = fsub float 4.000000e+00, %f1
  %sub1 = fsub float 5.000000e+00, %f2
  %add = fadd fast float %sub, %sub1
  ret float %add
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold4_reassoc_nsz(float %f1, float %f2) {
; CHECK-LABEL: @fold4_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fsub reassoc nsz float 9.000000e+00, [[TMP1]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %sub = fsub float 4.000000e+00, %f1
  %sub1 = fsub float 5.000000e+00, %f2
  %add = fadd reassoc nsz float %sub, %sub1
  ret float %add
}

; TODO: This doesn't require 'nsz'.  It should fold to (9.0 - (f1 + f2)).
define float @fold4_reassoc(float %f1, float %f2) {
; CHECK-LABEL: @fold4_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub float 4.000000e+00, [[F1:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fsub float 5.000000e+00, [[F2:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[TMP3]]
;
  %sub = fsub float 4.000000e+00, %f1
  %sub1 = fsub float 5.000000e+00, %f2
  %add = fadd reassoc float %sub, %sub1
  ret float %add
}

; (X + C1) + C2 => X + (C1 + C2)
define float @fold5(float %f1) {
; CHECK-LABEL: @fold5(
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float [[F1:%.*]], 9.000000e+00
; CHECK-NEXT:    ret float [[ADD1]]
;
  %add = fadd float %f1, 4.000000e+00
  %add1 = fadd fast float %add, 5.000000e+00
  ret float %add1
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold5_reassoc_nsz(float %f1) {
; CHECK-LABEL: @fold5_reassoc_nsz(
; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc nsz float [[F1:%.*]], 9.000000e+00
; CHECK-NEXT:    ret float [[ADD1]]
;
  %add = fadd float %f1, 4.000000e+00
  %add1 = fadd reassoc nsz float %add, 5.000000e+00
  ret float %add1
}

; With 'reassoc' alone the fold is currently not performed.
; TODO: This doesn't require 'nsz'.  It should fold to f1 + 9.0
define float @fold5_reassoc(float %f1) {
; CHECK-LABEL: @fold5_reassoc(
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[F1:%.*]], 4.000000e+00
; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc float [[ADD]], 5.000000e+00
; CHECK-NEXT:    ret float [[ADD1]]
;
  %add = fadd float %f1, 4.000000e+00
  %add1 = fadd reassoc float %add, 5.000000e+00
  ret float %add1
}

; (X + X) + X + X => 4.0 * X
define float @fold6(float %f1) {
; CHECK-LABEL: @fold6(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 4.000000e+00
; CHECK-NEXT:    ret float [[TMP1]]
;
  %t1 = fadd fast float %f1, %f1
  %t2 = fadd fast float %f1, %t1
  %t3 = fadd fast float %t2, %f1
  ret float %t3
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold6_reassoc_nsz(float %f1) {
; CHECK-LABEL: @fold6_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 4.000000e+00
; CHECK-NEXT:    ret float [[TMP1]]
;
  %t1 = fadd reassoc nsz float %f1, %f1
  %t2 = fadd reassoc nsz float %f1, %t1
  %t3 = fadd reassoc nsz float %t2, %f1
  ret float %t3
}

; TODO: This doesn't require 'nsz'.  It should fold to f1 * 4.0.
define float @fold6_reassoc(float %f1) {
; CHECK-LABEL: @fold6_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc float [[TMP1]], [[F1]]
; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP2]], [[F1]]
; CHECK-NEXT:    ret float [[TMP3]]
;
  %t1 = fadd reassoc float %f1, %f1
  %t2 = fadd reassoc float %f1, %t1
  %t3 = fadd reassoc float %t2, %f1
  ret float %t3
}

; C1 * X + (X + X) = (C1 + 2) * X
define float @fold7(float %f1) {
; CHECK-LABEL: @fold7(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 7.000000e+00
; CHECK-NEXT:    ret float [[TMP1]]
;
  %t1 = fmul fast float %f1, 5.000000e+00
  %t2 = fadd fast float %f1, %f1
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold7_reassoc_nsz(float %f1) {
; CHECK-LABEL: @fold7_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 7.000000e+00
; CHECK-NEXT:    ret float [[TMP1]]
;
  %t1 = fmul reassoc nsz float %f1, 5.000000e+00
  %t2 = fadd reassoc nsz float %f1, %f1
  %t3 = fadd reassoc nsz float %t1, %t2
  ret float %t3
}

; TODO: This doesn't require 'nsz'.  It should fold to f1 * 7.0.
define float @fold7_reassoc(float %f1) {
; CHECK-LABEL: @fold7_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[F1:%.*]], 5.000000e+00
; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc float [[F1]], [[F1]]
; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[TMP3]]
;
  %t1 = fmul reassoc float %f1, 5.000000e+00
  %t2 = fadd reassoc float %f1, %f1
  %t3 = fadd reassoc float %t1, %t2
  ret float %t3
}

; (X + X) + (X + X) + X => 5.0 * X
define float @fold8(float %f1) {
; CHECK-LABEL: @fold8(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 5.000000e+00
; CHECK-NEXT:    ret float [[TMP1]]
;
  %t1 = fadd fast float %f1, %f1
  %t2 = fadd fast float %f1, %f1
  %t3 = fadd fast float %t1, %t2
  %t4 = fadd fast float %t3, %f1
  ret float %t4
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold8_reassoc_nsz(float %f1) {
; CHECK-LABEL: @fold8_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 5.000000e+00
; CHECK-NEXT:    ret float [[TMP1]]
;
  %t1 = fadd reassoc nsz float %f1, %f1
  %t2 = fadd reassoc nsz float %f1, %f1
  %t3 = fadd reassoc nsz float %t1, %t2
  %t4 = fadd reassoc nsz float %t3, %f1
  ret float %t4
}

; TODO: This doesn't require 'nsz'.  It should fold to f1 * 5.0.
define float @fold8_reassoc(float %f1) {
; CHECK-LABEL: @fold8_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc float [[F1]], [[F1]]
; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = fadd reassoc float [[TMP3]], [[F1]]
; CHECK-NEXT:    ret float [[TMP4]]
;
  %t1 = fadd reassoc float %f1, %f1
  %t2 = fadd reassoc float %f1, %f1
  %t3 = fadd reassoc float %t1, %t2
  %t4 = fadd reassoc float %t3, %f1
  ret float %t4
}

; X - (X + Y) => 0 - Y
define float @fold9(float %f1, float %f2) {
; CHECK-LABEL: @fold9(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float -0.000000e+00, [[F2:%.*]]
; CHECK-NEXT:    ret float [[TMP1]]
;
  %t1 = fadd float %f1, %f2
  %t3 = fsub fast float %f1, %t1
  ret float %t3
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold9_reassoc_nsz(float %f1, float %f2) {
; CHECK-LABEL: @fold9_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float -0.000000e+00, [[F2:%.*]]
; CHECK-NEXT:    ret float [[TMP1]]
;
  %t1 = fadd float %f1, %f2
  %t3 = fsub reassoc nsz float %f1, %t1
  ret float %t3
}

; With 'reassoc' alone the fold is currently not performed.
; TODO: This doesn't require 'nsz'.  It should fold to 0 - f2
define float @fold9_reassoc(float %f1, float %f2) {
; CHECK-LABEL: @fold9_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd float [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fsub reassoc float [[F1]], [[TMP1]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t1 = fadd float %f1, %f2
  %t3 = fsub reassoc float %f1, %t1
  ret float %t3
}

; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the
; top of resulting simplified expression tree may potentially reveal some
; optimization opportunities in the super-expression trees.
;
define float @fold10(float %f1, float %f2) {
; CHECK-LABEL: @fold10(
; CHECK-NEXT:    [[T2:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T2]], -1.000000e+00
; CHECK-NEXT:    ret float [[T3]]
;
  %t1 = fadd fast float 2.000000e+00, %f1
  %t2 = fsub fast float %f2, 3.000000e+00
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; Check again with 'reassoc' and 'nsz'.
; TODO: We may be able to remove the 'nsz' requirement.
define float @fold10_reassoc_nsz(float %f1, float %f2) {
; CHECK-LABEL: @fold10_reassoc_nsz(
; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc nsz float [[T2]], -1.000000e+00
; CHECK-NEXT:    ret float [[T3]]
;
  %t1 = fadd reassoc nsz float 2.000000e+00, %f1
  %t2 = fsub reassoc nsz float %f2, 3.000000e+00
  %t3 = fadd reassoc nsz float %t1, %t2
  ret float %t3
}

; Observe that the fold is not done with only reassoc (the instructions are
; canonicalized, but not folded).
; TODO: As noted above, 'nsz' may not be required for this to be fully folded.
define float @fold10_reassoc(float %f1, float %f2) {
; CHECK-LABEL: @fold10_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], 2.000000e+00
; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc float [[F2:%.*]], -3.000000e+00
; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[TMP3]]
;
  %t1 = fadd reassoc float 2.000000e+00, %f1
  %t2 = fsub reassoc float %f2, 3.000000e+00
  %t3 = fadd reassoc float %t1, %t2
  ret float %t3
}

; This used to crash/miscompile.
define float @fail1(float %f1, float %f2) {
; CHECK-LABEL: @fail1(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 3.000000e+00
; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast float [[TMP1]], -3.000000e+00
; CHECK-NEXT:    ret float [[TMP2]]
;
  %conv3 = fadd fast float %f1, -1.000000e+00
  %add = fadd fast float %conv3, %conv3
  %add2 = fadd fast float %add, %conv3
  ret float %add2
}

; (f1 - f2) - (f1 + f2) simplifies to the negation of (f2 + f2).
define double @fail2(double %f1, double %f2) {
; CHECK-LABEL: @fail2(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast double [[F2:%.*]], [[F2]]
; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast double -0.000000e+00, [[TMP1]]
; CHECK-NEXT:    ret double [[TMP2]]
;
  %t1 = fsub fast double %f1, %f2
  %t2 = fadd fast double %f1, %f2
  %t3 = fsub fast double %t1, %t2
  ret double %t3
}

; c1 * x - x => (c1 - 1.0) * x
define float @fold13(float %x) {
; CHECK-LABEL: @fold13(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], 6.000000e+00
; CHECK-NEXT:    ret float [[TMP1]]
;
  %mul = fmul fast float %x, 7.000000e+00
  %sub = fsub fast float %mul, %x
  ret float %sub
}

; Check again using the minimal subset of FMF.
define float @fold13_reassoc_nsz(float %x) {
; CHECK-LABEL: @fold13_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[X:%.*]], 6.000000e+00
; CHECK-NEXT:    ret float [[TMP1]]
;
  %mul = fmul reassoc nsz float %x, 7.000000e+00
  %sub = fsub reassoc nsz float %mul, %x
  ret float %sub
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fold13_reassoc(float %x) {
; CHECK-LABEL: @fold13_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 7.000000e+00
; CHECK-NEXT:    [[TMP2:%.*]] = fsub reassoc float [[TMP1]], [[X]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %mul = fmul reassoc float %x, 7.000000e+00
  %sub = fsub reassoc float %mul, %x
  ret float %sub
}

; (select X+Y, X-Y) => X + (select Y, -Y)
; This is always safe.  No FMF required.
define float @fold16(float %x, float %y) {
; CHECK-LABEL: @fold16(
; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = fsub float -0.000000e+00, [[Y]]
; CHECK-NEXT:    [[R_P:%.*]] = select i1 [[CMP]], float [[Y]], float [[TMP1]]
; CHECK-NEXT:    [[R:%.*]] = fadd float [[R_P]], [[X]]
; CHECK-NEXT:    ret float [[R]]
;
  %cmp = fcmp ogt float %x, %y
  %plus = fadd float %x, %y
  %minus = fsub float %x, %y
  %r = select i1 %cmp, float %plus, float %minus
  ret float %r
}

; =========================================================================
;
;   Testing-cases about negation
;
; =========================================================================

; The product of two negated values is the product of the original values.
define float @fneg1(float %f1, float %f2) {
; CHECK-LABEL: @fneg1(
; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT:    ret float [[MUL]]
;
  %sub = fsub float -0.000000e+00, %f1
  %sub1 = fsub nsz float 0.000000e+00, %f2
  %mul = fmul float %sub, %sub1
  ret float %mul
}

; With 'nsz', (0.0 - x) is canonicalized to (-0.0 - x).
define float @fneg2(float %x) {
; CHECK-LABEL: @fneg2(
; CHECK-NEXT:    [[SUB:%.*]] = fsub nsz float -0.000000e+00, [[X:%.*]]
; CHECK-NEXT:    ret float [[SUB]]
;
  %sub = fsub nsz float 0.0, %x
  ret float %sub
}

; Vector variant of fneg2 with an undef element in the constant operand.
define <2 x float> @fneg2_vec_undef(<2 x float> %x) {
; CHECK-LABEL: @fneg2_vec_undef(
; CHECK-NEXT:    [[SUB:%.*]] = fsub nsz <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
; CHECK-NEXT:    ret <2 x float> [[SUB]]
;
  %sub = fsub nsz <2 x float> <float undef, float 0.0>, %x
  ret <2 x float> %sub
}

; =========================================================================
;
;   Testing-cases about div
;
; =========================================================================

; X/C1 / C2 => X * (1/(C2*C1))
define float @fdiv1(float %x) {
; CHECK-LABEL: @fdiv1(
; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FD7303B60000000
; CHECK-NEXT:    ret float [[DIV1]]
;
  %div = fdiv float %x, 0x3FF3333340000000
  %div1 = fdiv fast float %div, 0x4002666660000000
  ret float %div1
; 0x3FF3333340000000 = 1.2f
; 0x4002666660000000 = 2.3f
; 0x3FD7303B60000000 = 0.36231884057971014492
}

; X*C1 / C2 => X * (C1/C2)
define float @fdiv2(float %x) {
; CHECK-LABEL: @fdiv2(
; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FE0B21660000000
; CHECK-NEXT:    ret float [[DIV1]]
;
  %mul = fmul float %x, 0x3FF3333340000000
  %div1 = fdiv fast float %mul, 0x4002666660000000
  ret float %div1

; 0x3FF3333340000000 = 1.2f
; 0x4002666660000000 = 2.3f
; 0x3FE0B21660000000 = 0.52173918485641479492
}

; Vector variant of fdiv2: each lane folds to its own quotient (6/2 and 9/3).
define <2 x float> @fdiv2_vec(<2 x float> %x) {
; CHECK-LABEL: @fdiv2_vec(
; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], <float 3.000000e+00, float 3.000000e+00>
; CHECK-NEXT:    ret <2 x float> [[DIV1]]
;
  %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
  %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
  ret <2 x float> %div1
}

; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2/C1 is a denormal
;
define float @fdiv3(float %x) {
; CHECK-LABEL: @fdiv3(
; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[X:%.*]], 0x47EFFFFFE0000000
; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[DIV]], 0x3FDBD37A80000000
; CHECK-NEXT:    ret float [[DIV1]]
;
  %div = fdiv float %x, 0x47EFFFFFE0000000
  %div1 = fdiv fast float %div, 0x4002666660000000
  ret float %div1
}

; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
define float @fdiv4(float %x) {
; CHECK-LABEL: @fdiv4(
; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[X:%.*]], 0x47EFFFFFE0000000
; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[MUL]], 0x3FC99999A0000000
; CHECK-NEXT:    ret float [[DIV]]
;
  %mul = fmul float %x, 0x47EFFFFFE0000000
  %div = fdiv float %mul, 0x3FC99999A0000000
  ret float %div
}

; =========================================================================
;
;   Testing-cases about factorization
;
; =========================================================================
; x*z + y*z => (x+y) * z
define float @fact_mul1(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul1(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t1 = fmul fast float %x, %z
  %t2 = fmul fast float %y, %z
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; Check again using the minimal subset of FMF.
define float @fact_mul1_reassoc_nsz(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul1_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t1 = fmul reassoc nsz float %x, %z
  %t2 = fmul reassoc nsz float %y, %z
  %t3 = fadd reassoc nsz float %t1, %t2
  ret float %t3
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fact_mul1_reassoc(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul1_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], [[Z:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc float [[Y:%.*]], [[Z]]
; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[TMP3]]
;
  %t1 = fmul reassoc float %x, %z
  %t2 = fmul reassoc float %y, %z
  %t3 = fadd reassoc float %t1, %t2
  ret float %t3
}

; z*x - y*z => (x-y) * z
define float @fact_mul2(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul2(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t1 = fmul fast float %z, %x
  %t2 = fmul fast float %y, %z
  %t3 = fsub fast float %t1, %t2
  ret float %t3
}

; Check again using the minimal subset of FMF.
define float @fact_mul2_reassoc_nsz(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul2_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t1 = fmul reassoc nsz float %z, %x
  %t2 = fmul reassoc nsz float %y, %z
  %t3 = fsub reassoc nsz float %t1, %t2
  ret float %t3
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fact_mul2_reassoc(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul2_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[Z:%.*]], [[X:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc float [[Y:%.*]], [[Z]]
; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[TMP3]]
;
  %t1 = fmul reassoc float %z, %x
  %t2 = fmul reassoc float %y, %z
  %t3 = fsub reassoc float %t1, %t2
  ret float %t3
}

; z*x - z*y => (x-y) * z
define float @fact_mul3(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul3(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t2 = fmul fast float %z, %y
  %t1 = fmul fast float %z, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
}

; Check again using the minimal subset of FMF.
define float @fact_mul3_reassoc_nsz(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul3_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t2 = fmul reassoc nsz float %z, %y
  %t1 = fmul reassoc nsz float %z, %x
  %t3 = fsub reassoc nsz float %t1, %t2
  ret float %t3
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fact_mul3_reassoc(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul3_reassoc(
; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc float [[Z:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[Z]], [[X:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[TMP3]]
;
  %t2 = fmul reassoc float %z, %y
  %t1 = fmul reassoc float %z, %x
  %t3 = fsub reassoc float %t1, %t2
  ret float %t3
}

; x*z - z*y => (x-y) * z
define float @fact_mul4(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul4(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t1 = fmul fast float %x, %z
  %t2 = fmul fast float %z, %y
  %t3 = fsub fast float %t1, %t2
  ret float %t3
}

; Check again using the minimal subset of FMF.
define float @fact_mul4_reassoc_nsz(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul4_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t1 = fmul reassoc nsz float %x, %z
  %t2 = fmul reassoc nsz float %z, %y
  %t3 = fsub reassoc nsz float %t1, %t2
  ret float %t3
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fact_mul4_reassoc(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul4_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], [[Z:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc float [[Z]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[TMP3]]
;
  %t1 = fmul reassoc float %x, %z
  %t2 = fmul reassoc float %z, %y
  %t3 = fsub reassoc float %t1, %t2
  ret float %t3
}

; x/y + x/z, no xform
define float @fact_div1(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div1(
; CHECK-NEXT:    [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = fdiv fast float [[X]], [[Z:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T1]], [[T2]]
; CHECK-NEXT:    ret float [[T3]]
;
  %t1 = fdiv fast float %x, %y
  %t2 = fdiv fast float %x, %z
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; x/y + z/x; no xform
define float @fact_div2(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div2(
; CHECK-NEXT:    [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = fdiv fast float [[Z:%.*]], [[X]]
; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T1]], [[T2]]
; CHECK-NEXT:    ret float [[T3]]
;
  %t1 = fdiv fast float %x, %y
  %t2 = fdiv fast float %z, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; y/x + z/x => (y+z)/x
define float @fact_div3(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div3(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast float [[TMP1]], [[X:%.*]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t1 = fdiv fast float %y, %x
  %t2 = fdiv fast float %z, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; Check again using the minimal subset of FMF.
define float @fact_div3_reassoc_nsz(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div3_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc nsz float [[TMP1]], [[X:%.*]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t1 = fdiv reassoc nsz float %y, %x
  %t2 = fdiv reassoc nsz float %z, %x
  %t3 = fadd reassoc nsz float %t1, %t2
  ret float %t3
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fact_div3_reassoc(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div3_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc float [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc float [[Z:%.*]], [[X]]
; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[TMP3]]
;
  %t1 = fdiv reassoc float %y, %x
  %t2 = fdiv reassoc float %z, %x
  %t3 = fadd reassoc float %t1, %t2
  ret float %t3
}

; y/x - z/x => (y-z)/x
define float @fact_div4(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div4(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast float [[TMP1]], [[X:%.*]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t1 = fdiv fast float %y, %x
  %t2 = fdiv fast float %z, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
}

; Check again using the minimal subset of FMF.
define float @fact_div4_reassoc_nsz(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div4_reassoc_nsz(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc nsz float [[TMP1]], [[X:%.*]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %t1 = fdiv reassoc nsz float %y, %x
  %t2 = fdiv reassoc nsz float %z, %x
  %t3 = fsub reassoc nsz float %t1, %t2
  ret float %t3
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fact_div4_reassoc(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div4_reassoc(
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc float [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc float [[Z:%.*]], [[X]]
; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[TMP3]]
;
  %t1 = fdiv reassoc float %y, %x
  %t2 = fdiv reassoc float %z, %x
  %t3 = fsub reassoc float %t1, %t2
  ret float %t3
}

; y/x + z/x => (y+z)/x is still performed for these constants: the folded
; numerator (0x3818000000000000) is a normal value, unlike the difference of
; the same two constants.
define float @fact_div5(float %x) {
; CHECK-LABEL: @fact_div5(
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast float 0x3818000000000000, [[X:%.*]]
; CHECK-NEXT:    ret float [[TMP1]]
;
  %t1 = fdiv fast float 0x3810000000000000, %x
  %t2 = fdiv fast float 0x3800000000000000, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
define float @fact_div6(float %x) {
; CHECK-LABEL: @fact_div6(
; CHECK-NEXT:    [[T1:%.*]] = fdiv fast float 0x3810000000000000, [[X:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = fdiv fast float 0x3800000000000000, [[X]]
; CHECK-NEXT:    [[T3:%.*]] = fsub fast float [[T1]], [[T2]]
; CHECK-NEXT:    ret float [[T3]]
;
  %t1 = fdiv fast float 0x3810000000000000, %x
  %t2 = fdiv fast float 0x3800000000000000, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
}

; =========================================================================
;
;   Test-cases for square root
;
; =========================================================================

; A squared factor fed into a square root intrinsic should be hoisted out
; as a fabs() value.

declare double @llvm.sqrt.f64(double)

define double @sqrt_intrinsic_arg_squared(double %x) {
; CHECK-LABEL: @sqrt_intrinsic_arg_squared(
; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT:    ret double [[FABS]]
;
  %mul = fmul fast double %x, %x
  %sqrt = call fast double @llvm.sqrt.f64(double %mul)
  ret double %sqrt
}

; Check all 6 combinations of a 3-way multiplication tree where
; one factor is repeated.
; Operand order: (y * x) * x
define double @sqrt_intrinsic_three_args1(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_three_args1(
; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
; CHECK-NEXT:    ret double [[TMP1]]
;
  %mul = fmul fast double %y, %x
  %mul2 = fmul fast double %mul, %x
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; Operand order: (x * y) * x
define double @sqrt_intrinsic_three_args2(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_three_args2(
; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
; CHECK-NEXT:    ret double [[TMP1]]
;
  %mul = fmul fast double %x, %y
  %mul2 = fmul fast double %mul, %x
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; Operand order: (x * x) * y
define double @sqrt_intrinsic_three_args3(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_three_args3(
; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
; CHECK-NEXT:    ret double [[TMP1]]
;
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %y
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; Operand order: x * (y * x)
define double @sqrt_intrinsic_three_args4(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_three_args4(
; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
; CHECK-NEXT:    ret double [[TMP1]]
;
  %mul = fmul fast double %y, %x
  %mul2 = fmul fast double %x, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; Operand order: x * (x * y)
define double @sqrt_intrinsic_three_args5(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_three_args5(
; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
; CHECK-NEXT:    ret double [[TMP1]]
;
  %mul = fmul fast double %x, %y
  %mul2 = fmul fast double %x, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; Operand order: y * (x * x)
define double @sqrt_intrinsic_three_args6(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_three_args6(
; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
; CHECK-NEXT:    ret double [[TMP1]]
;
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %y, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; If any operation is not 'fast', we can't simplify.
; Negative test: the inner x * x multiply carries no fast-math flags, so the
; assertions expect the whole sequence to come out unchanged.
define double @sqrt_intrinsic_not_so_fast(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_not_so_fast(
; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[X:%.*]], [[X]]
; CHECK-NEXT:    [[MUL2:%.*]] = fmul fast double [[MUL]], [[Y:%.*]]
; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[MUL2]])
; CHECK-NEXT:    ret double [[SQRT]]
;
  %mul = fmul double %x, %x
  %mul2 = fmul fast double %mul, %y
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; sqrt((x * x) * (x * x)): expected to reduce to x * x, with neither a sqrt
; nor a fabs call left in the output.
define double @sqrt_intrinsic_arg_4th(double %x) {
; CHECK-LABEL: @sqrt_intrinsic_arg_4th(
; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
; CHECK-NEXT:    ret double [[MUL]]
;
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; sqrt(((x * x) * x) * (x * x)): expected to become (x * x) * sqrt(x).
define double @sqrt_intrinsic_arg_5th(double %x) {
; CHECK-LABEL: @sqrt_intrinsic_arg_5th(
; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[MUL]], [[SQRT1]]
; CHECK-NEXT:    ret double [[TMP1]]
;
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %x
  %mul3 = fmul fast double %mul2, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul3)
  ret double %sqrt
}

; Check that square root calls have the same behavior.
; Library entry points for square root (as opposed to the llvm.sqrt intrinsic).
declare float @sqrtf(float)
declare double @sqrt(double)
declare fp128 @sqrtl(fp128)

; sqrtf(x * x), all 'fast': expected to become llvm.fabs.f32(x).
define float @sqrt_call_squared_f32(float %x) {
; CHECK-LABEL: @sqrt_call_squared_f32(
; CHECK-NEXT:    [[FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
; CHECK-NEXT:    ret float [[FABS]]
;
  %mul = fmul fast float %x, %x
  %sqrt = call fast float @sqrtf(float %mul)
  ret float %sqrt
}

; sqrt(x * x), all 'fast': expected to become llvm.fabs.f64(x).
define double @sqrt_call_squared_f64(double %x) {
; CHECK-LABEL: @sqrt_call_squared_f64(
; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT:    ret double [[FABS]]
;
  %mul = fmul fast double %x, %x
  %sqrt = call fast double @sqrt(double %mul)
  ret double %sqrt
}

; sqrtl(x * x), all 'fast': expected to become llvm.fabs.f128(x).
define fp128 @sqrt_call_squared_f128(fp128 %x) {
; CHECK-LABEL: @sqrt_call_squared_f128(
; CHECK-NEXT:    [[FABS:%.*]] = call fast fp128 @llvm.fabs.f128(fp128 [[X:%.*]])
; CHECK-NEXT:    ret fp128 [[FABS]]
;
  %mul = fmul fast fp128 %x, %x
  %sqrt = call fast fp128 @sqrtl(fp128 %mul)
  ret fp128 %sqrt
}

; =========================================================================
;
;   Test-cases for fmin / fmax
;
; =========================================================================

declare double @fmax(double, double)
declare double @fmin(double, double)
declare float @fmaxf(float, float)
declare float @fminf(float, float)
declare fp128 @fmaxl(fp128, fp128)
declare fp128 @fminl(fp128, fp128)

; 'nnan' (no NaNs) is the minimum requirement for replacing these calls.
; It should always be set when unsafe-fp-math is true, but the tests
; alternate between 'fast' and 'nnan' for additional coverage.
; 'nsz' is implied by the definition of fmax or fmin itself.

; Shrink and remove the call.
; Float operands are extended to double, passed to a 'fast' fmax, and the
; result is truncated back. The assertions expect a float-typed
; 'fcmp fast ogt' + select with no extension, call or truncation left.
define float @max1(float %a, float %b) {
; CHECK-LABEL: @max1(
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast ogt float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %c = fpext float %a to double
  %d = fpext float %b to double
  %e = call fast double @fmax(double %c, double %d)
  %f = fptrunc double %e to float
  ret float %f
}

; fmaxf with only 'nnan' on the call: the expected compare carries
; 'nnan nsz'.
define float @max2(float %a, float %b) {
; CHECK-LABEL: @max2(
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz ogt float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %c = call nnan float @fmaxf(float %a, float %b)
  ret float %c
}

; 'fast' fmax on doubles: the expected compare is 'fcmp fast ogt'.
define double @max3(double %a, double %b) {
; CHECK-LABEL: @max3(
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast ogt double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
; CHECK-NEXT:    ret double [[TMP2]]
;
  %c = call fast double @fmax(double %a, double %b)
  ret double %c
}

; fmaxl with only 'nnan' on fp128: the expected compare carries 'nnan nsz'.
define fp128 @max4(fp128 %a, fp128 %b) {
; CHECK-LABEL: @max4(
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz ogt fp128 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
; CHECK-NEXT:    ret fp128 [[TMP2]]
;
  %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
  ret fp128 %c
}

; Shrink and remove the call.
; Float operands are extended to double, passed to an 'nnan' fmin, and the
; result is truncated back. The assertions expect a float-typed
; 'fcmp nnan nsz olt' + select with no extension, call or truncation left.
define float @min1(float %a, float %b) {
; CHECK-LABEL: @min1(
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz olt float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %c = fpext float %a to double
  %d = fpext float %b to double
  %e = call nnan double @fmin(double %c, double %d)
  %f = fptrunc double %e to float
  ret float %f
}

; 'fast' fminf: the expected compare is 'fcmp fast olt'.
define float @min2(float %a, float %b) {
; CHECK-LABEL: @min2(
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
; CHECK-NEXT:    ret float [[TMP2]]
;
  %c = call fast float @fminf(float %a, float %b)
  ret float %c
}

; fmin with only 'nnan' on doubles: the expected compare carries 'nnan nsz'.
define double @min3(double %a, double %b) {
; CHECK-LABEL: @min3(
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz olt double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
; CHECK-NEXT:    ret double [[TMP2]]
;
  %c = call nnan double @fmin(double %a, double %b)
  ret double %c
}

; 'fast' fminl on fp128: the expected compare is 'fcmp fast olt'.
define fp128 @min4(fp128 %a, fp128 %b) {
; CHECK-LABEL: @min4(
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt fp128 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
; CHECK-NEXT:    ret fp128 [[TMP2]]
;
  %c = call fast fp128 @fminl(fp128 %a, fp128 %b)
  ret fp128 %c
}

; ((which ? 2.0 : a) + 1.0) => (which ? 3.0 : (a + 1.0))
; This is always safe. No FMF required.
; The fadd of 1.0 that follows the phi carries no fast-math flags. The
; assertions expect it to be pushed into the phi's incoming values: the
; constant 2.0 from 'entry' becomes 3.0, and the %a path gets a plain
; 'fadd float %a, 1.0' placed in the 'delay' block.
; Note on the assertions: FileCheck variable [[A]] is bound twice, first to
; the argument %a on the fadd line and then re-bound to the phi %A, so the
; final 'ret' line refers to the phi.
define float @test55(i1 %which, float %a) {
; CHECK-LABEL: @test55(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
; CHECK:       delay:
; CHECK-NEXT:    [[PHITMP:%.*]] = fadd float [[A:%.*]], 1.000000e+00
; CHECK-NEXT:    br label [[FINAL]]
; CHECK:       final:
; CHECK-NEXT:    [[A:%.*]] = phi float [ 3.000000e+00, [[ENTRY:%.*]] ], [ [[PHITMP]], [[DELAY]] ]
; CHECK-NEXT:    ret float [[A]]
;
entry:
  br i1 %which, label %final, label %delay

delay:
  br label %final

final:
  %A = phi float [ 2.0, %entry ], [ %a, %delay ]
  %value = fadd float %A, 1.0
  ret float %value
}