; RUN: opt < %s -instcombine -S | FileCheck %s

; Constant reassociation on <4 x float> fmul/fadd chains.
; Every pattern below is exercised four times: with no fast-math flags,
; with 'fast', with 'reassoc nsz', and with 'reassoc' alone.

; Pattern: (V * C1) * C2 => V * (C1 * C2)
; Without any fast-math flags both multiplies are expected to remain.
define <4 x float> @test_fmul(<4 x float> %V) {
; CHECK-LABEL: @test_fmul(
; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP2]]
  %scaled = fmul <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %rescaled = fmul <4 x float> %scaled, <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %rescaled
}

; Pattern: (V * C1) * C2 => V * (C1 * C2)
; With 'fast' the two constants are expected to merge into a single multiply.
define <4 x float> @test_fmul_fast(<4 x float> %V) {
; CHECK-LABEL: @test_fmul_fast(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
  %scaled = fmul fast <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %rescaled = fmul fast <4 x float> %scaled, <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %rescaled
}

; Pattern: (V * C1) * C2 => V * (C1 * C2)
; With 'reassoc' plus 'nsz' the fold is expected to happen
; ('nsz' is not technically required for it).
define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
; CHECK-LABEL: @test_fmul_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
  %scaled = fmul reassoc nsz <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %rescaled = fmul reassoc nsz <4 x float> %scaled, <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %rescaled
}

; Pattern: (V * C1) * C2 => V * (C1 * C2)
; TODO: 'nsz' should not be needed here; with 'reassoc' alone this ought to
; fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }. The expectations below record the
; current, unfolded output.
define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
; CHECK-LABEL: @test_fmul_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP2]]
  %scaled = fmul reassoc <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %rescaled = fmul reassoc <4 x float> %scaled, <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %rescaled
}

; Pattern: (V + C1) + C2 => V + (C1 + C2)
; Without any fast-math flags both additions are expected to remain.
define <4 x float> @test_fadd(<4 x float> %V) {
; CHECK-LABEL: @test_fadd(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP2]]
  %shifted = fadd <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %reshifted = fadd <4 x float> %shifted, <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %reshifted
}

; Pattern: (V + C1) + C2 => V + (C1 + C2)
; With 'fast' the two constants are expected to merge into a single addition.
define <4 x float> @test_fadd_fast(<4 x float> %V) {
; CHECK-LABEL: @test_fadd_fast(
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
  %shifted = fadd fast <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %reshifted = fadd fast <4 x float> %shifted, <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %reshifted
}

; Pattern: (V + C1) + C2 => V + (C1 + C2)
; With 'reassoc' plus 'nsz' the fold is expected to happen
; ('nsz' is not technically required for it).
define <4 x float> @test_fadd_reassoc_nsz(<4 x float> %V) {
; CHECK-LABEL: @test_fadd_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
  %shifted = fadd reassoc nsz <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %reshifted = fadd reassoc nsz <4 x float> %shifted, <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %reshifted
}

; Pattern: (V + C1) + C2 => V + (C1 + C2)
; TODO: 'nsz' should not be needed here; with 'reassoc' alone this ought to
; fold to V + { 2.0, 4.0, 0.0, 8.0 }. The expectations below record the
; current, unfolded output.
define <4 x float> @test_fadd_reassoc(<4 x float> %V) {
; CHECK-LABEL: @test_fadd_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP2]]
  %shifted = fadd reassoc <4 x float> %V, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %reshifted = fadd reassoc <4 x float> %shifted, <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
  ret <4 x float> %reshifted
}

; Pattern: ( A + C1 ) + ( B + -C1 )
; Without any fast-math flags all three additions are expected to remain.
define <4 x float> @test_fadds_cancel_(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[TMP3]]
  %lhs = fadd <4 x float> %A, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %rhs = fadd <4 x float> %B, <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
  %sum = fadd <4 x float> %lhs, %rhs
  ret <4 x float> %sum
}

; Pattern: ( A + C1 ) + ( B + -C1 )
; With 'fast' the constants are expected to cancel, leaving just 'A + B'.
define <4 x float> @test_fadds_cancel_fast(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_fast(
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret <4 x float> [[TMP1]]
  %lhs = fadd fast <4 x float> %A, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %rhs = fadd fast <4 x float> %B, <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
  %sum = fadd fast <4 x float> %lhs, %rhs
  ret <4 x float> %sum
}

; Pattern: ( A + C1 ) + ( B + -C1 )
; With 'reassoc' plus 'nsz' the constants are expected to cancel, leaving
; 'A + B' ('nsz' is required for this one).
define <4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret <4 x float> [[TMP1]]
  %lhs = fadd reassoc nsz <4 x float> %A, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %rhs = fadd reassoc nsz <4 x float> %B, <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
  %sum = fadd reassoc nsz <4 x float> %lhs, %rhs
  ret <4 x float> %sum
}

; Pattern: ( A + C1 ) + ( B + -C1 )
; With 'reassoc' alone the cancellation must not happen ('nsz' is required),
; so all three additions are expected to remain.
define <4 x float> @test_fadds_cancel_reassoc(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: @test_fadds_cancel_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[TMP3]]
  %lhs = fadd reassoc <4 x float> %A, <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
  %rhs = fadd reassoc <4 x float> %B, <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
  %sum = fadd reassoc <4 x float> %lhs, %rhs
  ret <4 x float> %sum
}