; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s

; Every function below multiplies a floating-point vector by a constant vector
; and converts the product to an integer vector. The checks verify when the
; multiply is folded into the #fbits operand of a fixed-point fcvtzs/fcvtzu
; and when it has to stay as a separate fmul.

; <2 x float> scaled by 16 (2^4), signed conversion.
; CHECK-LABEL: test1:
; CHECK-NOT: fmul.2s
; CHECK: fcvtzs.2s v0, v0, #4
; CHECK: ret
define <2 x i32> @test1(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; <4 x float> scaled by 8 (2^3), signed conversion.
; CHECK-LABEL: test2:
; CHECK-NOT: fmul.4s
; CHECK: fcvtzs.4s v0, v0, #3
; CHECK: ret
define <4 x i32> @test2(<4 x float> %f) {
  %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
  %vcvt.i = fptosi <4 x float> %mul.i to <4 x i32>
  ret <4 x i32> %vcvt.i
}

; <2 x double> scaled by 32 (2^5), signed conversion.
; CHECK-LABEL: test3:
; CHECK-NOT: fmul.2d
; CHECK: fcvtzs.2d v0, v0, #5
; CHECK: ret
define <2 x i64> @test3(<2 x double> %d) {
  %mul.i = fmul <2 x double> %d, <double 32.000000e+00, double 32.000000e+00>
  %vcvt.i = fptosi <2 x double> %mul.i to <2 x i64>
  ret <2 x i64> %vcvt.i
}

; Truncate double to i32
; The multiply by 16 (2^4) is expected to fold into the 64-bit convert, with
; the result narrowed afterwards by xtn.
; CHECK-LABEL: test4:
; CHECK-NOT: fmul.2d
; CHECK: fcvtzs.2d v0, v0, #4
; CHECK: xtn.2s
; CHECK: ret
define <2 x i32> @test4(<2 x double> %d) {
  %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
  %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Truncate float to i16
; CHECK-LABEL: test5:
; CHECK-NOT: fmul.2s
; CHECK: fcvtzs.2s v0, v0, #4
; CHECK: ret
define <2 x i16> @test5(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i16>
  ret <2 x i16> %vcvt.i
}

; Don't convert float to i64
; The integer element is wider than the float element, so the multiply stays
; and the value is widened with fcvtl before the plain convert.
; CHECK-LABEL: test6:
; CHECK: fmov.2s v1, #16.00000000
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtl v0.2d, v0.2s
; CHECK: fcvtzs.2d v0, v0
; CHECK: ret
define <2 x i64> @test6(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i64>
  ret <2 x i64> %vcvt.i
}

; Check unsigned conversion.
; CHECK-LABEL: test7:
; CHECK-NOT: fmul.2s
; CHECK: fcvtzu.2s v0, v0, #4
; CHECK: ret
define <2 x i32> @test7(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Test which should not fold due to non-power of 2.
; CHECK-LABEL: test8:
; CHECK: fmov.2s v1, #17.00000000
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzu.2s v0, v0
; CHECK: ret
define <2 x i32> @test8(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 17.000000e+00, float 17.000000e+00>
  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Test which should not fold due to non-matching power of 2.
; The two lanes use different scale factors (16 and 8).
; CHECK-LABEL: test9:
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzu.2s v0, v0
; CHECK: ret
define <2 x i32> @test9(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00>
  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Combine all undefs.
; Register numbers are matched loosely here; only the convert itself is pinned.
; CHECK-LABEL: test10:
; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}}
; CHECK: ret
define <2 x i32> @test10(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float undef, float undef>
  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Combine if mix of undef and pow2.
; CHECK-LABEL: test11:
; CHECK: fcvtzu.2s v0, v0, #3
; CHECK: ret
define <2 x i32> @test11(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Don't combine when multiplied by 0.0.
; CHECK-LABEL: test12:
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzs.2s v0, v0
; CHECK: ret
; The scale factor in both lanes is 0.0, which is not a power of two, so the
; multiply is expected to remain.
define <2 x i32> @test12(<2 x float> %f) {
  %mul.i = fmul <2 x float> %f, <float 0.000000e+00, float 0.000000e+00>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Test which should not fold due to power of 2 out of range (i.e., 2^33).
; CHECK-LABEL: test13:
; CHECK: fmul.2s v0, v0, v1
; CHECK: fcvtzs.2s v0, v0
; CHECK: ret
define <2 x i32> @test13(<2 x float> %f) {
  ; 0x4200000000000000 is the hexadecimal spelling of 2^33.
  %mul.i = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Test case where const is max power of 2 (i.e., 2^32).
; CHECK-LABEL: test14:
; CHECK: fcvtzs.2s v0, v0, #32
; CHECK: ret
define <2 x i32> @test14(<2 x float> %f) {
  ; 0x41F0000000000000 is the hexadecimal spelling of 2^32.
  %mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
  ret <2 x i32> %vcvt.i
}

; Three-element vectors are not a legal type; the conversion is expected to be
; performed on the four-lane form with the multiply by 4 (2^2) folded in.
; CHECK-LABEL: test_illegal_fp_to_int:
; CHECK: fcvtzs.4s v0, v0, #2
; CHECK: ret
define <3 x i32> @test_illegal_fp_to_int(<3 x float> %in) {
  %scale = fmul <3 x float> %in, <float 4.0, float 4.0, float 4.0>
  %val = fptosi <3 x float> %scale to <3 x i32>
  ret <3 x i32> %val
}