#include <arm_neon.h>

namespace math {
namespace internal {
// Lane shuffles, named after the source-lane order they produce.
// NOTE(review): identifiers that begin with an underscore followed by an
// uppercase letter are reserved for the implementation; the names are kept
// as-is because code outside this view may expand them.

// Swaps the two lanes inside each 64-bit half: [a, b, c, d] -> [b, a, d, c].
#define _IOS_SHUFFLE_1032(vec) vrev64q_f32(vec)
// Swaps the low and high 64-bit halves: [a, b, c, d] -> [c, d, a, b].
#define _IOS_SHUFFLE_2301(vec) vcombine_f32(vget_high_f32(vec), vget_low_f32(vec))

/// Four-lane dot product of vec1 and vec2.
/// @return a vector whose every lane holds the sum of the four lane products.
inline float32x4_t dot4VecResult(const float32x4_t& vec1, const float32x4_t& vec2) {
    float32x4_t result = vmulq_f32(vec1, vec2);
    // Pairwise sums within each half: [a+b, a+b, c+d, c+d].
    result = vaddq_f32(result, _IOS_SHUFFLE_1032(result));
    // Add the opposite half so every lane ends up with a+b+c+d.
    result = vaddq_f32(result, _IOS_SHUFFLE_2301(result));
    return result;
}

/// Approximate per-lane reciprocal square root of vec.
/// Starts from the vrsqrteq_f32 estimate and applies one Newton-Raphson
/// refinement step, so the result is an approximation, not an exact 1/sqrt.
inline float32x4_t fastRSqrt(const float32x4_t& vec) {
    float32x4_t result;
    result = vrsqrteq_f32(vec);
    // vrsqrtsq_f32(a, b) yields (3 - a*b) / 2, i.e. the Newton-Raphson
    // correction factor for the current estimate.
    result = vmulq_f32(vrsqrtsq_f32(vmulq_f32(result, result), vec), result);
    return result;
}

}  // namespace internal

// Three-component vector stored in a four-lane NEON register; the functions
// below use lanes 0..2 as the components (lane 3 is carried along).
typedef float32x4_t Vector3;

/// Scales v1 by the approximate reciprocal of its xyz length.
/// Lane 3 is excluded from the length computation, but the returned lane 3 is
/// still v1's lane 3 multiplied by the same scale factor.
/// @return v1 unchanged when the squared xyz length compares equal to zero,
///         otherwise the scaled vector.
inline Vector3 normalize(const Vector3& v1) {
    float32x4_t dot;
    // Zero lane 3 so it does not contribute to the squared length.
    dot = vsetq_lane_f32(0.0f, v1, 3);
    dot = internal::dot4VecResult(dot, dot);

    // Guard against taking the reciprocal square root of zero.
    if (vgetq_lane_f32(dot, 0) == 0.0f) {
        return v1;
    } else {
        Vector3 result;
        result = vmulq_f32(v1, internal::fastRSqrt(dot));
        return result;
    }
}

/// Cross product of v1 and v2 over lanes 0..2.
/// Lane 3 of the result evaluates to v1[3]*v2[3] - v2[3]*v1[3].
inline Vector3 cross(const Vector3& v1, const Vector3& v2) {
    // val[0] = v1 permuted to (y, z, x, w); val[1] = v2 permuted the same way.
    float32x4x2_t v_1203 = vzipq_f32(vcombine_f32(vrev64_f32(vget_low_f32(v1)), vrev64_f32(vget_low_f32(v2))), vcombine_f32(vget_high_f32(v1), vget_high_f32(v2)));
    // Applying the same permutation again gives (z, x, y, w) for v1 and v2.
    float32x4x2_t v_2013 = vzipq_f32(vcombine_f32(vrev64_f32(vget_low_f32(v_1203.val[0])), vrev64_f32(vget_low_f32(v_1203.val[1]))), vcombine_f32(vget_high_f32(v_1203.val[0]), vget_high_f32(v_1203.val[1])));

    Vector3 result;
    // v1.yzx * v2.zxy - v2.yzx * v1.zxy (vmlsq_f32(a, b, c) computes a - b*c).
    result = vmlsq_f32(vmulq_f32(v_1203.val[0], v_2013.val[1]), v_1203.val[1], v_2013.val[0]);
    return result;
}
}  // namespace math
47
// Computes math::normalize(math::cross(v1, v2)) and discards the result.
// NOTE(review): the name suggests this is a reduced reproducer for a compiler
// ICE in reload_cse_simplify_operands; presumably the exact call shape matters,
// so confirm before restructuring or "using" the return value.
void _f_with_internal_compiler_error_in_reload_cse_simplify_operands(const math::Vector3& v1, const math::Vector3& v2) {
    math::normalize(math::cross(v1, v2));
}
51