1 #include <arm_neon.h>
2
// Matrix stored as four NEON vectors, one per row; each row holds four
// 32-bit float lanes.
// NOTE(review): the name suggests a 4x3 matrix kept in padded 4-lane rows --
// confirm the intended meaning of lane 3 with the callers.
struct Matrix43 {
    float32x4_t row0, row1, row2, row3;
};
9
// Multiplies two Matrix43 values and returns the product by value.
//
// For i in 0..2, result row i is
//     m1.rowI[0] * m2.row0 + m1.rowI[1] * m2.row1 + m1.rowI[2] * m2.row2.
// Result row 3 is the same three-term combination (using m1.row3) added on
// top of m2.row3. Only lanes 0..2 of each m1 row are read; lane 3 of m1 is
// never used, and m2.row3 contributes to result row 3 only.
// NOTE(review): this matches composing affine transforms whose omitted
// column is (0, 0, 0, 1) -- confirm the convention against the callers.
__attribute__((always_inline)) inline Matrix43 operator*(const Matrix43& m1, const Matrix43& m2) {
    Matrix43 product;
    float32x4_t acc;

    // Row 0: the first term is a plain multiply, the rest are multiply-accumulates.
    acc = vmulq_n_f32(m2.row0, vgetq_lane_f32(m1.row0, 0));
    acc = vmlaq_n_f32(acc, m2.row1, vgetq_lane_f32(m1.row0, 1));
    product.row0 = vmlaq_n_f32(acc, m2.row2, vgetq_lane_f32(m1.row0, 2));

    // Row 1.
    acc = vmulq_n_f32(m2.row0, vgetq_lane_f32(m1.row1, 0));
    acc = vmlaq_n_f32(acc, m2.row1, vgetq_lane_f32(m1.row1, 1));
    product.row1 = vmlaq_n_f32(acc, m2.row2, vgetq_lane_f32(m1.row1, 2));

    // Row 2.
    acc = vmulq_n_f32(m2.row0, vgetq_lane_f32(m1.row2, 0));
    acc = vmlaq_n_f32(acc, m2.row1, vgetq_lane_f32(m1.row2, 1));
    product.row2 = vmlaq_n_f32(acc, m2.row2, vgetq_lane_f32(m1.row2, 2));

    // Row 3: the accumulator starts from m2.row3, so that row is carried
    // into the result in addition to the three scaled rows.
    acc = vmlaq_n_f32(m2.row3, m2.row0, vgetq_lane_f32(m1.row3, 0));
    acc = vmlaq_n_f32(acc, m2.row1, vgetq_lane_f32(m1.row3, 1));
    product.row3 = vmlaq_n_f32(acc, m2.row2, vgetq_lane_f32(m1.row3, 2));

    return product;
}
29
// Evaluates the chained product m * m * m and discards the result.
// The parameters a1 and a2 are not used by the body.
// NOTE(review): the name suggests this is a minimal reproducer for a compiler
// crash, so the discarded result and unused parameters are presumably
// intentional -- confirm before simplifying.
void _f_with_internal_compiler_error(const Matrix43& m, const void* a1, const void* a2) {
    (m * m) * m;
}
33