1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <algos/mat.h>
18 
19 #include <assert.h>
20 #include <nanohub_math.h>
21 #include <sys/types.h>
22 #include <float.h>
23 #include <string.h>
24 #include <seos.h>
25 
26 
27 #define kEps 1E-5
28 
initZeroMatrix(struct Mat33 * A)29 void initZeroMatrix(struct Mat33 *A) {
30     memset(A->elem, 0.0f, sizeof(A->elem));
31 }
32 
33 UNROLLED
initDiagonalMatrix(struct Mat33 * A,float x)34 void initDiagonalMatrix(struct Mat33 *A, float x)
35 {
36     initZeroMatrix(A);
37 
38     uint32_t i;
39     for (i = 0; i < 3; ++i) {
40         A->elem[i][i] = x;
41     }
42 }
43 
initMatrixColumns(struct Mat33 * A,const struct Vec3 * v1,const struct Vec3 * v2,const struct Vec3 * v3)44 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1, const struct Vec3 *v2, const struct Vec3 *v3)
45 {
46     A->elem[0][0] = v1->x;
47     A->elem[0][1] = v2->x;
48     A->elem[0][2] = v3->x;
49 
50     A->elem[1][0] = v1->y;
51     A->elem[1][1] = v2->y;
52     A->elem[1][2] = v3->y;
53 
54     A->elem[2][0] = v1->z;
55     A->elem[2][1] = v2->z;
56     A->elem[2][2] = v3->z;
57 }
58 
mat33Apply(struct Vec3 * out,const struct Mat33 * A,const struct Vec3 * v)59 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v)
60 {
61     // assert(out != v);
62     out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z;
63     out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z;
64     out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z;
65 }
66 
67 UNROLLED
mat33Multiply(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)68 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A, const struct Mat33 *B)
69 {
70     // assert(out != A);
71     // assert(out != B);
72 
73     uint32_t i;
74     for (i = 0; i < 3; ++i) {
75         uint32_t j;
76         for (j = 0; j < 3; ++j) {
77             uint32_t k;
78             float sum = 0.0f;
79             for (k = 0; k < 3; ++k) {
80                 sum += A->elem[i][k] * B->elem[k][j];
81             }
82 
83             out->elem[i][j] = sum;
84         }
85     }
86 }
87 
88 UNROLLED
mat33ScalarMul(struct Mat33 * A,float c)89 void mat33ScalarMul(struct Mat33 *A, float c)
90 {
91     uint32_t i;
92     for (i = 0; i < 3; ++i) {
93         uint32_t j;
94         for (j = 0; j < 3; ++j) {
95             A->elem[i][j] *= c;
96         }
97     }
98 }
99 
100 UNROLLED
mat33Add(struct Mat33 * out,const struct Mat33 * A)101 void mat33Add(struct Mat33 *out, const struct Mat33 *A)
102 {
103     uint32_t i;
104     for (i = 0; i < 3; ++i) {
105         uint32_t j;
106         for (j = 0; j < 3; ++j) {
107             out->elem[i][j] += A->elem[i][j];
108         }
109     }
110 }
111 
112 UNROLLED
mat33Sub(struct Mat33 * out,const struct Mat33 * A)113 void mat33Sub(struct Mat33 *out, const struct Mat33 *A)
114 {
115     uint32_t i;
116     for (i = 0; i < 3; ++i) {
117         uint32_t j;
118         for (j = 0; j < 3; ++j) {
119             out->elem[i][j] -= A->elem[i][j];
120         }
121     }
122 }
123 
124 UNROLLED
mat33IsPositiveSemidefinite(const struct Mat33 * A,float tolerance)125 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance)
126 {
127     uint32_t i;
128     for (i = 0; i < 3; ++i) {
129         if (A->elem[i][i] < 0.0f) {
130             return 0;
131         }
132     }
133 
134     for (i = 0; i < 3; ++i) {
135         uint32_t j;
136         for (j = i + 1; j < 3; ++j) {
137             if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) {
138                 return 0;
139             }
140         }
141     }
142 
143     return 1;
144 }
145 
146 UNROLLED
mat33MultiplyTransposed(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)147 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A, const struct Mat33 *B)
148 {
149     // assert(out != A);
150     // assert(out != B);
151 
152     uint32_t i;
153     for (i = 0; i < 3; ++i) {
154         uint32_t j;
155         for (j = 0; j < 3; ++j) {
156             uint32_t k;
157             float sum = 0.0f;
158             for (k = 0; k < 3; ++k) {
159                 sum += A->elem[k][i] * B->elem[k][j];
160             }
161 
162             out->elem[i][j] = sum;
163         }
164     }
165 }
166 
167 UNROLLED
mat33MultiplyTransposed2(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)168 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A, const struct Mat33 *B)
169 {
170     // assert(out != A);
171     // assert(out != B);
172 
173     uint32_t i;
174     for (i = 0; i < 3; ++i) {
175         uint32_t j;
176         for (j = 0; j < 3; ++j) {
177             uint32_t k;
178             float sum = 0.0f;
179             for (k = 0; k < 3; ++k) {
180                 sum += A->elem[i][k] * B->elem[j][k];
181             }
182 
183             out->elem[i][j] = sum;
184         }
185     }
186 }
187 
188 UNROLLED
mat33Invert(struct Mat33 * out,const struct Mat33 * A)189 void mat33Invert(struct Mat33 *out, const struct Mat33 *A)
190 {
191     float t;
192     initDiagonalMatrix(out, 1.0f);
193 
194     struct Mat33 tmp = *A;
195 
196     uint32_t i, k;
197     for (i = 0; i < 3; ++i) {
198         uint32_t swap = i;
199         uint32_t j;
200         for (j = i + 1; j < 3; ++j) {
201             if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) {
202                 swap = j;
203             }
204         }
205 
206         if (swap != i) {
207             for (k = 0; k < 3; ++k) {
208                 t = tmp.elem[i][k];
209                 tmp.elem[i][k] = tmp.elem[swap][k];
210                 tmp.elem[swap][k] = t;
211 
212                 t = out->elem[i][k];
213                 out->elem[i][k] = out->elem[swap][k];
214                 out->elem[swap][k] = t;
215             }
216         }
217 
218         t = 1.0f / tmp.elem[i][i];
219         for (k = 0; k < 3; ++k) {
220             tmp.elem[i][k] *= t;
221             out->elem[i][k] *= t;
222         }
223 
224         for (j = 0; j < 3; ++j) {
225             if (j != i) {
226                 t = tmp.elem[j][i];
227                 for (k = 0; k < 3; ++k) {
228                     tmp.elem[j][k] -= tmp.elem[i][k] * t;
229                     out->elem[j][k] -= out->elem[i][k] * t;
230                 }
231             }
232         }
233     }
234 }
235 
236 UNROLLED
mat33Transpose(struct Mat33 * out,const struct Mat33 * A)237 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A)
238 {
239     uint32_t i;
240     for (i = 0; i < 3; ++i) {
241         uint32_t j;
242         for (j = 0; j < 3; ++j) {
243             out->elem[i][j] = A->elem[j][i];
244         }
245     }
246 }
247 
248 UNROLLED
mat33DecomposeLup(struct Mat33 * LU,struct Size3 * pivot)249 void mat33DecomposeLup(struct Mat33 *LU, struct Size3 *pivot)
250 {
251 
252     const uint32_t N = 3;
253     uint32_t i, j, k;
254 
255     for (k = 0; k < N; ++k) {
256         pivot->elem[k] = k;
257         float max = fabsf(LU->elem[k][k]);
258         for (j = k + 1; j < N; ++j) {
259             if (max < fabsf(LU->elem[j][k])) {
260                 max = fabsf(LU->elem[j][k]);
261                 pivot->elem[k] = j;
262             }
263         }
264 
265         if (pivot->elem[k] != k) {
266             mat33SwapRows(LU, k, pivot->elem[k]);
267         }
268 
269         if (fabsf(LU->elem[k][k]) < kEps) {
270             continue;
271         }
272 
273         for (j = k + 1; j < N; ++j) {
274             LU->elem[k][j] /= LU->elem[k][k];
275         }
276 
277         for (i = k + 1; i < N; ++i) {
278             for (j = k + 1; j < 3; ++j) {
279                 LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
280             }
281         }
282     }
283 }
284 
285 UNROLLED
mat33SwapRows(struct Mat33 * A,const uint32_t i,const uint32_t j)286 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j)
287 {
288 
289     const uint32_t N = 3;
290     uint32_t k;
291 
292     if (i == j) {
293         return;
294     }
295 
296     for (k = 0; k < N; ++k) {
297         float tmp = A->elem[i][k];
298         A->elem[i][k] = A->elem[j][k];
299         A->elem[j][k] = tmp;
300     }
301 }
302 
303 UNROLLED
mat33Solve(const struct Mat33 * A,struct Vec3 * x,const struct Vec3 * b,const struct Size3 * pivot)304 void mat33Solve(const struct Mat33 *A, struct Vec3 *x, const struct Vec3 *b, const struct Size3 *pivot)
305 {
306 
307     const uint32_t N = 3;
308     uint32_t i, k;
309 
310     float bCopy[N];
311     bCopy[0] = b->x;
312     bCopy[1] = b->y;
313     bCopy[2] = b->z;
314 
315     float _x[N];
316     for (k = 0; k < N; ++k) {
317         if (pivot->elem[k] != k) {
318             float tmp = bCopy[k];
319             bCopy[k] = bCopy[pivot->elem[k]];
320             bCopy[pivot->elem[k]] = tmp;
321         }
322 
323         _x[k] = bCopy[k];
324         for (i = 0; i < k; ++i) {
325             _x[k] -= _x[i] * A->elem[k][i];
326         }
327         _x[k] /= A->elem[k][k];
328     }
329 
330     for (k = N; k-- > 0;) {
331         for (i = k + 1; i < N; ++i) {
332             _x[k] -= _x[i] * A->elem[k][i];
333         }
334     }
335 
336     initVec3(x, _x[0], _x[1], _x[2]);
337 }
338 
339 /* Returns the eigenvalues and corresponding eigenvectors of the _symmetric_ matrix.
340  The i-th eigenvalue corresponds to the eigenvector in the i-th _row_ of "eigenvecs".
341  */
342 
343 UNROLLED
mat33GetEigenbasis(struct Mat33 * S,struct Vec3 * eigenvals,struct Mat33 * eigenvecs)344 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals, struct Mat33 *eigenvecs)
345 {
346 
347     const uint32_t N = 3;
348     uint32_t i, j, k, l, m;
349 
350     float _eigenvals[N];
351 
352     uint32_t ind[N];
353     for (k = 0; k < N; ++k) {
354         ind[k] = mat33Maxind(S, k);
355         _eigenvals[k] = S->elem[k][k];
356     }
357 
358     initDiagonalMatrix(eigenvecs, 1.0f);
359 
360     for (;;) {
361         m = 0;
362         for (k = 1; k + 1 < N; ++k) {
363             if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) {
364                 m = k;
365             }
366         }
367 
368         k = m;
369         l = ind[m];
370         float p = S->elem[k][l];
371 
372         if (fabsf(p) < kEps) {
373             break;
374         }
375 
376         float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f;
377 
378         float t = fabsf(y) + sqrtf(p * p + y * y);
379         float s = sqrtf(p * p + t * t);
380         float c = t / s;
381         s = p / s;
382         t = p * p / t;
383 
384         if (y < 0.0f) {
385             s = -s;
386             t = -t;
387         }
388 
389         S->elem[k][l] = 0.0f;
390 
391         _eigenvals[k] -= t;
392         _eigenvals[l] += t;
393 
394         for (i = 0; i < k; ++i) {
395             mat33Rotate(S, c, s, i, k, i, l);
396         }
397 
398         for (i = k + 1; i < l; ++i) {
399             mat33Rotate(S, c, s, k, i, i, l);
400         }
401 
402         for (i = l + 1; i < N; ++i) {
403             mat33Rotate(S, c, s, k, i, l, i);
404         }
405 
406         for (i = 0; i < N; ++i) {
407             float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i];
408             eigenvecs->elem[l][i] = s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i];
409             eigenvecs->elem[k][i] = tmp;
410         }
411 
412         ind[k] = mat33Maxind(S, k);
413         ind[l] = mat33Maxind(S, l);
414 
415         float sum = 0.0f;
416         for (i = 0; i < N; ++i) {
417             for (j = i + 1; j < N; ++j) {
418                 sum += fabsf(S->elem[i][j]);
419             }
420         }
421 
422         if (sum < kEps) {
423             break;
424         }
425     }
426 
427     for (k = 0; k < N; ++k) {
428         m = k;
429         for (l = k + 1; l < N; ++l) {
430             if (_eigenvals[l] > _eigenvals[m]) {
431                 m = l;
432             }
433         }
434 
435         if (k != m) {
436             float tmp = _eigenvals[k];
437             _eigenvals[k] = _eigenvals[m];
438             _eigenvals[m] = tmp;
439 
440             mat33SwapRows(eigenvecs, k, m);
441         }
442     }
443 
444     initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]);
445 }
446 
447 // index of largest off-diagonal element in row k
448 UNROLLED
mat33Maxind(const struct Mat33 * A,uint32_t k)449 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k)
450 {
451 
452     const uint32_t N = 3;
453 
454     uint32_t m = k + 1;
455     uint32_t i;
456 
457     for (i = k + 2; i < N; ++i) {
458         if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) {
459             m = i;
460         }
461     }
462 
463     return m;
464 }
465 
mat33Rotate(struct Mat33 * A,float c,float s,uint32_t k,uint32_t l,uint32_t i,uint32_t j)466 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l, uint32_t i, uint32_t j)
467 {
468     float tmp = c * A->elem[k][l] - s * A->elem[i][j];
469     A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j];
470     A->elem[k][l] = tmp;
471 }
472 
mat44Apply(struct Vec4 * out,const struct Mat44 * A,const struct Vec4 * v)473 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v)
474 {
475     // assert(out != v);
476 
477     out->x =
478         A->elem[0][0] * v->x
479             + A->elem[0][1] * v->y
480             + A->elem[0][2] * v->z
481             + A->elem[0][3] * v->w;
482 
483     out->y =
484         A->elem[1][0] * v->x
485             + A->elem[1][1] * v->y
486             + A->elem[1][2] * v->z
487             + A->elem[1][3] * v->w;
488 
489     out->z =
490         A->elem[2][0] * v->x
491             + A->elem[2][1] * v->y
492             + A->elem[2][2] * v->z
493             + A->elem[2][3] * v->w;
494 
495     out->w =
496         A->elem[3][0] * v->x
497             + A->elem[3][1] * v->y
498             + A->elem[3][2] * v->z
499             + A->elem[3][3] * v->w;
500 }
501 
502 UNROLLED
mat44DecomposeLup(struct Mat44 * LU,struct Size4 * pivot)503 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot)
504 {
505     const uint32_t N = 4;
506     uint32_t i, j, k;
507 
508     for (k = 0; k < N; ++k) {
509         pivot->elem[k] = k;
510         float max = fabsf(LU->elem[k][k]);
511         for (j = k + 1; j < N; ++j) {
512             if (max < fabsf(LU->elem[j][k])) {
513                 max = fabsf(LU->elem[j][k]);
514                 pivot->elem[k] = j;
515             }
516         }
517 
518         if (pivot->elem[k] != k) {
519             mat44SwapRows(LU, k, pivot->elem[k]);
520         }
521 
522         if (fabsf(LU->elem[k][k]) < kEps) {
523             continue;
524         }
525 
526         for (j = k + 1; j < N; ++j) {
527             LU->elem[k][j] /= LU->elem[k][k];
528         }
529 
530         for (i = k + 1; i < N; ++i) {
531             for (j = k + 1; j < N; ++j) {
532                 LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
533             }
534         }
535     }
536 }
537 
538 UNROLLED
mat44SwapRows(struct Mat44 * A,const uint32_t i,const uint32_t j)539 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j)
540 {
541 
542     const uint32_t N = 4;
543     uint32_t k;
544 
545     if (i == j) {
546         return;
547     }
548 
549     for (k = 0; k < N; ++k) {
550         float tmp = A->elem[i][k];
551         A->elem[i][k] = A->elem[j][k];
552         A->elem[j][k] = tmp;
553     }
554 }
555 
556 UNROLLED
mat44Solve(const struct Mat44 * A,struct Vec4 * x,const struct Vec4 * b,const struct Size4 * pivot)557 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b, const struct Size4 *pivot)
558 {
559 
560     const uint32_t N = 4;
561     uint32_t i, k;
562 
563     float bCopy[N];
564     bCopy[0] = b->x;
565     bCopy[1] = b->y;
566     bCopy[2] = b->z;
567     bCopy[3] = b->w;
568 
569     float _x[N];
570     for (k = 0; k < N; ++k) {
571         if (pivot->elem[k] != k) {
572             float tmp = bCopy[k];
573             bCopy[k] = bCopy[pivot->elem[k]];
574             bCopy[pivot->elem[k]] = tmp;
575         }
576 
577         _x[k] = bCopy[k];
578         for (i = 0; i < k; ++i) {
579             _x[k] -= _x[i] * A->elem[k][i];
580         }
581         _x[k] /= A->elem[k][k];
582     }
583 
584     for (k = N; k-- > 0;) {
585         for (i = k + 1; i < N; ++i) {
586             _x[k] -= _x[i] * A->elem[k][i];
587         }
588     }
589 
590     initVec4(x, _x[0], _x[1], _x[2], _x[3]);
591 }
592 
593 
594