1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <algos/mat.h>
18
19 #include <assert.h>
20 #include <nanohub_math.h>
21 #include <sys/types.h>
22 #include <float.h>
23 #include <string.h>
24 #include <seos.h>
25
26
27 #define kEps 1E-5
28
initZeroMatrix(struct Mat33 * A)29 void initZeroMatrix(struct Mat33 *A) {
30 memset(A->elem, 0.0f, sizeof(A->elem));
31 }
32
33 UNROLLED
initDiagonalMatrix(struct Mat33 * A,float x)34 void initDiagonalMatrix(struct Mat33 *A, float x)
35 {
36 initZeroMatrix(A);
37
38 uint32_t i;
39 for (i = 0; i < 3; ++i) {
40 A->elem[i][i] = x;
41 }
42 }
43
initMatrixColumns(struct Mat33 * A,const struct Vec3 * v1,const struct Vec3 * v2,const struct Vec3 * v3)44 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1, const struct Vec3 *v2, const struct Vec3 *v3)
45 {
46 A->elem[0][0] = v1->x;
47 A->elem[0][1] = v2->x;
48 A->elem[0][2] = v3->x;
49
50 A->elem[1][0] = v1->y;
51 A->elem[1][1] = v2->y;
52 A->elem[1][2] = v3->y;
53
54 A->elem[2][0] = v1->z;
55 A->elem[2][1] = v2->z;
56 A->elem[2][2] = v3->z;
57 }
58
mat33Apply(struct Vec3 * out,const struct Mat33 * A,const struct Vec3 * v)59 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v)
60 {
61 // assert(out != v);
62 out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z;
63 out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z;
64 out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z;
65 }
66
67 UNROLLED
mat33Multiply(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)68 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A, const struct Mat33 *B)
69 {
70 // assert(out != A);
71 // assert(out != B);
72
73 uint32_t i;
74 for (i = 0; i < 3; ++i) {
75 uint32_t j;
76 for (j = 0; j < 3; ++j) {
77 uint32_t k;
78 float sum = 0.0f;
79 for (k = 0; k < 3; ++k) {
80 sum += A->elem[i][k] * B->elem[k][j];
81 }
82
83 out->elem[i][j] = sum;
84 }
85 }
86 }
87
88 UNROLLED
mat33ScalarMul(struct Mat33 * A,float c)89 void mat33ScalarMul(struct Mat33 *A, float c)
90 {
91 uint32_t i;
92 for (i = 0; i < 3; ++i) {
93 uint32_t j;
94 for (j = 0; j < 3; ++j) {
95 A->elem[i][j] *= c;
96 }
97 }
98 }
99
100 UNROLLED
mat33Add(struct Mat33 * out,const struct Mat33 * A)101 void mat33Add(struct Mat33 *out, const struct Mat33 *A)
102 {
103 uint32_t i;
104 for (i = 0; i < 3; ++i) {
105 uint32_t j;
106 for (j = 0; j < 3; ++j) {
107 out->elem[i][j] += A->elem[i][j];
108 }
109 }
110 }
111
112 UNROLLED
mat33Sub(struct Mat33 * out,const struct Mat33 * A)113 void mat33Sub(struct Mat33 *out, const struct Mat33 *A)
114 {
115 uint32_t i;
116 for (i = 0; i < 3; ++i) {
117 uint32_t j;
118 for (j = 0; j < 3; ++j) {
119 out->elem[i][j] -= A->elem[i][j];
120 }
121 }
122 }
123
124 UNROLLED
mat33IsPositiveSemidefinite(const struct Mat33 * A,float tolerance)125 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance)
126 {
127 uint32_t i;
128 for (i = 0; i < 3; ++i) {
129 if (A->elem[i][i] < 0.0f) {
130 return 0;
131 }
132 }
133
134 for (i = 0; i < 3; ++i) {
135 uint32_t j;
136 for (j = i + 1; j < 3; ++j) {
137 if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) {
138 return 0;
139 }
140 }
141 }
142
143 return 1;
144 }
145
146 UNROLLED
mat33MultiplyTransposed(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)147 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A, const struct Mat33 *B)
148 {
149 // assert(out != A);
150 // assert(out != B);
151
152 uint32_t i;
153 for (i = 0; i < 3; ++i) {
154 uint32_t j;
155 for (j = 0; j < 3; ++j) {
156 uint32_t k;
157 float sum = 0.0f;
158 for (k = 0; k < 3; ++k) {
159 sum += A->elem[k][i] * B->elem[k][j];
160 }
161
162 out->elem[i][j] = sum;
163 }
164 }
165 }
166
167 UNROLLED
mat33MultiplyTransposed2(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)168 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A, const struct Mat33 *B)
169 {
170 // assert(out != A);
171 // assert(out != B);
172
173 uint32_t i;
174 for (i = 0; i < 3; ++i) {
175 uint32_t j;
176 for (j = 0; j < 3; ++j) {
177 uint32_t k;
178 float sum = 0.0f;
179 for (k = 0; k < 3; ++k) {
180 sum += A->elem[i][k] * B->elem[j][k];
181 }
182
183 out->elem[i][j] = sum;
184 }
185 }
186 }
187
188 UNROLLED
mat33Invert(struct Mat33 * out,const struct Mat33 * A)189 void mat33Invert(struct Mat33 *out, const struct Mat33 *A)
190 {
191 float t;
192 initDiagonalMatrix(out, 1.0f);
193
194 struct Mat33 tmp = *A;
195
196 uint32_t i, k;
197 for (i = 0; i < 3; ++i) {
198 uint32_t swap = i;
199 uint32_t j;
200 for (j = i + 1; j < 3; ++j) {
201 if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) {
202 swap = j;
203 }
204 }
205
206 if (swap != i) {
207 for (k = 0; k < 3; ++k) {
208 t = tmp.elem[i][k];
209 tmp.elem[i][k] = tmp.elem[swap][k];
210 tmp.elem[swap][k] = t;
211
212 t = out->elem[i][k];
213 out->elem[i][k] = out->elem[swap][k];
214 out->elem[swap][k] = t;
215 }
216 }
217
218 t = 1.0f / tmp.elem[i][i];
219 for (k = 0; k < 3; ++k) {
220 tmp.elem[i][k] *= t;
221 out->elem[i][k] *= t;
222 }
223
224 for (j = 0; j < 3; ++j) {
225 if (j != i) {
226 t = tmp.elem[j][i];
227 for (k = 0; k < 3; ++k) {
228 tmp.elem[j][k] -= tmp.elem[i][k] * t;
229 out->elem[j][k] -= out->elem[i][k] * t;
230 }
231 }
232 }
233 }
234 }
235
236 UNROLLED
mat33Transpose(struct Mat33 * out,const struct Mat33 * A)237 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A)
238 {
239 uint32_t i;
240 for (i = 0; i < 3; ++i) {
241 uint32_t j;
242 for (j = 0; j < 3; ++j) {
243 out->elem[i][j] = A->elem[j][i];
244 }
245 }
246 }
247
248 UNROLLED
mat33DecomposeLup(struct Mat33 * LU,struct Size3 * pivot)249 void mat33DecomposeLup(struct Mat33 *LU, struct Size3 *pivot)
250 {
251
252 const uint32_t N = 3;
253 uint32_t i, j, k;
254
255 for (k = 0; k < N; ++k) {
256 pivot->elem[k] = k;
257 float max = fabsf(LU->elem[k][k]);
258 for (j = k + 1; j < N; ++j) {
259 if (max < fabsf(LU->elem[j][k])) {
260 max = fabsf(LU->elem[j][k]);
261 pivot->elem[k] = j;
262 }
263 }
264
265 if (pivot->elem[k] != k) {
266 mat33SwapRows(LU, k, pivot->elem[k]);
267 }
268
269 if (fabsf(LU->elem[k][k]) < kEps) {
270 continue;
271 }
272
273 for (j = k + 1; j < N; ++j) {
274 LU->elem[k][j] /= LU->elem[k][k];
275 }
276
277 for (i = k + 1; i < N; ++i) {
278 for (j = k + 1; j < 3; ++j) {
279 LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
280 }
281 }
282 }
283 }
284
285 UNROLLED
mat33SwapRows(struct Mat33 * A,const uint32_t i,const uint32_t j)286 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j)
287 {
288
289 const uint32_t N = 3;
290 uint32_t k;
291
292 if (i == j) {
293 return;
294 }
295
296 for (k = 0; k < N; ++k) {
297 float tmp = A->elem[i][k];
298 A->elem[i][k] = A->elem[j][k];
299 A->elem[j][k] = tmp;
300 }
301 }
302
303 UNROLLED
mat33Solve(const struct Mat33 * A,struct Vec3 * x,const struct Vec3 * b,const struct Size3 * pivot)304 void mat33Solve(const struct Mat33 *A, struct Vec3 *x, const struct Vec3 *b, const struct Size3 *pivot)
305 {
306
307 const uint32_t N = 3;
308 uint32_t i, k;
309
310 float bCopy[N];
311 bCopy[0] = b->x;
312 bCopy[1] = b->y;
313 bCopy[2] = b->z;
314
315 float _x[N];
316 for (k = 0; k < N; ++k) {
317 if (pivot->elem[k] != k) {
318 float tmp = bCopy[k];
319 bCopy[k] = bCopy[pivot->elem[k]];
320 bCopy[pivot->elem[k]] = tmp;
321 }
322
323 _x[k] = bCopy[k];
324 for (i = 0; i < k; ++i) {
325 _x[k] -= _x[i] * A->elem[k][i];
326 }
327 _x[k] /= A->elem[k][k];
328 }
329
330 for (k = N; k-- > 0;) {
331 for (i = k + 1; i < N; ++i) {
332 _x[k] -= _x[i] * A->elem[k][i];
333 }
334 }
335
336 initVec3(x, _x[0], _x[1], _x[2]);
337 }
338
339 /* Returns the eigenvalues and corresponding eigenvectors of the _symmetric_ matrix.
340 The i-th eigenvalue corresponds to the eigenvector in the i-th _row_ of "eigenvecs".
341 */
342
343 UNROLLED
mat33GetEigenbasis(struct Mat33 * S,struct Vec3 * eigenvals,struct Mat33 * eigenvecs)344 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals, struct Mat33 *eigenvecs)
345 {
346
347 const uint32_t N = 3;
348 uint32_t i, j, k, l, m;
349
350 float _eigenvals[N];
351
352 uint32_t ind[N];
353 for (k = 0; k < N; ++k) {
354 ind[k] = mat33Maxind(S, k);
355 _eigenvals[k] = S->elem[k][k];
356 }
357
358 initDiagonalMatrix(eigenvecs, 1.0f);
359
360 for (;;) {
361 m = 0;
362 for (k = 1; k + 1 < N; ++k) {
363 if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) {
364 m = k;
365 }
366 }
367
368 k = m;
369 l = ind[m];
370 float p = S->elem[k][l];
371
372 if (fabsf(p) < kEps) {
373 break;
374 }
375
376 float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f;
377
378 float t = fabsf(y) + sqrtf(p * p + y * y);
379 float s = sqrtf(p * p + t * t);
380 float c = t / s;
381 s = p / s;
382 t = p * p / t;
383
384 if (y < 0.0f) {
385 s = -s;
386 t = -t;
387 }
388
389 S->elem[k][l] = 0.0f;
390
391 _eigenvals[k] -= t;
392 _eigenvals[l] += t;
393
394 for (i = 0; i < k; ++i) {
395 mat33Rotate(S, c, s, i, k, i, l);
396 }
397
398 for (i = k + 1; i < l; ++i) {
399 mat33Rotate(S, c, s, k, i, i, l);
400 }
401
402 for (i = l + 1; i < N; ++i) {
403 mat33Rotate(S, c, s, k, i, l, i);
404 }
405
406 for (i = 0; i < N; ++i) {
407 float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i];
408 eigenvecs->elem[l][i] = s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i];
409 eigenvecs->elem[k][i] = tmp;
410 }
411
412 ind[k] = mat33Maxind(S, k);
413 ind[l] = mat33Maxind(S, l);
414
415 float sum = 0.0f;
416 for (i = 0; i < N; ++i) {
417 for (j = i + 1; j < N; ++j) {
418 sum += fabsf(S->elem[i][j]);
419 }
420 }
421
422 if (sum < kEps) {
423 break;
424 }
425 }
426
427 for (k = 0; k < N; ++k) {
428 m = k;
429 for (l = k + 1; l < N; ++l) {
430 if (_eigenvals[l] > _eigenvals[m]) {
431 m = l;
432 }
433 }
434
435 if (k != m) {
436 float tmp = _eigenvals[k];
437 _eigenvals[k] = _eigenvals[m];
438 _eigenvals[m] = tmp;
439
440 mat33SwapRows(eigenvecs, k, m);
441 }
442 }
443
444 initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]);
445 }
446
447 // index of largest off-diagonal element in row k
448 UNROLLED
mat33Maxind(const struct Mat33 * A,uint32_t k)449 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k)
450 {
451
452 const uint32_t N = 3;
453
454 uint32_t m = k + 1;
455 uint32_t i;
456
457 for (i = k + 2; i < N; ++i) {
458 if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) {
459 m = i;
460 }
461 }
462
463 return m;
464 }
465
mat33Rotate(struct Mat33 * A,float c,float s,uint32_t k,uint32_t l,uint32_t i,uint32_t j)466 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l, uint32_t i, uint32_t j)
467 {
468 float tmp = c * A->elem[k][l] - s * A->elem[i][j];
469 A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j];
470 A->elem[k][l] = tmp;
471 }
472
mat44Apply(struct Vec4 * out,const struct Mat44 * A,const struct Vec4 * v)473 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v)
474 {
475 // assert(out != v);
476
477 out->x =
478 A->elem[0][0] * v->x
479 + A->elem[0][1] * v->y
480 + A->elem[0][2] * v->z
481 + A->elem[0][3] * v->w;
482
483 out->y =
484 A->elem[1][0] * v->x
485 + A->elem[1][1] * v->y
486 + A->elem[1][2] * v->z
487 + A->elem[1][3] * v->w;
488
489 out->z =
490 A->elem[2][0] * v->x
491 + A->elem[2][1] * v->y
492 + A->elem[2][2] * v->z
493 + A->elem[2][3] * v->w;
494
495 out->w =
496 A->elem[3][0] * v->x
497 + A->elem[3][1] * v->y
498 + A->elem[3][2] * v->z
499 + A->elem[3][3] * v->w;
500 }
501
502 UNROLLED
mat44DecomposeLup(struct Mat44 * LU,struct Size4 * pivot)503 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot)
504 {
505 const uint32_t N = 4;
506 uint32_t i, j, k;
507
508 for (k = 0; k < N; ++k) {
509 pivot->elem[k] = k;
510 float max = fabsf(LU->elem[k][k]);
511 for (j = k + 1; j < N; ++j) {
512 if (max < fabsf(LU->elem[j][k])) {
513 max = fabsf(LU->elem[j][k]);
514 pivot->elem[k] = j;
515 }
516 }
517
518 if (pivot->elem[k] != k) {
519 mat44SwapRows(LU, k, pivot->elem[k]);
520 }
521
522 if (fabsf(LU->elem[k][k]) < kEps) {
523 continue;
524 }
525
526 for (j = k + 1; j < N; ++j) {
527 LU->elem[k][j] /= LU->elem[k][k];
528 }
529
530 for (i = k + 1; i < N; ++i) {
531 for (j = k + 1; j < N; ++j) {
532 LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
533 }
534 }
535 }
536 }
537
538 UNROLLED
mat44SwapRows(struct Mat44 * A,const uint32_t i,const uint32_t j)539 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j)
540 {
541
542 const uint32_t N = 4;
543 uint32_t k;
544
545 if (i == j) {
546 return;
547 }
548
549 for (k = 0; k < N; ++k) {
550 float tmp = A->elem[i][k];
551 A->elem[i][k] = A->elem[j][k];
552 A->elem[j][k] = tmp;
553 }
554 }
555
556 UNROLLED
mat44Solve(const struct Mat44 * A,struct Vec4 * x,const struct Vec4 * b,const struct Size4 * pivot)557 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b, const struct Size4 *pivot)
558 {
559
560 const uint32_t N = 4;
561 uint32_t i, k;
562
563 float bCopy[N];
564 bCopy[0] = b->x;
565 bCopy[1] = b->y;
566 bCopy[2] = b->z;
567 bCopy[3] = b->w;
568
569 float _x[N];
570 for (k = 0; k < N; ++k) {
571 if (pivot->elem[k] != k) {
572 float tmp = bCopy[k];
573 bCopy[k] = bCopy[pivot->elem[k]];
574 bCopy[pivot->elem[k]] = tmp;
575 }
576
577 _x[k] = bCopy[k];
578 for (i = 0; i < k; ++i) {
579 _x[k] -= _x[i] * A->elem[k][i];
580 }
581 _x[k] /= A->elem[k][k];
582 }
583
584 for (k = N; k-- > 0;) {
585 for (i = k + 1; i < N; ++i) {
586 _x[k] -= _x[i] * A->elem[k][i];
587 }
588 }
589
590 initVec4(x, _x[0], _x[1], _x[2], _x[3]);
591 }
592
593
594