1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "common/math/mat.h"
18
19 #include <float.h>
20
21 #include "chre/util/nanoapp/assert.h"
22
23 #ifdef _OS_BUILD_
24 #include <nanohub_math.h>
25 #include <seos.h>
26 #else
27 #include <math.h>
28 #ifndef UNROLLED
29 #define UNROLLED
30 #endif
31 #endif // _OS_BUILD_
32
33 #include <stddef.h>
34 #include <string.h>
35
36 #define EPSILON 1E-5f
37 #define CHOLESKY_TOLERANCE 1E-6f
38
39 // Forward declarations.
40 static void mat33SwapRows(struct Mat33 *A, uint32_t i, uint32_t j);
41 static uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k);
42 static void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k,
43 uint32_t l, uint32_t i, uint32_t j);
44
45 static void mat44SwapRows(struct Mat44 *A, uint32_t i, uint32_t j);
46
initZeroMatrix(struct Mat33 * A)47 void initZeroMatrix(struct Mat33 *A) {
48 CHRE_ASSERT_NOT_NULL(A);
49 memset(A->elem, 0.0f, sizeof(A->elem));
50 }
51
52 UNROLLED
initDiagonalMatrix(struct Mat33 * A,float x)53 void initDiagonalMatrix(struct Mat33 *A, float x) {
54 CHRE_ASSERT_NOT_NULL(A);
55 initZeroMatrix(A);
56
57 uint32_t i;
58 for (i = 0; i < 3; ++i) {
59 A->elem[i][i] = x;
60 }
61 }
62
initMatrixColumns(struct Mat33 * A,const struct Vec3 * v1,const struct Vec3 * v2,const struct Vec3 * v3)63 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1,
64 const struct Vec3 *v2, const struct Vec3 *v3) {
65 CHRE_ASSERT_NOT_NULL(A);
66 CHRE_ASSERT_NOT_NULL(v1);
67 CHRE_ASSERT_NOT_NULL(v2);
68 CHRE_ASSERT_NOT_NULL(v3);
69 A->elem[0][0] = v1->x;
70 A->elem[0][1] = v2->x;
71 A->elem[0][2] = v3->x;
72
73 A->elem[1][0] = v1->y;
74 A->elem[1][1] = v2->y;
75 A->elem[1][2] = v3->y;
76
77 A->elem[2][0] = v1->z;
78 A->elem[2][1] = v2->z;
79 A->elem[2][2] = v3->z;
80 }
81
mat33Apply(struct Vec3 * out,const struct Mat33 * A,const struct Vec3 * v)82 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) {
83 CHRE_ASSERT_NOT_NULL(out);
84 CHRE_ASSERT_NOT_NULL(A);
85 CHRE_ASSERT_NOT_NULL(v);
86 out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z;
87 out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z;
88 out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z;
89 }
90
91 UNROLLED
mat33Multiply(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)92 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A,
93 const struct Mat33 *B) {
94 CHRE_ASSERT_NOT_NULL(out);
95 CHRE_ASSERT_NOT_NULL(A);
96 CHRE_ASSERT_NOT_NULL(B);
97 CHRE_ASSERT(out != A);
98 CHRE_ASSERT(out != B);
99
100 uint32_t i;
101 for (i = 0; i < 3; ++i) {
102 uint32_t j;
103 for (j = 0; j < 3; ++j) {
104 uint32_t k;
105 float sum = 0.0f;
106 for (k = 0; k < 3; ++k) {
107 sum += A->elem[i][k] * B->elem[k][j];
108 }
109
110 out->elem[i][j] = sum;
111 }
112 }
113 }
114
115 UNROLLED
mat33ScalarMul(struct Mat33 * A,float c)116 void mat33ScalarMul(struct Mat33 *A, float c) {
117 CHRE_ASSERT_NOT_NULL(A);
118 uint32_t i;
119 for (i = 0; i < 3; ++i) {
120 uint32_t j;
121 for (j = 0; j < 3; ++j) {
122 A->elem[i][j] *= c;
123 }
124 }
125 }
126
127 UNROLLED
mat33Add(struct Mat33 * out,const struct Mat33 * A)128 void mat33Add(struct Mat33 *out, const struct Mat33 *A) {
129 CHRE_ASSERT_NOT_NULL(out);
130 CHRE_ASSERT_NOT_NULL(A);
131 uint32_t i;
132 for (i = 0; i < 3; ++i) {
133 uint32_t j;
134 for (j = 0; j < 3; ++j) {
135 out->elem[i][j] += A->elem[i][j];
136 }
137 }
138 }
139
140 UNROLLED
mat33Sub(struct Mat33 * out,const struct Mat33 * A)141 void mat33Sub(struct Mat33 *out, const struct Mat33 *A) {
142 CHRE_ASSERT_NOT_NULL(out);
143 CHRE_ASSERT_NOT_NULL(A);
144 uint32_t i;
145 for (i = 0; i < 3; ++i) {
146 uint32_t j;
147 for (j = 0; j < 3; ++j) {
148 out->elem[i][j] -= A->elem[i][j];
149 }
150 }
151 }
152
153 UNROLLED
mat33IsPositiveSemidefinite(const struct Mat33 * A,float tolerance)154 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) {
155 CHRE_ASSERT_NOT_NULL(A);
156 uint32_t i;
157 for (i = 0; i < 3; ++i) {
158 if (A->elem[i][i] < 0.0f) {
159 return 0;
160 }
161 }
162
163 for (i = 0; i < 3; ++i) {
164 uint32_t j;
165 for (j = i + 1; j < 3; ++j) {
166 if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) {
167 return 0;
168 }
169 }
170 }
171
172 return 1;
173 }
174
175 UNROLLED
mat33MultiplyTransposed(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)176 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A,
177 const struct Mat33 *B) {
178 CHRE_ASSERT(out != A);
179 CHRE_ASSERT(out != B);
180 CHRE_ASSERT_NOT_NULL(out);
181 CHRE_ASSERT_NOT_NULL(A);
182 CHRE_ASSERT_NOT_NULL(B);
183 uint32_t i;
184 for (i = 0; i < 3; ++i) {
185 uint32_t j;
186 for (j = 0; j < 3; ++j) {
187 uint32_t k;
188 float sum = 0.0f;
189 for (k = 0; k < 3; ++k) {
190 sum += A->elem[k][i] * B->elem[k][j];
191 }
192
193 out->elem[i][j] = sum;
194 }
195 }
196 }
197
198 UNROLLED
mat33MultiplyTransposed2(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)199 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A,
200 const struct Mat33 *B) {
201 CHRE_ASSERT(out != A);
202 CHRE_ASSERT(out != B);
203 CHRE_ASSERT_NOT_NULL(out);
204 CHRE_ASSERT_NOT_NULL(A);
205 CHRE_ASSERT_NOT_NULL(B);
206 uint32_t i;
207 for (i = 0; i < 3; ++i) {
208 uint32_t j;
209 for (j = 0; j < 3; ++j) {
210 uint32_t k;
211 float sum = 0.0f;
212 for (k = 0; k < 3; ++k) {
213 sum += A->elem[i][k] * B->elem[j][k];
214 }
215
216 out->elem[i][j] = sum;
217 }
218 }
219 }
220
221 UNROLLED
mat33Invert(struct Mat33 * out,const struct Mat33 * A)222 void mat33Invert(struct Mat33 *out, const struct Mat33 *A) {
223 CHRE_ASSERT_NOT_NULL(out);
224 CHRE_ASSERT_NOT_NULL(A);
225 float t;
226 initDiagonalMatrix(out, 1.0f);
227
228 struct Mat33 tmp = *A;
229
230 uint32_t i, k;
231 for (i = 0; i < 3; ++i) {
232 uint32_t swap = i;
233 uint32_t j;
234 for (j = i + 1; j < 3; ++j) {
235 if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) {
236 swap = j;
237 }
238 }
239
240 if (swap != i) {
241 for (k = 0; k < 3; ++k) {
242 t = tmp.elem[i][k];
243 tmp.elem[i][k] = tmp.elem[swap][k];
244 tmp.elem[swap][k] = t;
245
246 t = out->elem[i][k];
247 out->elem[i][k] = out->elem[swap][k];
248 out->elem[swap][k] = t;
249 }
250 }
251 // divide by zero guard.
252 CHRE_ASSERT(fabsf(tmp.elem[i][i]) > 0);
253 if(!(fabsf(tmp.elem[i][i]) > 0)) {
254 return;
255 }
256 t = 1.0f / tmp.elem[i][i];
257 for (k = 0; k < 3; ++k) {
258 tmp.elem[i][k] *= t;
259 out->elem[i][k] *= t;
260 }
261
262 for (j = 0; j < 3; ++j) {
263 if (j != i) {
264 t = tmp.elem[j][i];
265 for (k = 0; k < 3; ++k) {
266 tmp.elem[j][k] -= tmp.elem[i][k] * t;
267 out->elem[j][k] -= out->elem[i][k] * t;
268 }
269 }
270 }
271 }
272 }
273
274 UNROLLED
mat33Transpose(struct Mat33 * out,const struct Mat33 * A)275 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) {
276 CHRE_ASSERT_NOT_NULL(out);
277 CHRE_ASSERT_NOT_NULL(A);
278 uint32_t i;
279 for (i = 0; i < 3; ++i) {
280 uint32_t j;
281 for (j = 0; j < 3; ++j) {
282 out->elem[i][j] = A->elem[j][i];
283 }
284 }
285 }
286
287 UNROLLED
mat33SwapRows(struct Mat33 * A,const uint32_t i,const uint32_t j)288 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) {
289 CHRE_ASSERT_NOT_NULL(A);
290 const uint32_t N = 3;
291 uint32_t k;
292
293 if (i == j) {
294 return;
295 }
296
297 for (k = 0; k < N; ++k) {
298 float tmp = A->elem[i][k];
299 A->elem[i][k] = A->elem[j][k];
300 A->elem[j][k] = tmp;
301 }
302 }
303
304 UNROLLED
mat33GetEigenbasis(struct Mat33 * S,struct Vec3 * eigenvals,struct Mat33 * eigenvecs)305 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals,
306 struct Mat33 *eigenvecs) {
307 CHRE_ASSERT_NOT_NULL(S);
308 CHRE_ASSERT_NOT_NULL(eigenvals);
309 CHRE_ASSERT_NOT_NULL(eigenvecs);
310 const uint32_t N = 3;
311 uint32_t i, j, k, l, m;
312
313 float _eigenvals[N];
314
315 uint32_t ind[N];
316 for (k = 0; k < N; ++k) {
317 ind[k] = mat33Maxind(S, k);
318 _eigenvals[k] = S->elem[k][k];
319 }
320
321 initDiagonalMatrix(eigenvecs, 1.0f);
322
323 for (;;) {
324 m = 0;
325 for (k = 1; k + 1 < N; ++k) {
326 if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) {
327 m = k;
328 }
329 }
330
331 k = m;
332 l = ind[m];
333 float p = S->elem[k][l];
334
335 if (fabsf(p) < EPSILON) {
336 break;
337 }
338
339 float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f;
340
341 float t = fabsf(y) + sqrtf(p * p + y * y);
342 float s = sqrtf(p * p + t * t);
343 float c = t / s;
344 s = p / s;
345 t = p * p / t;
346
347 if (y < 0.0f) {
348 s = -s;
349 t = -t;
350 }
351
352 S->elem[k][l] = 0.0f;
353
354 _eigenvals[k] -= t;
355 _eigenvals[l] += t;
356
357 for (i = 0; i < k; ++i) {
358 mat33Rotate(S, c, s, i, k, i, l);
359 }
360
361 for (i = k + 1; i < l; ++i) {
362 mat33Rotate(S, c, s, k, i, i, l);
363 }
364
365 for (i = l + 1; i < N; ++i) {
366 mat33Rotate(S, c, s, k, i, l, i);
367 }
368
369 for (i = 0; i < N; ++i) {
370 float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i];
371 eigenvecs->elem[l][i] =
372 s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i];
373 eigenvecs->elem[k][i] = tmp;
374 }
375
376 ind[k] = mat33Maxind(S, k);
377 ind[l] = mat33Maxind(S, l);
378
379 float sum = 0.0f;
380 for (i = 0; i < N; ++i) {
381 for (j = i + 1; j < N; ++j) {
382 sum += fabsf(S->elem[i][j]);
383 }
384 }
385
386 if (sum < EPSILON) {
387 break;
388 }
389 }
390
391 for (k = 0; k < N; ++k) {
392 m = k;
393 for (l = k + 1; l < N; ++l) {
394 if (_eigenvals[l] > _eigenvals[m]) {
395 m = l;
396 }
397 }
398
399 if (k != m) {
400 float tmp = _eigenvals[k];
401 _eigenvals[k] = _eigenvals[m];
402 _eigenvals[m] = tmp;
403
404 mat33SwapRows(eigenvecs, k, m);
405 }
406 }
407
408 initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]);
409 }
410
mat33Determinant(const struct Mat33 * A)411 float mat33Determinant(const struct Mat33 *A) {
412 CHRE_ASSERT_NOT_NULL(A);
413 return A->elem[0][0] *
414 (A->elem[1][1] * A->elem[2][2] - A->elem[1][2] * A->elem[2][1])
415 - A->elem[0][1] *
416 (A->elem[1][0] * A->elem[2][2] - A->elem[1][2] * A->elem[2][0])
417 + A->elem[0][2] *
418 (A->elem[1][0] * A->elem[2][1] - A->elem[1][1] * A->elem[2][0]);
419 }
420
421 // index of largest off-diagonal element in row k
422 UNROLLED
mat33Maxind(const struct Mat33 * A,uint32_t k)423 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) {
424 CHRE_ASSERT_NOT_NULL(A);
425 const uint32_t N = 3;
426
427 uint32_t m = k + 1;
428 uint32_t i;
429
430 for (i = k + 2; i < N; ++i) {
431 if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) {
432 m = i;
433 }
434 }
435
436 return m;
437 }
438
mat33Rotate(struct Mat33 * A,float c,float s,uint32_t k,uint32_t l,uint32_t i,uint32_t j)439 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l,
440 uint32_t i, uint32_t j) {
441 CHRE_ASSERT_NOT_NULL(A);
442 float tmp = c * A->elem[k][l] - s * A->elem[i][j];
443 A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j];
444 A->elem[k][l] = tmp;
445 }
446
mat44Apply(struct Vec4 * out,const struct Mat44 * A,const struct Vec4 * v)447 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) {
448 CHRE_ASSERT_NOT_NULL(out);
449 CHRE_ASSERT_NOT_NULL(A);
450 CHRE_ASSERT_NOT_NULL(v);
451
452 out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z +
453 A->elem[0][3] * v->w;
454
455 out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z +
456 A->elem[1][3] * v->w;
457
458 out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z +
459 A->elem[2][3] * v->w;
460
461 out->w = A->elem[3][0] * v->x + A->elem[3][1] * v->y + A->elem[3][2] * v->z +
462 A->elem[3][3] * v->w;
463 }
464
465 UNROLLED
mat44DecomposeLup(struct Mat44 * LU,struct Size4 * pivot)466 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) {
467 CHRE_ASSERT_NOT_NULL(LU);
468 CHRE_ASSERT_NOT_NULL(pivot);
469 const uint32_t N = 4;
470 uint32_t i, j, k;
471
472 for (k = 0; k < N; ++k) {
473 pivot->elem[k] = k;
474 float max = fabsf(LU->elem[k][k]);
475 for (j = k + 1; j < N; ++j) {
476 if (max < fabsf(LU->elem[j][k])) {
477 max = fabsf(LU->elem[j][k]);
478 pivot->elem[k] = j;
479 }
480 }
481
482 if (pivot->elem[k] != k) {
483 mat44SwapRows(LU, k, pivot->elem[k]);
484 }
485
486 if (fabsf(LU->elem[k][k]) < EPSILON) {
487 continue;
488 }
489
490 for (j = k + 1; j < N; ++j) {
491 LU->elem[k][j] /= LU->elem[k][k];
492 }
493
494 for (i = k + 1; i < N; ++i) {
495 for (j = k + 1; j < N; ++j) {
496 LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
497 }
498 }
499 }
500 }
501
502 UNROLLED
mat44SwapRows(struct Mat44 * A,const uint32_t i,const uint32_t j)503 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) {
504 CHRE_ASSERT_NOT_NULL(A);
505 const uint32_t N = 4;
506 uint32_t k;
507
508 if (i == j) {
509 return;
510 }
511
512 for (k = 0; k < N; ++k) {
513 float tmp = A->elem[i][k];
514 A->elem[i][k] = A->elem[j][k];
515 A->elem[j][k] = tmp;
516 }
517 }
518
519 UNROLLED
mat44Solve(const struct Mat44 * A,struct Vec4 * x,const struct Vec4 * b,const struct Size4 * pivot)520 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b,
521 const struct Size4 *pivot) {
522 CHRE_ASSERT_NOT_NULL(A);
523 CHRE_ASSERT_NOT_NULL(x);
524 CHRE_ASSERT_NOT_NULL(b);
525 CHRE_ASSERT_NOT_NULL(pivot);
526 const uint32_t N = 4;
527 uint32_t i, k;
528
529 float bCopy[N];
530 bCopy[0] = b->x;
531 bCopy[1] = b->y;
532 bCopy[2] = b->z;
533 bCopy[3] = b->w;
534
535 float _x[N];
536 for (k = 0; k < N; ++k) {
537 if (pivot->elem[k] != k) {
538 float tmp = bCopy[k];
539 bCopy[k] = bCopy[pivot->elem[k]];
540 bCopy[pivot->elem[k]] = tmp;
541 }
542
543 _x[k] = bCopy[k];
544 for (i = 0; i < k; ++i) {
545 _x[k] -= _x[i] * A->elem[k][i];
546 }
547 _x[k] /= A->elem[k][k];
548 }
549
550 for (k = N; k-- > 0;) {
551 for (i = k + 1; i < N; ++i) {
552 _x[k] -= _x[i] * A->elem[k][i];
553 }
554 }
555
556 initVec4(x, _x[0], _x[1], _x[2], _x[3]);
557 }
558
matMaxDiagonalElement(const float * square_mat,size_t n)559 float matMaxDiagonalElement(const float *square_mat, size_t n) {
560 CHRE_ASSERT_NOT_NULL(square_mat);
561 CHRE_ASSERT(n > 0);
562 size_t i;
563 float max = square_mat[0];
564 const size_t n_square = n * n;
565 const size_t offset = n + 1;
566 for (i = offset; i < n_square; i += offset) {
567 if (square_mat[i] > max) {
568 max = square_mat[i];
569 }
570 }
571 return max;
572 }
573
matAddConstantDiagonal(float * square_mat,float u,size_t n)574 void matAddConstantDiagonal(float *square_mat, float u, size_t n) {
575 CHRE_ASSERT_NOT_NULL(square_mat);
576 size_t i;
577 const size_t n_square = n * n;
578 const size_t offset = n + 1;
579 for (i = 0; i < n_square; i += offset) {
580 square_mat[i] += u;
581 }
582 }
583
matTransposeMultiplyMat(float * out,const float * A,size_t nrows,size_t ncols)584 void matTransposeMultiplyMat(float *out, const float *A,
585 size_t nrows, size_t ncols) {
586 CHRE_ASSERT_NOT_NULL(out);
587 CHRE_ASSERT_NOT_NULL(A);
588 size_t i;
589 size_t j;
590 size_t k;
591 memset(out, 0, sizeof(float) * ncols * ncols);
592 for (i = 0; i < ncols; ++i) {
593 for (j = 0; j < ncols; ++j) {
594 // Since A' * A is symmetric, can use upper diagonal elements
595 // to fill in the lower diagonal without recomputing.
596 if (j < i) {
597 out[i * ncols + j] = out[j * ncols + i];
598 } else {
599 // mat_out[i, j] = ai ' * aj
600 out[i * ncols + j] = 0;
601 for (k = 0; k < nrows; ++k) {
602 out[i * ncols + j] += A[k * ncols + i] *
603 A[k * ncols + j];
604 }
605 }
606 }
607 }
608 }
609
matMultiplyVec(float * out,const float * A,const float * v,size_t nrows,size_t ncols)610 void matMultiplyVec(float *out, const float *A, const float *v,
611 size_t nrows, size_t ncols) {
612 CHRE_ASSERT_NOT_NULL(out);
613 CHRE_ASSERT_NOT_NULL(A);
614 CHRE_ASSERT_NOT_NULL(v);
615 size_t i;
616 for (i = 0; i < nrows; ++i) {
617 const float *row = &A[i * ncols];
618 out[i] = vecDot(row, v, ncols);
619 }
620 }
621
matTransposeMultiplyVec(float * out,const float * A,const float * v,size_t nrows,size_t ncols)622 void matTransposeMultiplyVec(float *out, const float *A, const float *v,
623 size_t nrows, size_t ncols) {
624 CHRE_ASSERT_NOT_NULL(out);
625 CHRE_ASSERT_NOT_NULL(A);
626 CHRE_ASSERT_NOT_NULL(v);
627 size_t i, j;
628 for (i = 0; i < ncols; ++i) {
629 out[i] = 0;
630 for (j = 0; j < nrows; ++j) {
631 out[i] += A[j * ncols + i] * v[j];
632 }
633 }
634 }
635
matLinearSolveCholesky(float * x,const float * L,const float * b,size_t n)636 bool matLinearSolveCholesky(float *x, const float *L, const float *b, size_t n) {
637 CHRE_ASSERT_NOT_NULL(x);
638 CHRE_ASSERT_NOT_NULL(L);
639 CHRE_ASSERT_NOT_NULL(b);
640 CHRE_ASSERT(n <= INT32_MAX);
641 int32_t i, j; // Loops below require signed integers.
642 int32_t s_n = (int32_t)n; // Signed n.
643 float sum = 0.0f;
644 // 1. Solve Ly = b through forward substitution. Use x[] to store y.
645 for (i = 0; i < s_n; ++i) {
646 sum = 0.0f;
647 for (j = 0; j < i; ++j) {
648 sum += L[i * s_n + j] * x[j];
649 }
650 // Check for non-zero diagonals (don't divide by zero).
651 if (L[i * s_n + i] < EPSILON) {
652 return false;
653 }
654 x[i] = (b[i] - sum) / L[i * s_n + i];
655 }
656
657 // 2. Solve L'x = y through backwards substitution. Use x[] to store both
658 // y and x.
659 for (i = s_n - 1; i >= 0; --i) {
660 sum = 0.0f;
661 for (j = i + 1; j < s_n; ++j) {
662 sum += L[j * s_n + i] * x[j];
663 }
664 x[i] = (x[i] - sum) / L[i * s_n + i];
665 }
666
667 return true;
668 }
669
matCholeskyDecomposition(float * L,const float * A,size_t n)670 bool matCholeskyDecomposition(float *L, const float *A, size_t n) {
671 CHRE_ASSERT_NOT_NULL(L);
672 CHRE_ASSERT_NOT_NULL(A);
673 size_t i, j, k;
674 float sum = 0.0f;
675 // initialize L to zero.
676 memset(L, 0, sizeof(float) * n * n);
677
678 for (i = 0; i < n; ++i) {
679 // compute L[i][i] = sqrt(A[i][i] - sum_k = 1:i-1 L^2[i][k])
680 sum = 0.0f;
681 for (k = 0; k < i; ++k) {
682 sum += L[i * n + k] * L[i * n + k];
683 }
684 sum = A[i * n + i] - sum;
685 // If diagonal element of L is too small, cholesky fails.
686 if (sum < CHOLESKY_TOLERANCE) {
687 return false;
688 }
689 L[i * n + i] = sqrtf(sum);
690
691 // for j = i+1:N, compute L[j][i] =
692 // (1/L[i][i]) * (A[i][j] - sum_k = 1:i-1 L[i][k] * L[j][k])
693 for (j = i + 1; j < n; ++j) {
694 sum = 0.0f;
695 for (k = 0; k < i; ++k) {
696 sum += L[i * n + k] * L[j * n + k];
697 }
698 // division okay because magnitude of L[i][i] already checked above.
699 L[j * n + i] = (A[i * n + j] - sum) / L[i * n + i];
700 }
701 }
702
703 return true;
704 }
705