1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "common/math/mat.h"
18 
19 #include <float.h>
20 
21 #include "chre/util/nanoapp/assert.h"
22 
23 #ifdef _OS_BUILD_
24 #include <nanohub_math.h>
25 #include <seos.h>
26 #else
27 #include <math.h>
28 #ifndef UNROLLED
29 #define UNROLLED
30 #endif
31 #endif  // _OS_BUILD_
32 
33 #include <stddef.h>
34 #include <string.h>
35 
36 #define EPSILON 1E-5f
37 #define CHOLESKY_TOLERANCE 1E-6f
38 
39 // Forward declarations.
40 static void mat33SwapRows(struct Mat33 *A, uint32_t i, uint32_t j);
41 static uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k);
42 static void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k,
43                         uint32_t l, uint32_t i, uint32_t j);
44 
45 static void mat44SwapRows(struct Mat44 *A, uint32_t i, uint32_t j);
46 
initZeroMatrix(struct Mat33 * A)47 void initZeroMatrix(struct Mat33 *A) {
48   CHRE_ASSERT_NOT_NULL(A);
49   memset(A->elem, 0.0f, sizeof(A->elem));
50 }
51 
52 UNROLLED
initDiagonalMatrix(struct Mat33 * A,float x)53 void initDiagonalMatrix(struct Mat33 *A, float x) {
54   CHRE_ASSERT_NOT_NULL(A);
55   initZeroMatrix(A);
56 
57   uint32_t i;
58   for (i = 0; i < 3; ++i) {
59     A->elem[i][i] = x;
60   }
61 }
62 
initMatrixColumns(struct Mat33 * A,const struct Vec3 * v1,const struct Vec3 * v2,const struct Vec3 * v3)63 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1,
64                        const struct Vec3 *v2, const struct Vec3 *v3) {
65   CHRE_ASSERT_NOT_NULL(A);
66   CHRE_ASSERT_NOT_NULL(v1);
67   CHRE_ASSERT_NOT_NULL(v2);
68   CHRE_ASSERT_NOT_NULL(v3);
69   A->elem[0][0] = v1->x;
70   A->elem[0][1] = v2->x;
71   A->elem[0][2] = v3->x;
72 
73   A->elem[1][0] = v1->y;
74   A->elem[1][1] = v2->y;
75   A->elem[1][2] = v3->y;
76 
77   A->elem[2][0] = v1->z;
78   A->elem[2][1] = v2->z;
79   A->elem[2][2] = v3->z;
80 }
81 
mat33Apply(struct Vec3 * out,const struct Mat33 * A,const struct Vec3 * v)82 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) {
83   CHRE_ASSERT_NOT_NULL(out);
84   CHRE_ASSERT_NOT_NULL(A);
85   CHRE_ASSERT_NOT_NULL(v);
86   out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z;
87   out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z;
88   out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z;
89 }
90 
91 UNROLLED
mat33Multiply(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)92 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A,
93                    const struct Mat33 *B) {
94   CHRE_ASSERT_NOT_NULL(out);
95   CHRE_ASSERT_NOT_NULL(A);
96   CHRE_ASSERT_NOT_NULL(B);
97   CHRE_ASSERT(out != A);
98   CHRE_ASSERT(out != B);
99 
100   uint32_t i;
101   for (i = 0; i < 3; ++i) {
102     uint32_t j;
103     for (j = 0; j < 3; ++j) {
104       uint32_t k;
105       float sum = 0.0f;
106       for (k = 0; k < 3; ++k) {
107         sum += A->elem[i][k] * B->elem[k][j];
108       }
109 
110       out->elem[i][j] = sum;
111     }
112   }
113 }
114 
115 UNROLLED
mat33ScalarMul(struct Mat33 * A,float c)116 void mat33ScalarMul(struct Mat33 *A, float c) {
117   CHRE_ASSERT_NOT_NULL(A);
118   uint32_t i;
119   for (i = 0; i < 3; ++i) {
120     uint32_t j;
121     for (j = 0; j < 3; ++j) {
122       A->elem[i][j] *= c;
123     }
124   }
125 }
126 
127 UNROLLED
mat33Add(struct Mat33 * out,const struct Mat33 * A)128 void mat33Add(struct Mat33 *out, const struct Mat33 *A) {
129   CHRE_ASSERT_NOT_NULL(out);
130   CHRE_ASSERT_NOT_NULL(A);
131   uint32_t i;
132   for (i = 0; i < 3; ++i) {
133     uint32_t j;
134     for (j = 0; j < 3; ++j) {
135       out->elem[i][j] += A->elem[i][j];
136     }
137   }
138 }
139 
140 UNROLLED
mat33Sub(struct Mat33 * out,const struct Mat33 * A)141 void mat33Sub(struct Mat33 *out, const struct Mat33 *A) {
142   CHRE_ASSERT_NOT_NULL(out);
143   CHRE_ASSERT_NOT_NULL(A);
144   uint32_t i;
145   for (i = 0; i < 3; ++i) {
146     uint32_t j;
147     for (j = 0; j < 3; ++j) {
148       out->elem[i][j] -= A->elem[i][j];
149     }
150   }
151 }
152 
153 UNROLLED
mat33IsPositiveSemidefinite(const struct Mat33 * A,float tolerance)154 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) {
155   CHRE_ASSERT_NOT_NULL(A);
156   uint32_t i;
157   for (i = 0; i < 3; ++i) {
158     if (A->elem[i][i] < 0.0f) {
159       return 0;
160     }
161   }
162 
163   for (i = 0; i < 3; ++i) {
164     uint32_t j;
165     for (j = i + 1; j < 3; ++j) {
166       if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) {
167         return 0;
168       }
169     }
170   }
171 
172   return 1;
173 }
174 
175 UNROLLED
mat33MultiplyTransposed(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)176 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A,
177                              const struct Mat33 *B) {
178   CHRE_ASSERT(out != A);
179   CHRE_ASSERT(out != B);
180   CHRE_ASSERT_NOT_NULL(out);
181   CHRE_ASSERT_NOT_NULL(A);
182   CHRE_ASSERT_NOT_NULL(B);
183   uint32_t i;
184   for (i = 0; i < 3; ++i) {
185     uint32_t j;
186     for (j = 0; j < 3; ++j) {
187       uint32_t k;
188       float sum = 0.0f;
189       for (k = 0; k < 3; ++k) {
190         sum += A->elem[k][i] * B->elem[k][j];
191       }
192 
193       out->elem[i][j] = sum;
194     }
195   }
196 }
197 
198 UNROLLED
mat33MultiplyTransposed2(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)199 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A,
200                               const struct Mat33 *B) {
201   CHRE_ASSERT(out != A);
202   CHRE_ASSERT(out != B);
203   CHRE_ASSERT_NOT_NULL(out);
204   CHRE_ASSERT_NOT_NULL(A);
205   CHRE_ASSERT_NOT_NULL(B);
206   uint32_t i;
207   for (i = 0; i < 3; ++i) {
208     uint32_t j;
209     for (j = 0; j < 3; ++j) {
210       uint32_t k;
211       float sum = 0.0f;
212       for (k = 0; k < 3; ++k) {
213         sum += A->elem[i][k] * B->elem[j][k];
214       }
215 
216       out->elem[i][j] = sum;
217     }
218   }
219 }
220 
221 UNROLLED
mat33Invert(struct Mat33 * out,const struct Mat33 * A)222 void mat33Invert(struct Mat33 *out, const struct Mat33 *A) {
223   CHRE_ASSERT_NOT_NULL(out);
224   CHRE_ASSERT_NOT_NULL(A);
225   float t;
226   initDiagonalMatrix(out, 1.0f);
227 
228   struct Mat33 tmp = *A;
229 
230   uint32_t i, k;
231   for (i = 0; i < 3; ++i) {
232     uint32_t swap = i;
233     uint32_t j;
234     for (j = i + 1; j < 3; ++j) {
235       if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) {
236         swap = j;
237       }
238     }
239 
240     if (swap != i) {
241       for (k = 0; k < 3; ++k) {
242         t = tmp.elem[i][k];
243         tmp.elem[i][k] = tmp.elem[swap][k];
244         tmp.elem[swap][k] = t;
245 
246         t = out->elem[i][k];
247         out->elem[i][k] = out->elem[swap][k];
248         out->elem[swap][k] = t;
249       }
250     }
251     // divide by zero guard.
252     CHRE_ASSERT(fabsf(tmp.elem[i][i]) > 0);
253     if(!(fabsf(tmp.elem[i][i]) > 0)) {
254       return;
255     }
256     t = 1.0f / tmp.elem[i][i];
257     for (k = 0; k < 3; ++k) {
258       tmp.elem[i][k] *= t;
259       out->elem[i][k] *= t;
260     }
261 
262     for (j = 0; j < 3; ++j) {
263       if (j != i) {
264         t = tmp.elem[j][i];
265         for (k = 0; k < 3; ++k) {
266           tmp.elem[j][k] -= tmp.elem[i][k] * t;
267           out->elem[j][k] -= out->elem[i][k] * t;
268         }
269       }
270     }
271   }
272 }
273 
274 UNROLLED
mat33Transpose(struct Mat33 * out,const struct Mat33 * A)275 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) {
276   CHRE_ASSERT_NOT_NULL(out);
277   CHRE_ASSERT_NOT_NULL(A);
278   uint32_t i;
279   for (i = 0; i < 3; ++i) {
280     uint32_t j;
281     for (j = 0; j < 3; ++j) {
282       out->elem[i][j] = A->elem[j][i];
283     }
284   }
285 }
286 
287 UNROLLED
mat33SwapRows(struct Mat33 * A,const uint32_t i,const uint32_t j)288 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) {
289   CHRE_ASSERT_NOT_NULL(A);
290   const uint32_t N = 3;
291   uint32_t k;
292 
293   if (i == j) {
294     return;
295   }
296 
297   for (k = 0; k < N; ++k) {
298     float tmp = A->elem[i][k];
299     A->elem[i][k] = A->elem[j][k];
300     A->elem[j][k] = tmp;
301   }
302 }
303 
304 UNROLLED
mat33GetEigenbasis(struct Mat33 * S,struct Vec3 * eigenvals,struct Mat33 * eigenvecs)305 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals,
306                         struct Mat33 *eigenvecs) {
307   CHRE_ASSERT_NOT_NULL(S);
308   CHRE_ASSERT_NOT_NULL(eigenvals);
309   CHRE_ASSERT_NOT_NULL(eigenvecs);
310   const uint32_t N = 3;
311   uint32_t i, j, k, l, m;
312 
313   float _eigenvals[N];
314 
315   uint32_t ind[N];
316   for (k = 0; k < N; ++k) {
317     ind[k] = mat33Maxind(S, k);
318     _eigenvals[k] = S->elem[k][k];
319   }
320 
321   initDiagonalMatrix(eigenvecs, 1.0f);
322 
323   for (;;) {
324     m = 0;
325     for (k = 1; k + 1 < N; ++k) {
326       if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) {
327         m = k;
328       }
329     }
330 
331     k = m;
332     l = ind[m];
333     float p = S->elem[k][l];
334 
335     if (fabsf(p) < EPSILON) {
336       break;
337     }
338 
339     float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f;
340 
341     float t = fabsf(y) + sqrtf(p * p + y * y);
342     float s = sqrtf(p * p + t * t);
343     float c = t / s;
344     s = p / s;
345     t = p * p / t;
346 
347     if (y < 0.0f) {
348       s = -s;
349       t = -t;
350     }
351 
352     S->elem[k][l] = 0.0f;
353 
354     _eigenvals[k] -= t;
355     _eigenvals[l] += t;
356 
357     for (i = 0; i < k; ++i) {
358       mat33Rotate(S, c, s, i, k, i, l);
359     }
360 
361     for (i = k + 1; i < l; ++i) {
362       mat33Rotate(S, c, s, k, i, i, l);
363     }
364 
365     for (i = l + 1; i < N; ++i) {
366       mat33Rotate(S, c, s, k, i, l, i);
367     }
368 
369     for (i = 0; i < N; ++i) {
370       float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i];
371       eigenvecs->elem[l][i] =
372           s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i];
373       eigenvecs->elem[k][i] = tmp;
374     }
375 
376     ind[k] = mat33Maxind(S, k);
377     ind[l] = mat33Maxind(S, l);
378 
379     float sum = 0.0f;
380     for (i = 0; i < N; ++i) {
381       for (j = i + 1; j < N; ++j) {
382         sum += fabsf(S->elem[i][j]);
383       }
384     }
385 
386     if (sum < EPSILON) {
387       break;
388     }
389   }
390 
391   for (k = 0; k < N; ++k) {
392     m = k;
393     for (l = k + 1; l < N; ++l) {
394       if (_eigenvals[l] > _eigenvals[m]) {
395         m = l;
396       }
397     }
398 
399     if (k != m) {
400       float tmp = _eigenvals[k];
401       _eigenvals[k] = _eigenvals[m];
402       _eigenvals[m] = tmp;
403 
404       mat33SwapRows(eigenvecs, k, m);
405     }
406   }
407 
408   initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]);
409 }
410 
mat33Determinant(const struct Mat33 * A)411 float mat33Determinant(const struct Mat33 *A) {
412   CHRE_ASSERT_NOT_NULL(A);
413   return A->elem[0][0] *
414       (A->elem[1][1] * A->elem[2][2] - A->elem[1][2] * A->elem[2][1])
415       - A->elem[0][1] *
416       (A->elem[1][0] * A->elem[2][2] - A->elem[1][2] * A->elem[2][0])
417       + A->elem[0][2] *
418       (A->elem[1][0] * A->elem[2][1] - A->elem[1][1] * A->elem[2][0]);
419 }
420 
421 // index of largest off-diagonal element in row k
422 UNROLLED
mat33Maxind(const struct Mat33 * A,uint32_t k)423 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) {
424   CHRE_ASSERT_NOT_NULL(A);
425   const uint32_t N = 3;
426 
427   uint32_t m = k + 1;
428   uint32_t i;
429 
430   for (i = k + 2; i < N; ++i) {
431     if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) {
432       m = i;
433     }
434   }
435 
436   return m;
437 }
438 
mat33Rotate(struct Mat33 * A,float c,float s,uint32_t k,uint32_t l,uint32_t i,uint32_t j)439 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l,
440                  uint32_t i, uint32_t j) {
441   CHRE_ASSERT_NOT_NULL(A);
442   float tmp = c * A->elem[k][l] - s * A->elem[i][j];
443   A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j];
444   A->elem[k][l] = tmp;
445 }
446 
mat44Apply(struct Vec4 * out,const struct Mat44 * A,const struct Vec4 * v)447 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) {
448   CHRE_ASSERT_NOT_NULL(out);
449   CHRE_ASSERT_NOT_NULL(A);
450   CHRE_ASSERT_NOT_NULL(v);
451 
452   out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z +
453            A->elem[0][3] * v->w;
454 
455   out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z +
456            A->elem[1][3] * v->w;
457 
458   out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z +
459            A->elem[2][3] * v->w;
460 
461   out->w = A->elem[3][0] * v->x + A->elem[3][1] * v->y + A->elem[3][2] * v->z +
462            A->elem[3][3] * v->w;
463 }
464 
465 UNROLLED
mat44DecomposeLup(struct Mat44 * LU,struct Size4 * pivot)466 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) {
467   CHRE_ASSERT_NOT_NULL(LU);
468   CHRE_ASSERT_NOT_NULL(pivot);
469   const uint32_t N = 4;
470   uint32_t i, j, k;
471 
472   for (k = 0; k < N; ++k) {
473     pivot->elem[k] = k;
474     float max = fabsf(LU->elem[k][k]);
475     for (j = k + 1; j < N; ++j) {
476       if (max < fabsf(LU->elem[j][k])) {
477         max = fabsf(LU->elem[j][k]);
478         pivot->elem[k] = j;
479       }
480     }
481 
482     if (pivot->elem[k] != k) {
483       mat44SwapRows(LU, k, pivot->elem[k]);
484     }
485 
486     if (fabsf(LU->elem[k][k]) < EPSILON) {
487       continue;
488     }
489 
490     for (j = k + 1; j < N; ++j) {
491       LU->elem[k][j] /= LU->elem[k][k];
492     }
493 
494     for (i = k + 1; i < N; ++i) {
495       for (j = k + 1; j < N; ++j) {
496         LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
497       }
498     }
499   }
500 }
501 
502 UNROLLED
mat44SwapRows(struct Mat44 * A,const uint32_t i,const uint32_t j)503 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) {
504   CHRE_ASSERT_NOT_NULL(A);
505   const uint32_t N = 4;
506   uint32_t k;
507 
508   if (i == j) {
509     return;
510   }
511 
512   for (k = 0; k < N; ++k) {
513     float tmp = A->elem[i][k];
514     A->elem[i][k] = A->elem[j][k];
515     A->elem[j][k] = tmp;
516   }
517 }
518 
519 UNROLLED
mat44Solve(const struct Mat44 * A,struct Vec4 * x,const struct Vec4 * b,const struct Size4 * pivot)520 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b,
521                 const struct Size4 *pivot) {
522   CHRE_ASSERT_NOT_NULL(A);
523   CHRE_ASSERT_NOT_NULL(x);
524   CHRE_ASSERT_NOT_NULL(b);
525   CHRE_ASSERT_NOT_NULL(pivot);
526   const uint32_t N = 4;
527   uint32_t i, k;
528 
529   float bCopy[N];
530   bCopy[0] = b->x;
531   bCopy[1] = b->y;
532   bCopy[2] = b->z;
533   bCopy[3] = b->w;
534 
535   float _x[N];
536   for (k = 0; k < N; ++k) {
537     if (pivot->elem[k] != k) {
538       float tmp = bCopy[k];
539       bCopy[k] = bCopy[pivot->elem[k]];
540       bCopy[pivot->elem[k]] = tmp;
541     }
542 
543     _x[k] = bCopy[k];
544     for (i = 0; i < k; ++i) {
545       _x[k] -= _x[i] * A->elem[k][i];
546     }
547     _x[k] /= A->elem[k][k];
548   }
549 
550   for (k = N; k-- > 0;) {
551     for (i = k + 1; i < N; ++i) {
552       _x[k] -= _x[i] * A->elem[k][i];
553     }
554   }
555 
556   initVec4(x, _x[0], _x[1], _x[2], _x[3]);
557 }
558 
matMaxDiagonalElement(const float * square_mat,size_t n)559 float matMaxDiagonalElement(const float *square_mat, size_t n) {
560   CHRE_ASSERT_NOT_NULL(square_mat);
561   CHRE_ASSERT(n > 0);
562   size_t i;
563   float max = square_mat[0];
564   const size_t n_square = n * n;
565   const size_t offset = n + 1;
566   for (i = offset; i < n_square; i += offset) {
567     if (square_mat[i] > max) {
568       max = square_mat[i];
569     }
570   }
571   return max;
572 }
573 
matAddConstantDiagonal(float * square_mat,float u,size_t n)574 void matAddConstantDiagonal(float *square_mat, float u, size_t n) {
575   CHRE_ASSERT_NOT_NULL(square_mat);
576   size_t i;
577   const size_t n_square = n * n;
578   const size_t offset = n + 1;
579   for (i = 0; i < n_square; i += offset) {
580     square_mat[i] += u;
581   }
582 }
583 
matTransposeMultiplyMat(float * out,const float * A,size_t nrows,size_t ncols)584 void matTransposeMultiplyMat(float *out, const float *A,
585                              size_t nrows, size_t ncols) {
586   CHRE_ASSERT_NOT_NULL(out);
587   CHRE_ASSERT_NOT_NULL(A);
588   size_t i;
589   size_t j;
590   size_t k;
591   memset(out, 0, sizeof(float) * ncols * ncols);
592   for (i = 0; i < ncols; ++i) {
593     for (j = 0; j < ncols; ++j) {
594       // Since A' * A is symmetric, can use upper diagonal elements
595       // to fill in the lower diagonal without recomputing.
596       if (j < i) {
597         out[i * ncols + j] = out[j * ncols + i];
598       } else {
599         // mat_out[i, j] = ai ' * aj
600         out[i * ncols + j] = 0;
601         for (k = 0; k < nrows; ++k) {
602           out[i * ncols + j] += A[k * ncols + i] *
603               A[k * ncols + j];
604         }
605       }
606     }
607   }
608 }
609 
matMultiplyVec(float * out,const float * A,const float * v,size_t nrows,size_t ncols)610 void matMultiplyVec(float *out, const float *A, const float *v,
611                     size_t nrows, size_t ncols) {
612   CHRE_ASSERT_NOT_NULL(out);
613   CHRE_ASSERT_NOT_NULL(A);
614   CHRE_ASSERT_NOT_NULL(v);
615   size_t i;
616   for (i = 0; i < nrows; ++i) {
617     const float *row = &A[i * ncols];
618     out[i] = vecDot(row, v, ncols);
619   }
620 }
621 
matTransposeMultiplyVec(float * out,const float * A,const float * v,size_t nrows,size_t ncols)622 void matTransposeMultiplyVec(float *out, const float *A, const float *v,
623                              size_t nrows, size_t ncols) {
624   CHRE_ASSERT_NOT_NULL(out);
625   CHRE_ASSERT_NOT_NULL(A);
626   CHRE_ASSERT_NOT_NULL(v);
627   size_t i, j;
628   for (i = 0; i < ncols; ++i) {
629     out[i] = 0;
630     for (j = 0; j < nrows; ++j) {
631       out[i] += A[j * ncols + i] * v[j];
632     }
633   }
634 }
635 
matLinearSolveCholesky(float * x,const float * L,const float * b,size_t n)636 bool matLinearSolveCholesky(float *x, const float *L, const float *b, size_t n) {
637   CHRE_ASSERT_NOT_NULL(x);
638   CHRE_ASSERT_NOT_NULL(L);
639   CHRE_ASSERT_NOT_NULL(b);
640   CHRE_ASSERT(n <= INT32_MAX);
641   int32_t i, j;  // Loops below require signed integers.
642   int32_t s_n = (int32_t)n; // Signed n.
643   float sum = 0.0f;
644   // 1. Solve Ly = b through forward substitution. Use x[] to store y.
645   for (i = 0; i < s_n; ++i) {
646     sum = 0.0f;
647     for (j = 0; j < i; ++j) {
648       sum += L[i * s_n + j] * x[j];
649     }
650     // Check for non-zero diagonals (don't divide by zero).
651     if (L[i * s_n + i] < EPSILON) {
652       return false;
653     }
654     x[i] = (b[i] - sum) / L[i * s_n + i];
655   }
656 
657   // 2. Solve L'x = y through backwards substitution. Use x[] to store both
658   // y and x.
659   for (i = s_n - 1; i >= 0; --i) {
660     sum = 0.0f;
661     for (j = i + 1; j < s_n; ++j) {
662       sum += L[j * s_n + i] * x[j];
663     }
664     x[i] = (x[i] - sum) / L[i * s_n + i];
665   }
666 
667   return true;
668 }
669 
matCholeskyDecomposition(float * L,const float * A,size_t n)670 bool matCholeskyDecomposition(float *L, const float *A, size_t n) {
671   CHRE_ASSERT_NOT_NULL(L);
672   CHRE_ASSERT_NOT_NULL(A);
673   size_t i, j, k;
674   float sum = 0.0f;
675   // initialize L to zero.
676   memset(L, 0, sizeof(float) * n * n);
677 
678   for (i = 0; i < n; ++i) {
679     // compute L[i][i] = sqrt(A[i][i] - sum_k = 1:i-1 L^2[i][k])
680     sum = 0.0f;
681     for (k = 0; k < i; ++k) {
682       sum += L[i * n + k] * L[i * n + k];
683     }
684     sum = A[i * n + i] - sum;
685     // If diagonal element of L is too small, cholesky fails.
686     if (sum < CHOLESKY_TOLERANCE) {
687       return false;
688     }
689     L[i * n + i] = sqrtf(sum);
690 
691     // for j = i+1:N,  compute L[j][i] =
692     //      (1/L[i][i]) * (A[i][j] - sum_k = 1:i-1 L[i][k] * L[j][k])
693     for (j = i + 1; j < n; ++j) {
694       sum = 0.0f;
695       for (k = 0; k < i; ++k) {
696         sum += L[i * n + k] * L[j * n + k];
697       }
698       // division okay because magnitude of L[i][i] already checked above.
699       L[j * n + i] = (A[i * n + j] - sum) / L[i * n + i];
700     }
701   }
702 
703   return true;
704 }
705