1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "common/math/mat.h"
18 
19 #include <assert.h>
20 #include <float.h>
21 
22 #ifdef _OS_BUILD_
23 #include <nanohub_math.h>
24 #include <seos.h>
25 #else
26 #include <math.h>
27 #ifndef UNROLLED
28 #define UNROLLED
29 #endif
30 #endif  // _OS_BUILD_
31 
32 #include <stddef.h>
33 #include <string.h>
34 
35 #define EPSILON 1E-5
36 #define CHOLESKY_TOLERANCE 1E-6
37 
38 // Forward declarations.
39 static void mat33SwapRows(struct Mat33 *A, uint32_t i, uint32_t j);
40 static uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k);
41 static void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k,
42                         uint32_t l, uint32_t i, uint32_t j);
43 
44 static void mat44SwapRows(struct Mat44 *A, uint32_t i, uint32_t j);
45 
initZeroMatrix(struct Mat33 * A)46 void initZeroMatrix(struct Mat33 *A) {
47   ASSERT_NOT_NULL(A);
48   memset(A->elem, 0.0f, sizeof(A->elem));
49 }
50 
51 UNROLLED
initDiagonalMatrix(struct Mat33 * A,float x)52 void initDiagonalMatrix(struct Mat33 *A, float x) {
53   ASSERT_NOT_NULL(A);
54   initZeroMatrix(A);
55 
56   uint32_t i;
57   for (i = 0; i < 3; ++i) {
58     A->elem[i][i] = x;
59   }
60 }
61 
initMatrixColumns(struct Mat33 * A,const struct Vec3 * v1,const struct Vec3 * v2,const struct Vec3 * v3)62 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1,
63                        const struct Vec3 *v2, const struct Vec3 *v3) {
64   ASSERT_NOT_NULL(A);
65   ASSERT_NOT_NULL(v1);
66   ASSERT_NOT_NULL(v2);
67   ASSERT_NOT_NULL(v3);
68   A->elem[0][0] = v1->x;
69   A->elem[0][1] = v2->x;
70   A->elem[0][2] = v3->x;
71 
72   A->elem[1][0] = v1->y;
73   A->elem[1][1] = v2->y;
74   A->elem[1][2] = v3->y;
75 
76   A->elem[2][0] = v1->z;
77   A->elem[2][1] = v2->z;
78   A->elem[2][2] = v3->z;
79 }
80 
mat33Apply(struct Vec3 * out,const struct Mat33 * A,const struct Vec3 * v)81 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) {
82   ASSERT_NOT_NULL(out);
83   ASSERT_NOT_NULL(A);
84   ASSERT_NOT_NULL(v);
85   out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z;
86   out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z;
87   out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z;
88 }
89 
90 UNROLLED
mat33Multiply(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)91 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A,
92                    const struct Mat33 *B) {
93   ASSERT_NOT_NULL(out);
94   ASSERT_NOT_NULL(A);
95   ASSERT_NOT_NULL(B);
96   ASSERT(out != A);
97   ASSERT(out != B);
98 
99   uint32_t i;
100   for (i = 0; i < 3; ++i) {
101     uint32_t j;
102     for (j = 0; j < 3; ++j) {
103       uint32_t k;
104       float sum = 0.0f;
105       for (k = 0; k < 3; ++k) {
106         sum += A->elem[i][k] * B->elem[k][j];
107       }
108 
109       out->elem[i][j] = sum;
110     }
111   }
112 }
113 
114 UNROLLED
mat33ScalarMul(struct Mat33 * A,float c)115 void mat33ScalarMul(struct Mat33 *A, float c) {
116   ASSERT_NOT_NULL(A);
117   uint32_t i;
118   for (i = 0; i < 3; ++i) {
119     uint32_t j;
120     for (j = 0; j < 3; ++j) {
121       A->elem[i][j] *= c;
122     }
123   }
124 }
125 
126 UNROLLED
mat33Add(struct Mat33 * out,const struct Mat33 * A)127 void mat33Add(struct Mat33 *out, const struct Mat33 *A) {
128   ASSERT_NOT_NULL(out);
129   ASSERT_NOT_NULL(A);
130   uint32_t i;
131   for (i = 0; i < 3; ++i) {
132     uint32_t j;
133     for (j = 0; j < 3; ++j) {
134       out->elem[i][j] += A->elem[i][j];
135     }
136   }
137 }
138 
139 UNROLLED
mat33Sub(struct Mat33 * out,const struct Mat33 * A)140 void mat33Sub(struct Mat33 *out, const struct Mat33 *A) {
141   ASSERT_NOT_NULL(out);
142   ASSERT_NOT_NULL(A);
143   uint32_t i;
144   for (i = 0; i < 3; ++i) {
145     uint32_t j;
146     for (j = 0; j < 3; ++j) {
147       out->elem[i][j] -= A->elem[i][j];
148     }
149   }
150 }
151 
152 UNROLLED
mat33IsPositiveSemidefinite(const struct Mat33 * A,float tolerance)153 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) {
154   ASSERT_NOT_NULL(A);
155   uint32_t i;
156   for (i = 0; i < 3; ++i) {
157     if (A->elem[i][i] < 0.0f) {
158       return 0;
159     }
160   }
161 
162   for (i = 0; i < 3; ++i) {
163     uint32_t j;
164     for (j = i + 1; j < 3; ++j) {
165       if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) {
166         return 0;
167       }
168     }
169   }
170 
171   return 1;
172 }
173 
174 UNROLLED
mat33MultiplyTransposed(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)175 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A,
176                              const struct Mat33 *B) {
177   ASSERT(out != A);
178   ASSERT(out != B);
179   ASSERT_NOT_NULL(out);
180   ASSERT_NOT_NULL(A);
181   ASSERT_NOT_NULL(B);
182   uint32_t i;
183   for (i = 0; i < 3; ++i) {
184     uint32_t j;
185     for (j = 0; j < 3; ++j) {
186       uint32_t k;
187       float sum = 0.0f;
188       for (k = 0; k < 3; ++k) {
189         sum += A->elem[k][i] * B->elem[k][j];
190       }
191 
192       out->elem[i][j] = sum;
193     }
194   }
195 }
196 
197 UNROLLED
mat33MultiplyTransposed2(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)198 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A,
199                               const struct Mat33 *B) {
200   ASSERT(out != A);
201   ASSERT(out != B);
202   ASSERT_NOT_NULL(out);
203   ASSERT_NOT_NULL(A);
204   ASSERT_NOT_NULL(B);
205   uint32_t i;
206   for (i = 0; i < 3; ++i) {
207     uint32_t j;
208     for (j = 0; j < 3; ++j) {
209       uint32_t k;
210       float sum = 0.0f;
211       for (k = 0; k < 3; ++k) {
212         sum += A->elem[i][k] * B->elem[j][k];
213       }
214 
215       out->elem[i][j] = sum;
216     }
217   }
218 }
219 
220 UNROLLED
mat33Invert(struct Mat33 * out,const struct Mat33 * A)221 void mat33Invert(struct Mat33 *out, const struct Mat33 *A) {
222   ASSERT_NOT_NULL(out);
223   ASSERT_NOT_NULL(A);
224   float t;
225   initDiagonalMatrix(out, 1.0f);
226 
227   struct Mat33 tmp = *A;
228 
229   uint32_t i, k;
230   for (i = 0; i < 3; ++i) {
231     uint32_t swap = i;
232     uint32_t j;
233     for (j = i + 1; j < 3; ++j) {
234       if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) {
235         swap = j;
236       }
237     }
238 
239     if (swap != i) {
240       for (k = 0; k < 3; ++k) {
241         t = tmp.elem[i][k];
242         tmp.elem[i][k] = tmp.elem[swap][k];
243         tmp.elem[swap][k] = t;
244 
245         t = out->elem[i][k];
246         out->elem[i][k] = out->elem[swap][k];
247         out->elem[swap][k] = t;
248       }
249     }
250     // divide by zero guard.
251     ASSERT(fabs(tmp.elem[i][i]) > 0);
252     if(!(fabs(tmp.elem[i][i]) > 0)) {
253       return;
254     }
255     t = 1.0f / tmp.elem[i][i];
256     for (k = 0; k < 3; ++k) {
257       tmp.elem[i][k] *= t;
258       out->elem[i][k] *= t;
259     }
260 
261     for (j = 0; j < 3; ++j) {
262       if (j != i) {
263         t = tmp.elem[j][i];
264         for (k = 0; k < 3; ++k) {
265           tmp.elem[j][k] -= tmp.elem[i][k] * t;
266           out->elem[j][k] -= out->elem[i][k] * t;
267         }
268       }
269     }
270   }
271 }
272 
273 UNROLLED
mat33Transpose(struct Mat33 * out,const struct Mat33 * A)274 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) {
275   ASSERT_NOT_NULL(out);
276   ASSERT_NOT_NULL(A);
277   uint32_t i;
278   for (i = 0; i < 3; ++i) {
279     uint32_t j;
280     for (j = 0; j < 3; ++j) {
281       out->elem[i][j] = A->elem[j][i];
282     }
283   }
284 }
285 
286 UNROLLED
mat33SwapRows(struct Mat33 * A,const uint32_t i,const uint32_t j)287 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) {
288   ASSERT_NOT_NULL(A);
289   const uint32_t N = 3;
290   uint32_t k;
291 
292   if (i == j) {
293     return;
294   }
295 
296   for (k = 0; k < N; ++k) {
297     float tmp = A->elem[i][k];
298     A->elem[i][k] = A->elem[j][k];
299     A->elem[j][k] = tmp;
300   }
301 }
302 
303 UNROLLED
mat33GetEigenbasis(struct Mat33 * S,struct Vec3 * eigenvals,struct Mat33 * eigenvecs)304 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals,
305                         struct Mat33 *eigenvecs) {
306   ASSERT_NOT_NULL(S);
307   ASSERT_NOT_NULL(eigenvals);
308   ASSERT_NOT_NULL(eigenvecs);
309   const uint32_t N = 3;
310   uint32_t i, j, k, l, m;
311 
312   float _eigenvals[N];
313 
314   uint32_t ind[N];
315   for (k = 0; k < N; ++k) {
316     ind[k] = mat33Maxind(S, k);
317     _eigenvals[k] = S->elem[k][k];
318   }
319 
320   initDiagonalMatrix(eigenvecs, 1.0f);
321 
322   for (;;) {
323     m = 0;
324     for (k = 1; k + 1 < N; ++k) {
325       if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) {
326         m = k;
327       }
328     }
329 
330     k = m;
331     l = ind[m];
332     float p = S->elem[k][l];
333 
334     if (fabsf(p) < EPSILON) {
335       break;
336     }
337 
338     float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f;
339 
340     float t = fabsf(y) + sqrtf(p * p + y * y);
341     float s = sqrtf(p * p + t * t);
342     float c = t / s;
343     s = p / s;
344     t = p * p / t;
345 
346     if (y < 0.0f) {
347       s = -s;
348       t = -t;
349     }
350 
351     S->elem[k][l] = 0.0f;
352 
353     _eigenvals[k] -= t;
354     _eigenvals[l] += t;
355 
356     for (i = 0; i < k; ++i) {
357       mat33Rotate(S, c, s, i, k, i, l);
358     }
359 
360     for (i = k + 1; i < l; ++i) {
361       mat33Rotate(S, c, s, k, i, i, l);
362     }
363 
364     for (i = l + 1; i < N; ++i) {
365       mat33Rotate(S, c, s, k, i, l, i);
366     }
367 
368     for (i = 0; i < N; ++i) {
369       float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i];
370       eigenvecs->elem[l][i] =
371           s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i];
372       eigenvecs->elem[k][i] = tmp;
373     }
374 
375     ind[k] = mat33Maxind(S, k);
376     ind[l] = mat33Maxind(S, l);
377 
378     float sum = 0.0f;
379     for (i = 0; i < N; ++i) {
380       for (j = i + 1; j < N; ++j) {
381         sum += fabsf(S->elem[i][j]);
382       }
383     }
384 
385     if (sum < EPSILON) {
386       break;
387     }
388   }
389 
390   for (k = 0; k < N; ++k) {
391     m = k;
392     for (l = k + 1; l < N; ++l) {
393       if (_eigenvals[l] > _eigenvals[m]) {
394         m = l;
395       }
396     }
397 
398     if (k != m) {
399       float tmp = _eigenvals[k];
400       _eigenvals[k] = _eigenvals[m];
401       _eigenvals[m] = tmp;
402 
403       mat33SwapRows(eigenvecs, k, m);
404     }
405   }
406 
407   initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]);
408 }
409 
410 // index of largest off-diagonal element in row k
411 UNROLLED
mat33Maxind(const struct Mat33 * A,uint32_t k)412 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) {
413   ASSERT_NOT_NULL(A);
414   const uint32_t N = 3;
415 
416   uint32_t m = k + 1;
417   uint32_t i;
418 
419   for (i = k + 2; i < N; ++i) {
420     if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) {
421       m = i;
422     }
423   }
424 
425   return m;
426 }
427 
mat33Rotate(struct Mat33 * A,float c,float s,uint32_t k,uint32_t l,uint32_t i,uint32_t j)428 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l,
429                  uint32_t i, uint32_t j) {
430   ASSERT_NOT_NULL(A);
431   float tmp = c * A->elem[k][l] - s * A->elem[i][j];
432   A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j];
433   A->elem[k][l] = tmp;
434 }
435 
mat44Apply(struct Vec4 * out,const struct Mat44 * A,const struct Vec4 * v)436 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) {
437   ASSERT_NOT_NULL(out);
438   ASSERT_NOT_NULL(A);
439   ASSERT_NOT_NULL(v);
440 
441   out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z +
442            A->elem[0][3] * v->w;
443 
444   out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z +
445            A->elem[1][3] * v->w;
446 
447   out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z +
448            A->elem[2][3] * v->w;
449 
450   out->w = A->elem[3][0] * v->x + A->elem[3][1] * v->y + A->elem[3][2] * v->z +
451            A->elem[3][3] * v->w;
452 }
453 
454 UNROLLED
mat44DecomposeLup(struct Mat44 * LU,struct Size4 * pivot)455 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) {
456   ASSERT_NOT_NULL(LU);
457   ASSERT_NOT_NULL(pivot);
458   const uint32_t N = 4;
459   uint32_t i, j, k;
460 
461   for (k = 0; k < N; ++k) {
462     pivot->elem[k] = k;
463     float max = fabsf(LU->elem[k][k]);
464     for (j = k + 1; j < N; ++j) {
465       if (max < fabsf(LU->elem[j][k])) {
466         max = fabsf(LU->elem[j][k]);
467         pivot->elem[k] = j;
468       }
469     }
470 
471     if (pivot->elem[k] != k) {
472       mat44SwapRows(LU, k, pivot->elem[k]);
473     }
474 
475     if (fabsf(LU->elem[k][k]) < EPSILON) {
476       continue;
477     }
478 
479     for (j = k + 1; j < N; ++j) {
480       LU->elem[k][j] /= LU->elem[k][k];
481     }
482 
483     for (i = k + 1; i < N; ++i) {
484       for (j = k + 1; j < N; ++j) {
485         LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
486       }
487     }
488   }
489 }
490 
491 UNROLLED
mat44SwapRows(struct Mat44 * A,const uint32_t i,const uint32_t j)492 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) {
493   ASSERT_NOT_NULL(A);
494   const uint32_t N = 4;
495   uint32_t k;
496 
497   if (i == j) {
498     return;
499   }
500 
501   for (k = 0; k < N; ++k) {
502     float tmp = A->elem[i][k];
503     A->elem[i][k] = A->elem[j][k];
504     A->elem[j][k] = tmp;
505   }
506 }
507 
508 UNROLLED
mat44Solve(const struct Mat44 * A,struct Vec4 * x,const struct Vec4 * b,const struct Size4 * pivot)509 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b,
510                 const struct Size4 *pivot) {
511   ASSERT_NOT_NULL(A);
512   ASSERT_NOT_NULL(x);
513   ASSERT_NOT_NULL(b);
514   ASSERT_NOT_NULL(pivot);
515   const uint32_t N = 4;
516   uint32_t i, k;
517 
518   float bCopy[N];
519   bCopy[0] = b->x;
520   bCopy[1] = b->y;
521   bCopy[2] = b->z;
522   bCopy[3] = b->w;
523 
524   float _x[N];
525   for (k = 0; k < N; ++k) {
526     if (pivot->elem[k] != k) {
527       float tmp = bCopy[k];
528       bCopy[k] = bCopy[pivot->elem[k]];
529       bCopy[pivot->elem[k]] = tmp;
530     }
531 
532     _x[k] = bCopy[k];
533     for (i = 0; i < k; ++i) {
534       _x[k] -= _x[i] * A->elem[k][i];
535     }
536     _x[k] /= A->elem[k][k];
537   }
538 
539   for (k = N; k-- > 0;) {
540     for (i = k + 1; i < N; ++i) {
541       _x[k] -= _x[i] * A->elem[k][i];
542     }
543   }
544 
545   initVec4(x, _x[0], _x[1], _x[2], _x[3]);
546 }
547 
matMaxDiagonalElement(const float * square_mat,size_t n)548 float matMaxDiagonalElement(const float *square_mat, size_t n) {
549   ASSERT_NOT_NULL(square_mat);
550   ASSERT(n > 0);
551   size_t i;
552   float max = square_mat[0];
553   const size_t n_square = n * n;
554   const size_t offset = n + 1;
555   for (i = offset; i < n_square; i += offset) {
556     if (square_mat[i] > max) {
557       max = square_mat[i];
558     }
559   }
560   return max;
561 }
562 
matAddConstantDiagonal(float * square_mat,float u,size_t n)563 void matAddConstantDiagonal(float *square_mat, float u, size_t n) {
564   ASSERT_NOT_NULL(square_mat);
565   size_t i;
566   const size_t n_square = n * n;
567   const size_t offset = n + 1;
568   for (i = 0; i < n_square; i += offset) {
569     square_mat[i] += u;
570   }
571 }
572 
matTransposeMultiplyMat(float * out,const float * A,size_t nrows,size_t ncols)573 void matTransposeMultiplyMat(float *out, const float *A,
574                              size_t nrows, size_t ncols) {
575   ASSERT_NOT_NULL(out);
576   ASSERT_NOT_NULL(A);
577   size_t i;
578   size_t j;
579   size_t k;
580   memset(out, 0, sizeof(float) * ncols * ncols);
581   for (i = 0; i < ncols; ++i) {
582     for (j = 0; j < ncols; ++j) {
583       // Since A' * A is symmetric, can use upper diagonal elements
584       // to fill in the lower diagonal without recomputing.
585       if (j < i) {
586         out[i * ncols + j] = out[j * ncols + i];
587       } else {
588         // mat_out[i, j] = ai ' * aj
589         out[i * ncols + j] = 0;
590         for (k = 0; k < nrows; ++k) {
591           out[i * ncols + j] += A[k * ncols + i] *
592               A[k * ncols + j];
593         }
594       }
595     }
596   }
597 }
598 
matMultiplyVec(float * out,const float * A,const float * v,size_t nrows,size_t ncols)599 void matMultiplyVec(float *out, const float *A, const float *v,
600                     size_t nrows, size_t ncols) {
601   ASSERT_NOT_NULL(out);
602   ASSERT_NOT_NULL(A);
603   ASSERT_NOT_NULL(v);
604   size_t i;
605   for (i = 0; i < nrows; ++i) {
606     const float *row = &A[i * ncols];
607     out[i] = vecDot(row, v, (int)ncols);
608   }
609 }
610 
matTransposeMultiplyVec(float * out,const float * A,const float * v,size_t nrows,size_t ncols)611 void matTransposeMultiplyVec(float *out, const float *A, const float *v,
612                              size_t nrows, size_t ncols) {
613   ASSERT_NOT_NULL(out);
614   ASSERT_NOT_NULL(A);
615   ASSERT_NOT_NULL(v);
616   size_t i, j;
617   for (i = 0; i < ncols; ++i) {
618     out[i] = 0;
619     for (j = 0; j < nrows; ++j) {
620       out[i] += A[j * ncols + i] * v[j];
621     }
622   }
623 }
624 
matLinearSolveCholesky(float * x,const float * L,const float * b,size_t n)625 bool matLinearSolveCholesky(float *x, const float *L, const float *b, size_t n) {
626   ASSERT_NOT_NULL(x);
627   ASSERT_NOT_NULL(L);
628   ASSERT_NOT_NULL(b);
629   ASSERT(n <= INT32_MAX);
630   int32_t i, j;  // Loops below require signed integers.
631   int32_t s_n = (int32_t)n; // Signed n.
632   float sum = 0.0f;
633   // 1. Solve Ly = b through forward substitution. Use x[] to store y.
634   for (i = 0; i < s_n; ++i) {
635     sum = 0.0f;
636     for (j = 0; j < i; ++j) {
637       sum += L[i * s_n + j] * x[j];
638     }
639     // Check for non-zero diagonals (don't divide by zero).
640     if (L[i * s_n + i] < EPSILON) {
641       return false;
642     }
643     x[i] = (b[i] - sum) / L[i * s_n + i];
644   }
645 
646   // 2. Solve L'x = y through backwards substitution. Use x[] to store both
647   // y and x.
648   for (i = s_n - 1; i >= 0; --i) {
649     sum = 0.0f;
650     for (j = i + 1; j < s_n; ++j) {
651       sum += L[j * s_n + i] * x[j];
652     }
653     x[i] = (x[i] - sum) / L[i * s_n + i];
654   }
655 
656   return true;
657 }
658 
matCholeskyDecomposition(float * L,const float * A,size_t n)659 bool matCholeskyDecomposition(float *L, const float *A, size_t n) {
660   ASSERT_NOT_NULL(L);
661   ASSERT_NOT_NULL(A);
662   size_t i, j, k;
663   float sum = 0.0f;
664   // initialize L to zero.
665   memset(L, 0, sizeof(float) * n * n);
666 
667   for (i = 0; i < n; ++i) {
668     // compute L[i][i] = sqrt(A[i][i] - sum_k = 1:i-1 L^2[i][k])
669     sum = 0.0f;
670     for (k = 0; k < i; ++k) {
671       sum += L[i * n + k] * L[i * n + k];
672     }
673     sum = A[i * n + i] - sum;
674     // If diagonal element of L is too small, cholesky fails.
675     if (sum < CHOLESKY_TOLERANCE) {
676       return false;
677     }
678     L[i * n + i] = sqrtf(sum);
679 
680     // for j = i+1:N,  compute L[j][i] =
681     //      (1/L[i][i]) * (A[i][j] - sum_k = 1:i-1 L[i][k] * L[j][k])
682     for (j = i + 1; j < n; ++j) {
683       sum = 0.0f;
684       for (k = 0; k < i; ++k) {
685         sum += L[i * n + k] * L[j * n + k];
686       }
687       // division okay because magnitude of L[i][i] already checked above.
688       L[j * n + i] = (A[i * n + j] - sum) / L[i * n + i];
689     }
690   }
691 
692   return true;
693 }
694