1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "common/math/mat.h"
18
19 #include <assert.h>
20 #include <float.h>
21
22 #ifdef _OS_BUILD_
23 #include <nanohub_math.h>
24 #include <seos.h>
25 #else
26 #include <math.h>
27 #ifndef UNROLLED
28 #define UNROLLED
29 #endif
30 #endif // _OS_BUILD_
31
32 #include <stddef.h>
33 #include <string.h>
34
35 #define EPSILON 1E-5
36 #define CHOLESKY_TOLERANCE 1E-6
37
38 // Forward declarations.
39 static void mat33SwapRows(struct Mat33 *A, uint32_t i, uint32_t j);
40 static uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k);
41 static void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k,
42 uint32_t l, uint32_t i, uint32_t j);
43
44 static void mat44SwapRows(struct Mat44 *A, uint32_t i, uint32_t j);
45
initZeroMatrix(struct Mat33 * A)46 void initZeroMatrix(struct Mat33 *A) {
47 ASSERT_NOT_NULL(A);
48 memset(A->elem, 0.0f, sizeof(A->elem));
49 }
50
51 UNROLLED
initDiagonalMatrix(struct Mat33 * A,float x)52 void initDiagonalMatrix(struct Mat33 *A, float x) {
53 ASSERT_NOT_NULL(A);
54 initZeroMatrix(A);
55
56 uint32_t i;
57 for (i = 0; i < 3; ++i) {
58 A->elem[i][i] = x;
59 }
60 }
61
initMatrixColumns(struct Mat33 * A,const struct Vec3 * v1,const struct Vec3 * v2,const struct Vec3 * v3)62 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1,
63 const struct Vec3 *v2, const struct Vec3 *v3) {
64 ASSERT_NOT_NULL(A);
65 ASSERT_NOT_NULL(v1);
66 ASSERT_NOT_NULL(v2);
67 ASSERT_NOT_NULL(v3);
68 A->elem[0][0] = v1->x;
69 A->elem[0][1] = v2->x;
70 A->elem[0][2] = v3->x;
71
72 A->elem[1][0] = v1->y;
73 A->elem[1][1] = v2->y;
74 A->elem[1][2] = v3->y;
75
76 A->elem[2][0] = v1->z;
77 A->elem[2][1] = v2->z;
78 A->elem[2][2] = v3->z;
79 }
80
mat33Apply(struct Vec3 * out,const struct Mat33 * A,const struct Vec3 * v)81 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) {
82 ASSERT_NOT_NULL(out);
83 ASSERT_NOT_NULL(A);
84 ASSERT_NOT_NULL(v);
85 out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z;
86 out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z;
87 out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z;
88 }
89
90 UNROLLED
mat33Multiply(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)91 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A,
92 const struct Mat33 *B) {
93 ASSERT_NOT_NULL(out);
94 ASSERT_NOT_NULL(A);
95 ASSERT_NOT_NULL(B);
96 ASSERT(out != A);
97 ASSERT(out != B);
98
99 uint32_t i;
100 for (i = 0; i < 3; ++i) {
101 uint32_t j;
102 for (j = 0; j < 3; ++j) {
103 uint32_t k;
104 float sum = 0.0f;
105 for (k = 0; k < 3; ++k) {
106 sum += A->elem[i][k] * B->elem[k][j];
107 }
108
109 out->elem[i][j] = sum;
110 }
111 }
112 }
113
114 UNROLLED
mat33ScalarMul(struct Mat33 * A,float c)115 void mat33ScalarMul(struct Mat33 *A, float c) {
116 ASSERT_NOT_NULL(A);
117 uint32_t i;
118 for (i = 0; i < 3; ++i) {
119 uint32_t j;
120 for (j = 0; j < 3; ++j) {
121 A->elem[i][j] *= c;
122 }
123 }
124 }
125
126 UNROLLED
mat33Add(struct Mat33 * out,const struct Mat33 * A)127 void mat33Add(struct Mat33 *out, const struct Mat33 *A) {
128 ASSERT_NOT_NULL(out);
129 ASSERT_NOT_NULL(A);
130 uint32_t i;
131 for (i = 0; i < 3; ++i) {
132 uint32_t j;
133 for (j = 0; j < 3; ++j) {
134 out->elem[i][j] += A->elem[i][j];
135 }
136 }
137 }
138
139 UNROLLED
mat33Sub(struct Mat33 * out,const struct Mat33 * A)140 void mat33Sub(struct Mat33 *out, const struct Mat33 *A) {
141 ASSERT_NOT_NULL(out);
142 ASSERT_NOT_NULL(A);
143 uint32_t i;
144 for (i = 0; i < 3; ++i) {
145 uint32_t j;
146 for (j = 0; j < 3; ++j) {
147 out->elem[i][j] -= A->elem[i][j];
148 }
149 }
150 }
151
152 UNROLLED
mat33IsPositiveSemidefinite(const struct Mat33 * A,float tolerance)153 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) {
154 ASSERT_NOT_NULL(A);
155 uint32_t i;
156 for (i = 0; i < 3; ++i) {
157 if (A->elem[i][i] < 0.0f) {
158 return 0;
159 }
160 }
161
162 for (i = 0; i < 3; ++i) {
163 uint32_t j;
164 for (j = i + 1; j < 3; ++j) {
165 if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) {
166 return 0;
167 }
168 }
169 }
170
171 return 1;
172 }
173
174 UNROLLED
mat33MultiplyTransposed(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)175 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A,
176 const struct Mat33 *B) {
177 ASSERT(out != A);
178 ASSERT(out != B);
179 ASSERT_NOT_NULL(out);
180 ASSERT_NOT_NULL(A);
181 ASSERT_NOT_NULL(B);
182 uint32_t i;
183 for (i = 0; i < 3; ++i) {
184 uint32_t j;
185 for (j = 0; j < 3; ++j) {
186 uint32_t k;
187 float sum = 0.0f;
188 for (k = 0; k < 3; ++k) {
189 sum += A->elem[k][i] * B->elem[k][j];
190 }
191
192 out->elem[i][j] = sum;
193 }
194 }
195 }
196
197 UNROLLED
mat33MultiplyTransposed2(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)198 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A,
199 const struct Mat33 *B) {
200 ASSERT(out != A);
201 ASSERT(out != B);
202 ASSERT_NOT_NULL(out);
203 ASSERT_NOT_NULL(A);
204 ASSERT_NOT_NULL(B);
205 uint32_t i;
206 for (i = 0; i < 3; ++i) {
207 uint32_t j;
208 for (j = 0; j < 3; ++j) {
209 uint32_t k;
210 float sum = 0.0f;
211 for (k = 0; k < 3; ++k) {
212 sum += A->elem[i][k] * B->elem[j][k];
213 }
214
215 out->elem[i][j] = sum;
216 }
217 }
218 }
219
220 UNROLLED
mat33Invert(struct Mat33 * out,const struct Mat33 * A)221 void mat33Invert(struct Mat33 *out, const struct Mat33 *A) {
222 ASSERT_NOT_NULL(out);
223 ASSERT_NOT_NULL(A);
224 float t;
225 initDiagonalMatrix(out, 1.0f);
226
227 struct Mat33 tmp = *A;
228
229 uint32_t i, k;
230 for (i = 0; i < 3; ++i) {
231 uint32_t swap = i;
232 uint32_t j;
233 for (j = i + 1; j < 3; ++j) {
234 if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) {
235 swap = j;
236 }
237 }
238
239 if (swap != i) {
240 for (k = 0; k < 3; ++k) {
241 t = tmp.elem[i][k];
242 tmp.elem[i][k] = tmp.elem[swap][k];
243 tmp.elem[swap][k] = t;
244
245 t = out->elem[i][k];
246 out->elem[i][k] = out->elem[swap][k];
247 out->elem[swap][k] = t;
248 }
249 }
250 // divide by zero guard.
251 ASSERT(fabs(tmp.elem[i][i]) > 0);
252 if(!(fabs(tmp.elem[i][i]) > 0)) {
253 return;
254 }
255 t = 1.0f / tmp.elem[i][i];
256 for (k = 0; k < 3; ++k) {
257 tmp.elem[i][k] *= t;
258 out->elem[i][k] *= t;
259 }
260
261 for (j = 0; j < 3; ++j) {
262 if (j != i) {
263 t = tmp.elem[j][i];
264 for (k = 0; k < 3; ++k) {
265 tmp.elem[j][k] -= tmp.elem[i][k] * t;
266 out->elem[j][k] -= out->elem[i][k] * t;
267 }
268 }
269 }
270 }
271 }
272
273 UNROLLED
mat33Transpose(struct Mat33 * out,const struct Mat33 * A)274 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) {
275 ASSERT_NOT_NULL(out);
276 ASSERT_NOT_NULL(A);
277 uint32_t i;
278 for (i = 0; i < 3; ++i) {
279 uint32_t j;
280 for (j = 0; j < 3; ++j) {
281 out->elem[i][j] = A->elem[j][i];
282 }
283 }
284 }
285
286 UNROLLED
mat33SwapRows(struct Mat33 * A,const uint32_t i,const uint32_t j)287 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) {
288 ASSERT_NOT_NULL(A);
289 const uint32_t N = 3;
290 uint32_t k;
291
292 if (i == j) {
293 return;
294 }
295
296 for (k = 0; k < N; ++k) {
297 float tmp = A->elem[i][k];
298 A->elem[i][k] = A->elem[j][k];
299 A->elem[j][k] = tmp;
300 }
301 }
302
303 UNROLLED
mat33GetEigenbasis(struct Mat33 * S,struct Vec3 * eigenvals,struct Mat33 * eigenvecs)304 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals,
305 struct Mat33 *eigenvecs) {
306 ASSERT_NOT_NULL(S);
307 ASSERT_NOT_NULL(eigenvals);
308 ASSERT_NOT_NULL(eigenvecs);
309 const uint32_t N = 3;
310 uint32_t i, j, k, l, m;
311
312 float _eigenvals[N];
313
314 uint32_t ind[N];
315 for (k = 0; k < N; ++k) {
316 ind[k] = mat33Maxind(S, k);
317 _eigenvals[k] = S->elem[k][k];
318 }
319
320 initDiagonalMatrix(eigenvecs, 1.0f);
321
322 for (;;) {
323 m = 0;
324 for (k = 1; k + 1 < N; ++k) {
325 if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) {
326 m = k;
327 }
328 }
329
330 k = m;
331 l = ind[m];
332 float p = S->elem[k][l];
333
334 if (fabsf(p) < EPSILON) {
335 break;
336 }
337
338 float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f;
339
340 float t = fabsf(y) + sqrtf(p * p + y * y);
341 float s = sqrtf(p * p + t * t);
342 float c = t / s;
343 s = p / s;
344 t = p * p / t;
345
346 if (y < 0.0f) {
347 s = -s;
348 t = -t;
349 }
350
351 S->elem[k][l] = 0.0f;
352
353 _eigenvals[k] -= t;
354 _eigenvals[l] += t;
355
356 for (i = 0; i < k; ++i) {
357 mat33Rotate(S, c, s, i, k, i, l);
358 }
359
360 for (i = k + 1; i < l; ++i) {
361 mat33Rotate(S, c, s, k, i, i, l);
362 }
363
364 for (i = l + 1; i < N; ++i) {
365 mat33Rotate(S, c, s, k, i, l, i);
366 }
367
368 for (i = 0; i < N; ++i) {
369 float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i];
370 eigenvecs->elem[l][i] =
371 s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i];
372 eigenvecs->elem[k][i] = tmp;
373 }
374
375 ind[k] = mat33Maxind(S, k);
376 ind[l] = mat33Maxind(S, l);
377
378 float sum = 0.0f;
379 for (i = 0; i < N; ++i) {
380 for (j = i + 1; j < N; ++j) {
381 sum += fabsf(S->elem[i][j]);
382 }
383 }
384
385 if (sum < EPSILON) {
386 break;
387 }
388 }
389
390 for (k = 0; k < N; ++k) {
391 m = k;
392 for (l = k + 1; l < N; ++l) {
393 if (_eigenvals[l] > _eigenvals[m]) {
394 m = l;
395 }
396 }
397
398 if (k != m) {
399 float tmp = _eigenvals[k];
400 _eigenvals[k] = _eigenvals[m];
401 _eigenvals[m] = tmp;
402
403 mat33SwapRows(eigenvecs, k, m);
404 }
405 }
406
407 initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]);
408 }
409
410 // index of largest off-diagonal element in row k
411 UNROLLED
mat33Maxind(const struct Mat33 * A,uint32_t k)412 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) {
413 ASSERT_NOT_NULL(A);
414 const uint32_t N = 3;
415
416 uint32_t m = k + 1;
417 uint32_t i;
418
419 for (i = k + 2; i < N; ++i) {
420 if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) {
421 m = i;
422 }
423 }
424
425 return m;
426 }
427
mat33Rotate(struct Mat33 * A,float c,float s,uint32_t k,uint32_t l,uint32_t i,uint32_t j)428 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l,
429 uint32_t i, uint32_t j) {
430 ASSERT_NOT_NULL(A);
431 float tmp = c * A->elem[k][l] - s * A->elem[i][j];
432 A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j];
433 A->elem[k][l] = tmp;
434 }
435
mat44Apply(struct Vec4 * out,const struct Mat44 * A,const struct Vec4 * v)436 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) {
437 ASSERT_NOT_NULL(out);
438 ASSERT_NOT_NULL(A);
439 ASSERT_NOT_NULL(v);
440
441 out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z +
442 A->elem[0][3] * v->w;
443
444 out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z +
445 A->elem[1][3] * v->w;
446
447 out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z +
448 A->elem[2][3] * v->w;
449
450 out->w = A->elem[3][0] * v->x + A->elem[3][1] * v->y + A->elem[3][2] * v->z +
451 A->elem[3][3] * v->w;
452 }
453
454 UNROLLED
mat44DecomposeLup(struct Mat44 * LU,struct Size4 * pivot)455 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) {
456 ASSERT_NOT_NULL(LU);
457 ASSERT_NOT_NULL(pivot);
458 const uint32_t N = 4;
459 uint32_t i, j, k;
460
461 for (k = 0; k < N; ++k) {
462 pivot->elem[k] = k;
463 float max = fabsf(LU->elem[k][k]);
464 for (j = k + 1; j < N; ++j) {
465 if (max < fabsf(LU->elem[j][k])) {
466 max = fabsf(LU->elem[j][k]);
467 pivot->elem[k] = j;
468 }
469 }
470
471 if (pivot->elem[k] != k) {
472 mat44SwapRows(LU, k, pivot->elem[k]);
473 }
474
475 if (fabsf(LU->elem[k][k]) < EPSILON) {
476 continue;
477 }
478
479 for (j = k + 1; j < N; ++j) {
480 LU->elem[k][j] /= LU->elem[k][k];
481 }
482
483 for (i = k + 1; i < N; ++i) {
484 for (j = k + 1; j < N; ++j) {
485 LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
486 }
487 }
488 }
489 }
490
491 UNROLLED
mat44SwapRows(struct Mat44 * A,const uint32_t i,const uint32_t j)492 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) {
493 ASSERT_NOT_NULL(A);
494 const uint32_t N = 4;
495 uint32_t k;
496
497 if (i == j) {
498 return;
499 }
500
501 for (k = 0; k < N; ++k) {
502 float tmp = A->elem[i][k];
503 A->elem[i][k] = A->elem[j][k];
504 A->elem[j][k] = tmp;
505 }
506 }
507
508 UNROLLED
mat44Solve(const struct Mat44 * A,struct Vec4 * x,const struct Vec4 * b,const struct Size4 * pivot)509 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b,
510 const struct Size4 *pivot) {
511 ASSERT_NOT_NULL(A);
512 ASSERT_NOT_NULL(x);
513 ASSERT_NOT_NULL(b);
514 ASSERT_NOT_NULL(pivot);
515 const uint32_t N = 4;
516 uint32_t i, k;
517
518 float bCopy[N];
519 bCopy[0] = b->x;
520 bCopy[1] = b->y;
521 bCopy[2] = b->z;
522 bCopy[3] = b->w;
523
524 float _x[N];
525 for (k = 0; k < N; ++k) {
526 if (pivot->elem[k] != k) {
527 float tmp = bCopy[k];
528 bCopy[k] = bCopy[pivot->elem[k]];
529 bCopy[pivot->elem[k]] = tmp;
530 }
531
532 _x[k] = bCopy[k];
533 for (i = 0; i < k; ++i) {
534 _x[k] -= _x[i] * A->elem[k][i];
535 }
536 _x[k] /= A->elem[k][k];
537 }
538
539 for (k = N; k-- > 0;) {
540 for (i = k + 1; i < N; ++i) {
541 _x[k] -= _x[i] * A->elem[k][i];
542 }
543 }
544
545 initVec4(x, _x[0], _x[1], _x[2], _x[3]);
546 }
547
matMaxDiagonalElement(const float * square_mat,size_t n)548 float matMaxDiagonalElement(const float *square_mat, size_t n) {
549 ASSERT_NOT_NULL(square_mat);
550 ASSERT(n > 0);
551 size_t i;
552 float max = square_mat[0];
553 const size_t n_square = n * n;
554 const size_t offset = n + 1;
555 for (i = offset; i < n_square; i += offset) {
556 if (square_mat[i] > max) {
557 max = square_mat[i];
558 }
559 }
560 return max;
561 }
562
matAddConstantDiagonal(float * square_mat,float u,size_t n)563 void matAddConstantDiagonal(float *square_mat, float u, size_t n) {
564 ASSERT_NOT_NULL(square_mat);
565 size_t i;
566 const size_t n_square = n * n;
567 const size_t offset = n + 1;
568 for (i = 0; i < n_square; i += offset) {
569 square_mat[i] += u;
570 }
571 }
572
matTransposeMultiplyMat(float * out,const float * A,size_t nrows,size_t ncols)573 void matTransposeMultiplyMat(float *out, const float *A,
574 size_t nrows, size_t ncols) {
575 ASSERT_NOT_NULL(out);
576 ASSERT_NOT_NULL(A);
577 size_t i;
578 size_t j;
579 size_t k;
580 memset(out, 0, sizeof(float) * ncols * ncols);
581 for (i = 0; i < ncols; ++i) {
582 for (j = 0; j < ncols; ++j) {
583 // Since A' * A is symmetric, can use upper diagonal elements
584 // to fill in the lower diagonal without recomputing.
585 if (j < i) {
586 out[i * ncols + j] = out[j * ncols + i];
587 } else {
588 // mat_out[i, j] = ai ' * aj
589 out[i * ncols + j] = 0;
590 for (k = 0; k < nrows; ++k) {
591 out[i * ncols + j] += A[k * ncols + i] *
592 A[k * ncols + j];
593 }
594 }
595 }
596 }
597 }
598
matMultiplyVec(float * out,const float * A,const float * v,size_t nrows,size_t ncols)599 void matMultiplyVec(float *out, const float *A, const float *v,
600 size_t nrows, size_t ncols) {
601 ASSERT_NOT_NULL(out);
602 ASSERT_NOT_NULL(A);
603 ASSERT_NOT_NULL(v);
604 size_t i;
605 for (i = 0; i < nrows; ++i) {
606 const float *row = &A[i * ncols];
607 out[i] = vecDot(row, v, (int)ncols);
608 }
609 }
610
matTransposeMultiplyVec(float * out,const float * A,const float * v,size_t nrows,size_t ncols)611 void matTransposeMultiplyVec(float *out, const float *A, const float *v,
612 size_t nrows, size_t ncols) {
613 ASSERT_NOT_NULL(out);
614 ASSERT_NOT_NULL(A);
615 ASSERT_NOT_NULL(v);
616 size_t i, j;
617 for (i = 0; i < ncols; ++i) {
618 out[i] = 0;
619 for (j = 0; j < nrows; ++j) {
620 out[i] += A[j * ncols + i] * v[j];
621 }
622 }
623 }
624
matLinearSolveCholesky(float * x,const float * L,const float * b,size_t n)625 bool matLinearSolveCholesky(float *x, const float *L, const float *b, size_t n) {
626 ASSERT_NOT_NULL(x);
627 ASSERT_NOT_NULL(L);
628 ASSERT_NOT_NULL(b);
629 ASSERT(n <= INT32_MAX);
630 int32_t i, j; // Loops below require signed integers.
631 int32_t s_n = (int32_t)n; // Signed n.
632 float sum = 0.0f;
633 // 1. Solve Ly = b through forward substitution. Use x[] to store y.
634 for (i = 0; i < s_n; ++i) {
635 sum = 0.0f;
636 for (j = 0; j < i; ++j) {
637 sum += L[i * s_n + j] * x[j];
638 }
639 // Check for non-zero diagonals (don't divide by zero).
640 if (L[i * s_n + i] < EPSILON) {
641 return false;
642 }
643 x[i] = (b[i] - sum) / L[i * s_n + i];
644 }
645
646 // 2. Solve L'x = y through backwards substitution. Use x[] to store both
647 // y and x.
648 for (i = s_n - 1; i >= 0; --i) {
649 sum = 0.0f;
650 for (j = i + 1; j < s_n; ++j) {
651 sum += L[j * s_n + i] * x[j];
652 }
653 x[i] = (x[i] - sum) / L[i * s_n + i];
654 }
655
656 return true;
657 }
658
matCholeskyDecomposition(float * L,const float * A,size_t n)659 bool matCholeskyDecomposition(float *L, const float *A, size_t n) {
660 ASSERT_NOT_NULL(L);
661 ASSERT_NOT_NULL(A);
662 size_t i, j, k;
663 float sum = 0.0f;
664 // initialize L to zero.
665 memset(L, 0, sizeof(float) * n * n);
666
667 for (i = 0; i < n; ++i) {
668 // compute L[i][i] = sqrt(A[i][i] - sum_k = 1:i-1 L^2[i][k])
669 sum = 0.0f;
670 for (k = 0; k < i; ++k) {
671 sum += L[i * n + k] * L[i * n + k];
672 }
673 sum = A[i * n + i] - sum;
674 // If diagonal element of L is too small, cholesky fails.
675 if (sum < CHOLESKY_TOLERANCE) {
676 return false;
677 }
678 L[i * n + i] = sqrtf(sum);
679
680 // for j = i+1:N, compute L[j][i] =
681 // (1/L[i][i]) * (A[i][j] - sum_k = 1:i-1 L[i][k] * L[j][k])
682 for (j = i + 1; j < n; ++j) {
683 sum = 0.0f;
684 for (k = 0; k < i; ++k) {
685 sum += L[i * n + k] * L[j * n + k];
686 }
687 // division okay because magnitude of L[i][i] already checked above.
688 L[j * n + i] = (A[i * n + j] - sum) / L[i * n + i];
689 }
690 }
691
692 return true;
693 }
694