1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                        Intel License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of Intel Corporation may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41 
42 #include "_cxcore.h"
43 #include <float.h>
44 
45 /////////////////////////////////////////////////////////////////////////////////////////
46 
47 #define icvGivens_64f( n, x, y, c, s ) \
48 {                                      \
49     int _i;                            \
50     double* _x = (x);                  \
51     double* _y = (y);                  \
52                                        \
53     for( _i = 0; _i < n; _i++ )        \
54     {                                  \
55         double t0 = _x[_i];            \
56         double t1 = _y[_i];            \
57         _x[_i] = t0*c + t1*s;          \
58         _y[_i] = -t0*s + t1*c;         \
59     }                                  \
60 }
61 
62 
63 /* y[0:m,0:n] += diag(a[0:1,0:m]) * x[0:m,0:n] */
64 static  void
icvMatrAXPY_64f(int m,int n,const double * x,int dx,const double * a,double * y,int dy)65 icvMatrAXPY_64f( int m, int n, const double* x, int dx,
66                  const double* a, double* y, int dy )
67 {
68     int i, j;
69 
70     for( i = 0; i < m; i++, x += dx, y += dy )
71     {
72         double s = a[i];
73 
74         for( j = 0; j <= n - 4; j += 4 )
75         {
76             double t0 = y[j]   + s*x[j];
77             double t1 = y[j+1] + s*x[j+1];
78             y[j]   = t0;
79             y[j+1] = t1;
80             t0 = y[j+2] + s*x[j+2];
81             t1 = y[j+3] + s*x[j+3];
82             y[j+2] = t0;
83             y[j+3] = t1;
84         }
85 
86         for( ; j < n; j++ ) y[j] += s*x[j];
87     }
88 }
89 
90 
91 /* y[1:m,-1] = h*y[1:m,0:n]*x[0:1,0:n]'*x[-1]  (this is used for U&V reconstruction)
92    y[1:m,0:n] += h*y[1:m,0:n]*x[0:1,0:n]'*x[0:1,0:n] */
93 static void
icvMatrAXPY3_64f(int m,int n,const double * x,int l,double * y,double h)94 icvMatrAXPY3_64f( int m, int n, const double* x, int l, double* y, double h )
95 {
96     int i, j;
97 
98     for( i = 1; i < m; i++ )
99     {
100         double s = 0;
101 
102         y += l;
103 
104         for( j = 0; j <= n - 4; j += 4 )
105             s += x[j]*y[j] + x[j+1]*y[j+1] + x[j+2]*y[j+2] + x[j+3]*y[j+3];
106 
107         for( ; j < n; j++ )  s += x[j]*y[j];
108 
109         s *= h;
110         y[-1] = s*x[-1];
111 
112         for( j = 0; j <= n - 4; j += 4 )
113         {
114             double t0 = y[j]   + s*x[j];
115             double t1 = y[j+1] + s*x[j+1];
116             y[j]   = t0;
117             y[j+1] = t1;
118             t0 = y[j+2] + s*x[j+2];
119             t1 = y[j+3] + s*x[j+3];
120             y[j+2] = t0;
121             y[j+3] = t1;
122         }
123 
124         for( ; j < n; j++ ) y[j] += s*x[j];
125     }
126 }
127 
128 
129 #define icvGivens_32f( n, x, y, c, s ) \
130 {                                      \
131     int _i;                            \
132     float* _x = (x);                   \
133     float* _y = (y);                   \
134                                        \
135     for( _i = 0; _i < n; _i++ )        \
136     {                                  \
137         double t0 = _x[_i];            \
138         double t1 = _y[_i];            \
139         _x[_i] = (float)(t0*c + t1*s); \
140         _y[_i] = (float)(-t0*s + t1*c);\
141     }                                  \
142 }
143 
144 static  void
icvMatrAXPY_32f(int m,int n,const float * x,int dx,const float * a,float * y,int dy)145 icvMatrAXPY_32f( int m, int n, const float* x, int dx,
146                  const float* a, float* y, int dy )
147 {
148     int i, j;
149 
150     for( i = 0; i < m; i++, x += dx, y += dy )
151     {
152         double s = a[i];
153 
154         for( j = 0; j <= n - 4; j += 4 )
155         {
156             double t0 = y[j]   + s*x[j];
157             double t1 = y[j+1] + s*x[j+1];
158             y[j]   = (float)t0;
159             y[j+1] = (float)t1;
160             t0 = y[j+2] + s*x[j+2];
161             t1 = y[j+3] + s*x[j+3];
162             y[j+2] = (float)t0;
163             y[j+3] = (float)t1;
164         }
165 
166         for( ; j < n; j++ )
167             y[j] = (float)(y[j] + s*x[j]);
168     }
169 }
170 
171 
172 static void
icvMatrAXPY3_32f(int m,int n,const float * x,int l,float * y,double h)173 icvMatrAXPY3_32f( int m, int n, const float* x, int l, float* y, double h )
174 {
175     int i, j;
176 
177     for( i = 1; i < m; i++ )
178     {
179         double s = 0;
180         y += l;
181 
182         for( j = 0; j <= n - 4; j += 4 )
183             s += x[j]*y[j] + x[j+1]*y[j+1] + x[j+2]*y[j+2] + x[j+3]*y[j+3];
184 
185         for( ; j < n; j++ )  s += x[j]*y[j];
186 
187         s *= h;
188         y[-1] = (float)(s*x[-1]);
189 
190         for( j = 0; j <= n - 4; j += 4 )
191         {
192             double t0 = y[j]   + s*x[j];
193             double t1 = y[j+1] + s*x[j+1];
194             y[j]   = (float)t0;
195             y[j+1] = (float)t1;
196             t0 = y[j+2] + s*x[j+2];
197             t1 = y[j+3] + s*x[j+3];
198             y[j+2] = (float)t0;
199             y[j+3] = (float)t1;
200         }
201 
202         for( ; j < n; j++ ) y[j] = (float)(y[j] + s*x[j]);
203     }
204 }
205 
206 /* accurate hypotenuse calculation */
207 static double
pythag(double a,double b)208 pythag( double a, double b )
209 {
210     a = fabs( a );
211     b = fabs( b );
212     if( a > b )
213     {
214         b /= a;
215         a *= sqrt( 1. + b * b );
216     }
217     else if( b != 0 )
218     {
219         a /= b;
220         a = b * sqrt( 1. + a * a );
221     }
222 
223     return a;
224 }
225 
226 /****************************************************************************************/
227 /****************************************************************************************/
228 
229 #define MAX_ITERS  30
230 
231 static void
icvSVD_64f(double * a,int lda,int m,int n,double * w,double * uT,int lduT,int nu,double * vT,int ldvT,double * buffer)232 icvSVD_64f( double* a, int lda, int m, int n,
233             double* w,
234             double* uT, int lduT, int nu,
235             double* vT, int ldvT,
236             double* buffer )
237 {
238     double* e;
239     double* temp;
240     double *w1, *e1;
241     double *hv;
242     double ku0 = 0, kv0 = 0;
243     double anorm = 0;
244     double *a1, *u0 = uT, *v0 = vT;
245     double scale, h;
246     int i, j, k, l;
247     int nm, m1, n1;
248     int nv = n;
249     int iters = 0;
250     double* hv0 = (double*)cvStackAlloc( (m+2)*sizeof(hv0[0])) + 1;
251 
252     e = buffer;
253     w1 = w;
254     e1 = e + 1;
255     nm = n;
256 
257     temp = buffer + nm;
258 
259     memset( w, 0, nm * sizeof( w[0] ));
260     memset( e, 0, nm * sizeof( e[0] ));
261 
262     m1 = m;
263     n1 = n;
264 
265     /* transform a to bi-diagonal form */
266     for( ;; )
267     {
268         int update_u;
269         int update_v;
270 
271         if( m1 == 0 )
272             break;
273 
274         scale = h = 0;
275         update_u = uT && m1 > m - nu;
276         hv = update_u ? uT : hv0;
277 
278         for( j = 0, a1 = a; j < m1; j++, a1 += lda )
279         {
280             double t = a1[0];
281             scale += fabs( hv[j] = t );
282         }
283 
284         if( scale != 0 )
285         {
286             double f = 1./scale, g, s = 0;
287 
288             for( j = 0; j < m1; j++ )
289             {
290                 double t = (hv[j] *= f);
291                 s += t * t;
292             }
293 
294             g = sqrt( s );
295             f = hv[0];
296             if( f >= 0 )
297                 g = -g;
298             hv[0] = f - g;
299             h = 1. / (f * g - s);
300 
301             memset( temp, 0, n1 * sizeof( temp[0] ));
302 
303             /* calc temp[0:n-i] = a[i:m,i:n]'*hv[0:m-i] */
304             icvMatrAXPY_64f( m1, n1 - 1, a + 1, lda, hv, temp + 1, 0 );
305             for( k = 1; k < n1; k++ ) temp[k] *= h;
306 
307             /* modify a: a[i:m,i:n] = a[i:m,i:n] + hv[0:m-i]*temp[0:n-i]' */
308             icvMatrAXPY_64f( m1, n1 - 1, temp + 1, 0, hv, a + 1, lda );
309             *w1 = g*scale;
310         }
311         w1++;
312 
313         /* store -2/(hv'*hv) */
314         if( update_u )
315         {
316             if( m1 == m )
317                 ku0 = h;
318             else
319                 hv[-1] = h;
320         }
321 
322         a++;
323         n1--;
324         if( vT )
325             vT += ldvT + 1;
326 
327         if( n1 == 0 )
328             break;
329 
330         scale = h = 0;
331         update_v = vT && n1 > n - nv;
332 
333         hv = update_v ? vT : hv0;
334 
335         for( j = 0; j < n1; j++ )
336         {
337             double t = a[j];
338             scale += fabs( hv[j] = t );
339         }
340 
341         if( scale != 0 )
342         {
343             double f = 1./scale, g, s = 0;
344 
345             for( j = 0; j < n1; j++ )
346             {
347                 double t = (hv[j] *= f);
348                 s += t * t;
349             }
350 
351             g = sqrt( s );
352             f = hv[0];
353             if( f >= 0 )
354                 g = -g;
355             hv[0] = f - g;
356             h = 1. / (f * g - s);
357             hv[-1] = 0.;
358 
359             /* update a[i:m:i+1:n] = a[i:m,i+1:n] + (a[i:m,i+1:n]*hv[0:m-i])*... */
360             icvMatrAXPY3_64f( m1, n1, hv, lda, a, h );
361 
362             *e1 = g*scale;
363         }
364         e1++;
365 
366         /* store -2/(hv'*hv) */
367         if( update_v )
368         {
369             if( n1 == n )
370                 kv0 = h;
371             else
372                 hv[-1] = h;
373         }
374 
375         a += lda;
376         m1--;
377         if( uT )
378             uT += lduT + 1;
379     }
380 
381     m1 -= m1 != 0;
382     n1 -= n1 != 0;
383 
384     /* accumulate left transformations */
385     if( uT )
386     {
387         m1 = m - m1;
388         uT = u0 + m1 * lduT;
389         for( i = m1; i < nu; i++, uT += lduT )
390         {
391             memset( uT + m1, 0, (m - m1) * sizeof( uT[0] ));
392             uT[i] = 1.;
393         }
394 
395         for( i = m1 - 1; i >= 0; i-- )
396         {
397             double s;
398             int lh = nu - i;
399 
400             l = m - i;
401 
402             hv = u0 + (lduT + 1) * i;
403             h = i == 0 ? ku0 : hv[-1];
404 
405             assert( h <= 0 );
406 
407             if( h != 0 )
408             {
409                 uT = hv;
410                 icvMatrAXPY3_64f( lh, l-1, hv+1, lduT, uT+1, h );
411 
412                 s = hv[0] * h;
413                 for( k = 0; k < l; k++ ) hv[k] *= s;
414                 hv[0] += 1;
415             }
416             else
417             {
418                 for( j = 1; j < l; j++ )
419                     hv[j] = 0;
420                 for( j = 1; j < lh; j++ )
421                     hv[j * lduT] = 0;
422                 hv[0] = 1;
423             }
424         }
425         uT = u0;
426     }
427 
428     /* accumulate right transformations */
429     if( vT )
430     {
431         n1 = n - n1;
432         vT = v0 + n1 * ldvT;
433         for( i = n1; i < nv; i++, vT += ldvT )
434         {
435             memset( vT + n1, 0, (n - n1) * sizeof( vT[0] ));
436             vT[i] = 1.;
437         }
438 
439         for( i = n1 - 1; i >= 0; i-- )
440         {
441             double s;
442             int lh = nv - i;
443 
444             l = n - i;
445             hv = v0 + (ldvT + 1) * i;
446             h = i == 0 ? kv0 : hv[-1];
447 
448             assert( h <= 0 );
449 
450             if( h != 0 )
451             {
452                 vT = hv;
453                 icvMatrAXPY3_64f( lh, l-1, hv+1, ldvT, vT+1, h );
454 
455                 s = hv[0] * h;
456                 for( k = 0; k < l; k++ ) hv[k] *= s;
457                 hv[0] += 1;
458             }
459             else
460             {
461                 for( j = 1; j < l; j++ )
462                     hv[j] = 0;
463                 for( j = 1; j < lh; j++ )
464                     hv[j * ldvT] = 0;
465                 hv[0] = 1;
466             }
467         }
468         vT = v0;
469     }
470 
471     for( i = 0; i < nm; i++ )
472     {
473         double tnorm = fabs( w[i] );
474         tnorm += fabs( e[i] );
475 
476         if( anorm < tnorm )
477             anorm = tnorm;
478     }
479 
480     anorm *= DBL_EPSILON;
481 
482     /* diagonalization of the bidiagonal form */
483     for( k = nm - 1; k >= 0; k-- )
484     {
485         double z = 0;
486         iters = 0;
487 
488         for( ;; )               /* do iterations */
489         {
490             double c, s, f, g, x, y;
491             int flag = 0;
492 
493             /* test for splitting */
494             for( l = k; l >= 0; l-- )
495             {
496                 if( fabs(e[l]) <= anorm )
497                 {
498                     flag = 1;
499                     break;
500                 }
501                 assert( l > 0 );
502                 if( fabs(w[l - 1]) <= anorm )
503                     break;
504             }
505 
506             if( !flag )
507             {
508                 c = 0;
509                 s = 1;
510 
511                 for( i = l; i <= k; i++ )
512                 {
513                     f = s * e[i];
514 
515                     e[i] *= c;
516 
517                     if( anorm + fabs( f ) == anorm )
518                         break;
519 
520                     g = w[i];
521                     h = pythag( f, g );
522                     w[i] = h;
523                     c = g / h;
524                     s = -f / h;
525 
526                     if( uT )
527                         icvGivens_64f( m, uT + lduT * (l - 1), uT + lduT * i, c, s );
528                 }
529             }
530 
531             z = w[k];
532             if( l == k || iters++ == MAX_ITERS )
533                 break;
534 
535             /* shift from bottom 2x2 minor */
536             x = w[l];
537             y = w[k - 1];
538             g = e[k - 1];
539             h = e[k];
540             f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
541             g = pythag( f, 1 );
542             if( f < 0 )
543                 g = -g;
544             f = x - (z / x) * z + (h / x) * (y / (f + g) - h);
545             /* next QR transformation */
546             c = s = 1;
547 
548             for( i = l + 1; i <= k; i++ )
549             {
550                 g = e[i];
551                 y = w[i];
552                 h = s * g;
553                 g *= c;
554                 z = pythag( f, h );
555                 e[i - 1] = z;
556                 c = f / z;
557                 s = h / z;
558                 f = x * c + g * s;
559                 g = -x * s + g * c;
560                 h = y * s;
561                 y *= c;
562 
563                 if( vT )
564                     icvGivens_64f( n, vT + ldvT * (i - 1), vT + ldvT * i, c, s );
565 
566                 z = pythag( f, h );
567                 w[i - 1] = z;
568 
569                 /* rotation can be arbitrary if z == 0 */
570                 if( z != 0 )
571                 {
572                     c = f / z;
573                     s = h / z;
574                 }
575                 f = c * g + s * y;
576                 x = -s * g + c * y;
577 
578                 if( uT )
579                     icvGivens_64f( m, uT + lduT * (i - 1), uT + lduT * i, c, s );
580             }
581 
582             e[l] = 0;
583             e[k] = f;
584             w[k] = x;
585         }                       /* end of iteration loop */
586 
587         if( iters > MAX_ITERS )
588             break;
589 
590         if( z < 0 )
591         {
592             w[k] = -z;
593             if( vT )
594             {
595                 for( j = 0; j < n; j++ )
596                     vT[j + k * ldvT] = -vT[j + k * ldvT];
597             }
598         }
599     }                           /* end of diagonalization loop */
600 
601     /* sort singular values and corresponding values */
602     for( i = 0; i < nm; i++ )
603     {
604         k = i;
605         for( j = i + 1; j < nm; j++ )
606             if( w[k] < w[j] )
607                 k = j;
608 
609         if( k != i )
610         {
611             double t;
612             CV_SWAP( w[i], w[k], t );
613 
614             if( vT )
615                 for( j = 0; j < n; j++ )
616                     CV_SWAP( vT[j + ldvT*k], vT[j + ldvT*i], t );
617 
618             if( uT )
619                 for( j = 0; j < m; j++ )
620                     CV_SWAP( uT[j + lduT*k], uT[j + lduT*i], t );
621         }
622     }
623 }
624 
625 
626 static void
icvSVD_32f(float * a,int lda,int m,int n,float * w,float * uT,int lduT,int nu,float * vT,int ldvT,float * buffer)627 icvSVD_32f( float* a, int lda, int m, int n,
628             float* w,
629             float* uT, int lduT, int nu,
630             float* vT, int ldvT,
631             float* buffer )
632 {
633     float* e;
634     float* temp;
635     float *w1, *e1;
636     float *hv;
637     double ku0 = 0, kv0 = 0;
638     double anorm = 0;
639     float *a1, *u0 = uT, *v0 = vT;
640     double scale, h;
641     int i, j, k, l;
642     int nm, m1, n1;
643     int nv = n;
644     int iters = 0;
645     float* hv0 = (float*)cvStackAlloc( (m+2)*sizeof(hv0[0])) + 1;
646 
647     e = buffer;
648 
649     w1 = w;
650     e1 = e + 1;
651     nm = n;
652 
653     temp = buffer + nm;
654 
655     memset( w, 0, nm * sizeof( w[0] ));
656     memset( e, 0, nm * sizeof( e[0] ));
657 
658     m1 = m;
659     n1 = n;
660 
661     /* transform a to bi-diagonal form */
662     for( ;; )
663     {
664         int update_u;
665         int update_v;
666 
667         if( m1 == 0 )
668             break;
669 
670         scale = h = 0;
671 
672         update_u = uT && m1 > m - nu;
673         hv = update_u ? uT : hv0;
674 
675         for( j = 0, a1 = a; j < m1; j++, a1 += lda )
676         {
677             double t = a1[0];
678             scale += fabs( hv[j] = (float)t );
679         }
680 
681         if( scale != 0 )
682         {
683             double f = 1./scale, g, s = 0;
684 
685             for( j = 0; j < m1; j++ )
686             {
687                 double t = (hv[j] = (float)(hv[j]*f));
688                 s += t * t;
689             }
690 
691             g = sqrt( s );
692             f = hv[0];
693             if( f >= 0 )
694                 g = -g;
695             hv[0] = (float)(f - g);
696             h = 1. / (f * g - s);
697 
698             memset( temp, 0, n1 * sizeof( temp[0] ));
699 
700             /* calc temp[0:n-i] = a[i:m,i:n]'*hv[0:m-i] */
701             icvMatrAXPY_32f( m1, n1 - 1, a + 1, lda, hv, temp + 1, 0 );
702 
703             for( k = 1; k < n1; k++ ) temp[k] = (float)(temp[k]*h);
704 
705             /* modify a: a[i:m,i:n] = a[i:m,i:n] + hv[0:m-i]*temp[0:n-i]' */
706             icvMatrAXPY_32f( m1, n1 - 1, temp + 1, 0, hv, a + 1, lda );
707             *w1 = (float)(g*scale);
708         }
709         w1++;
710 
711         /* store -2/(hv'*hv) */
712         if( update_u )
713         {
714             if( m1 == m )
715                 ku0 = h;
716             else
717                 hv[-1] = (float)h;
718         }
719 
720         a++;
721         n1--;
722         if( vT )
723             vT += ldvT + 1;
724 
725         if( n1 == 0 )
726             break;
727 
728         scale = h = 0;
729         update_v = vT && n1 > n - nv;
730         hv = update_v ? vT : hv0;
731 
732         for( j = 0; j < n1; j++ )
733         {
734             double t = a[j];
735             scale += fabs( hv[j] = (float)t );
736         }
737 
738         if( scale != 0 )
739         {
740             double f = 1./scale, g, s = 0;
741 
742             for( j = 0; j < n1; j++ )
743             {
744                 double t = (hv[j] = (float)(hv[j]*f));
745                 s += t * t;
746             }
747 
748             g = sqrt( s );
749             f = hv[0];
750             if( f >= 0 )
751                 g = -g;
752             hv[0] = (float)(f - g);
753             h = 1. / (f * g - s);
754             hv[-1] = 0.f;
755 
756             /* update a[i:m:i+1:n] = a[i:m,i+1:n] + (a[i:m,i+1:n]*hv[0:m-i])*... */
757             icvMatrAXPY3_32f( m1, n1, hv, lda, a, h );
758 
759             *e1 = (float)(g*scale);
760         }
761         e1++;
762 
763         /* store -2/(hv'*hv) */
764         if( update_v )
765         {
766             if( n1 == n )
767                 kv0 = h;
768             else
769                 hv[-1] = (float)h;
770         }
771 
772         a += lda;
773         m1--;
774         if( uT )
775             uT += lduT + 1;
776     }
777 
778     m1 -= m1 != 0;
779     n1 -= n1 != 0;
780 
781     /* accumulate left transformations */
782     if( uT )
783     {
784         m1 = m - m1;
785         uT = u0 + m1 * lduT;
786         for( i = m1; i < nu; i++, uT += lduT )
787         {
788             memset( uT + m1, 0, (m - m1) * sizeof( uT[0] ));
789             uT[i] = 1.;
790         }
791 
792         for( i = m1 - 1; i >= 0; i-- )
793         {
794             double s;
795             int lh = nu - i;
796 
797             l = m - i;
798 
799             hv = u0 + (lduT + 1) * i;
800             h = i == 0 ? ku0 : hv[-1];
801 
802             assert( h <= 0 );
803 
804             if( h != 0 )
805             {
806                 uT = hv;
807                 icvMatrAXPY3_32f( lh, l-1, hv+1, lduT, uT+1, h );
808 
809                 s = hv[0] * h;
810                 for( k = 0; k < l; k++ ) hv[k] = (float)(hv[k]*s);
811                 hv[0] += 1;
812             }
813             else
814             {
815                 for( j = 1; j < l; j++ )
816                     hv[j] = 0;
817                 for( j = 1; j < lh; j++ )
818                     hv[j * lduT] = 0;
819                 hv[0] = 1;
820             }
821         }
822         uT = u0;
823     }
824 
825     /* accumulate right transformations */
826     if( vT )
827     {
828         n1 = n - n1;
829         vT = v0 + n1 * ldvT;
830         for( i = n1; i < nv; i++, vT += ldvT )
831         {
832             memset( vT + n1, 0, (n - n1) * sizeof( vT[0] ));
833             vT[i] = 1.;
834         }
835 
836         for( i = n1 - 1; i >= 0; i-- )
837         {
838             double s;
839             int lh = nv - i;
840 
841             l = n - i;
842             hv = v0 + (ldvT + 1) * i;
843             h = i == 0 ? kv0 : hv[-1];
844 
845             assert( h <= 0 );
846 
847             if( h != 0 )
848             {
849                 vT = hv;
850                 icvMatrAXPY3_32f( lh, l-1, hv+1, ldvT, vT+1, h );
851 
852                 s = hv[0] * h;
853                 for( k = 0; k < l; k++ ) hv[k] = (float)(hv[k]*s);
854                 hv[0] += 1;
855             }
856             else
857             {
858                 for( j = 1; j < l; j++ )
859                     hv[j] = 0;
860                 for( j = 1; j < lh; j++ )
861                     hv[j * ldvT] = 0;
862                 hv[0] = 1;
863             }
864         }
865         vT = v0;
866     }
867 
868     for( i = 0; i < nm; i++ )
869     {
870         double tnorm = fabs( w[i] );
871         tnorm += fabs( e[i] );
872 
873         if( anorm < tnorm )
874             anorm = tnorm;
875     }
876 
877     anorm *= FLT_EPSILON;
878 
879     /* diagonalization of the bidiagonal form */
880     for( k = nm - 1; k >= 0; k-- )
881     {
882         double z = 0;
883         iters = 0;
884 
885         for( ;; )               /* do iterations */
886         {
887             double c, s, f, g, x, y;
888             int flag = 0;
889 
890             /* test for splitting */
891             for( l = k; l >= 0; l-- )
892             {
893                 if( fabs( e[l] ) <= anorm )
894                 {
895                     flag = 1;
896                     break;
897                 }
898                 assert( l > 0 );
899                 if( fabs( w[l - 1] ) <= anorm )
900                     break;
901             }
902 
903             if( !flag )
904             {
905                 c = 0;
906                 s = 1;
907 
908                 for( i = l; i <= k; i++ )
909                 {
910                     f = s * e[i];
911                     e[i] = (float)(e[i]*c);
912 
913                     if( anorm + fabs( f ) == anorm )
914                         break;
915 
916                     g = w[i];
917                     h = pythag( f, g );
918                     w[i] = (float)h;
919                     c = g / h;
920                     s = -f / h;
921 
922                     if( uT )
923                         icvGivens_32f( m, uT + lduT * (l - 1), uT + lduT * i, c, s );
924                 }
925             }
926 
927             z = w[k];
928             if( l == k || iters++ == MAX_ITERS )
929                 break;
930 
931             /* shift from bottom 2x2 minor */
932             x = w[l];
933             y = w[k - 1];
934             g = e[k - 1];
935             h = e[k];
936             f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
937             g = pythag( f, 1 );
938             if( f < 0 )
939                 g = -g;
940             f = x - (z / x) * z + (h / x) * (y / (f + g) - h);
941             /* next QR transformation */
942             c = s = 1;
943 
944             for( i = l + 1; i <= k; i++ )
945             {
946                 g = e[i];
947                 y = w[i];
948                 h = s * g;
949                 g *= c;
950                 z = pythag( f, h );
951                 e[i - 1] = (float)z;
952                 c = f / z;
953                 s = h / z;
954                 f = x * c + g * s;
955                 g = -x * s + g * c;
956                 h = y * s;
957                 y *= c;
958 
959                 if( vT )
960                     icvGivens_32f( n, vT + ldvT * (i - 1), vT + ldvT * i, c, s );
961 
962                 z = pythag( f, h );
963                 w[i - 1] = (float)z;
964 
965                 /* rotation can be arbitrary if z == 0 */
966                 if( z != 0 )
967                 {
968                     c = f / z;
969                     s = h / z;
970                 }
971                 f = c * g + s * y;
972                 x = -s * g + c * y;
973 
974                 if( uT )
975                     icvGivens_32f( m, uT + lduT * (i - 1), uT + lduT * i, c, s );
976             }
977 
978             e[l] = 0;
979             e[k] = (float)f;
980             w[k] = (float)x;
981         }                       /* end of iteration loop */
982 
983         if( iters > MAX_ITERS )
984             break;
985 
986         if( z < 0 )
987         {
988             w[k] = (float)(-z);
989             if( vT )
990             {
991                 for( j = 0; j < n; j++ )
992                     vT[j + k * ldvT] = -vT[j + k * ldvT];
993             }
994         }
995     }                           /* end of diagonalization loop */
996 
997     /* sort singular values and corresponding vectors */
998     for( i = 0; i < nm; i++ )
999     {
1000         k = i;
1001         for( j = i + 1; j < nm; j++ )
1002             if( w[k] < w[j] )
1003                 k = j;
1004 
1005         if( k != i )
1006         {
1007             float t;
1008             CV_SWAP( w[i], w[k], t );
1009 
1010             if( vT )
1011                 for( j = 0; j < n; j++ )
1012                     CV_SWAP( vT[j + ldvT*k], vT[j + ldvT*i], t );
1013 
1014             if( uT )
1015                 for( j = 0; j < m; j++ )
1016                     CV_SWAP( uT[j + lduT*k], uT[j + lduT*i], t );
1017         }
1018     }
1019 }
1020 
1021 
1022 static void
icvSVBkSb_64f(int m,int n,const double * w,const double * uT,int lduT,const double * vT,int ldvT,const double * b,int ldb,int nb,double * x,int ldx,double * buffer)1023 icvSVBkSb_64f( int m, int n, const double* w,
1024                const double* uT, int lduT,
1025                const double* vT, int ldvT,
1026                const double* b, int ldb, int nb,
1027                double* x, int ldx, double* buffer )
1028 {
1029     double threshold = 0;
1030     int i, j, nm = MIN( m, n );
1031 
1032     if( !b )
1033         nb = m;
1034 
1035     for( i = 0; i < n; i++ )
1036         memset( x + i*ldx, 0, nb*sizeof(x[0]));
1037 
1038     for( i = 0; i < nm; i++ )
1039         threshold += w[i];
1040     threshold *= 2*DBL_EPSILON;
1041 
1042     /* vT * inv(w) * uT * b */
1043     for( i = 0; i < nm; i++, uT += lduT, vT += ldvT )
1044     {
1045         double wi = w[i];
1046 
1047         if( wi > threshold )
1048         {
1049             wi = 1./wi;
1050 
1051             if( nb == 1 )
1052             {
1053                 double s = 0;
1054                 if( b )
1055                 {
1056                     if( ldb == 1 )
1057                     {
1058                         for( j = 0; j <= m - 4; j += 4 )
1059                             s += uT[j]*b[j] + uT[j+1]*b[j+1] + uT[j+2]*b[j+2] + uT[j+3]*b[j+3];
1060                         for( ; j < m; j++ )
1061                             s += uT[j]*b[j];
1062                     }
1063                     else
1064                     {
1065                         for( j = 0; j < m; j++ )
1066                             s += uT[j]*b[j*ldb];
1067                     }
1068                 }
1069                 else
1070                     s = uT[0];
1071                 s *= wi;
1072                 if( ldx == 1 )
1073                 {
1074                     for( j = 0; j <= n - 4; j += 4 )
1075                     {
1076                         double t0 = x[j] + s*vT[j];
1077                         double t1 = x[j+1] + s*vT[j+1];
1078                         x[j] = t0;
1079                         x[j+1] = t1;
1080                         t0 = x[j+2] + s*vT[j+2];
1081                         t1 = x[j+3] + s*vT[j+3];
1082                         x[j+2] = t0;
1083                         x[j+3] = t1;
1084                     }
1085 
1086                     for( ; j < n; j++ )
1087                         x[j] += s*vT[j];
1088                 }
1089                 else
1090                 {
1091                     for( j = 0; j < n; j++ )
1092                         x[j*ldx] += s*vT[j];
1093                 }
1094             }
1095             else
1096             {
1097                 if( b )
1098                 {
1099                     memset( buffer, 0, nb*sizeof(buffer[0]));
1100                     icvMatrAXPY_64f( m, nb, b, ldb, uT, buffer, 0 );
1101                     for( j = 0; j < nb; j++ )
1102                         buffer[j] *= wi;
1103                 }
1104                 else
1105                 {
1106                     for( j = 0; j < nb; j++ )
1107                         buffer[j] = uT[j]*wi;
1108                 }
1109                 icvMatrAXPY_64f( n, nb, buffer, 0, vT, x, ldx );
1110             }
1111         }
1112     }
1113 }
1114 
1115 
1116 static void
icvSVBkSb_32f(int m,int n,const float * w,const float * uT,int lduT,const float * vT,int ldvT,const float * b,int ldb,int nb,float * x,int ldx,float * buffer)1117 icvSVBkSb_32f( int m, int n, const float* w,
1118                const float* uT, int lduT,
1119                const float* vT, int ldvT,
1120                const float* b, int ldb, int nb,
1121                float* x, int ldx, float* buffer )
1122 {
1123     float threshold = 0.f;
1124     int i, j, nm = MIN( m, n );
1125 
1126     if( !b )
1127         nb = m;
1128 
1129     for( i = 0; i < n; i++ )
1130         memset( x + i*ldx, 0, nb*sizeof(x[0]));
1131 
1132     for( i = 0; i < nm; i++ )
1133         threshold += w[i];
1134     threshold *= 2*FLT_EPSILON;
1135 
1136     /* vT * inv(w) * uT * b */
1137     for( i = 0; i < nm; i++, uT += lduT, vT += ldvT )
1138     {
1139         double wi = w[i];
1140 
1141         if( wi > threshold )
1142         {
1143             wi = 1./wi;
1144 
1145             if( nb == 1 )
1146             {
1147                 double s = 0;
1148                 if( b )
1149                 {
1150                     if( ldb == 1 )
1151                     {
1152                         for( j = 0; j <= m - 4; j += 4 )
1153                             s += uT[j]*b[j] + uT[j+1]*b[j+1] + uT[j+2]*b[j+2] + uT[j+3]*b[j+3];
1154                         for( ; j < m; j++ )
1155                             s += uT[j]*b[j];
1156                     }
1157                     else
1158                     {
1159                         for( j = 0; j < m; j++ )
1160                             s += uT[j]*b[j*ldb];
1161                     }
1162                 }
1163                 else
1164                     s = uT[0];
1165                 s *= wi;
1166 
1167                 if( ldx == 1 )
1168                 {
1169                     for( j = 0; j <= n - 4; j += 4 )
1170                     {
1171                         double t0 = x[j] + s*vT[j];
1172                         double t1 = x[j+1] + s*vT[j+1];
1173                         x[j] = (float)t0;
1174                         x[j+1] = (float)t1;
1175                         t0 = x[j+2] + s*vT[j+2];
1176                         t1 = x[j+3] + s*vT[j+3];
1177                         x[j+2] = (float)t0;
1178                         x[j+3] = (float)t1;
1179                     }
1180 
1181                     for( ; j < n; j++ )
1182                         x[j] = (float)(x[j] + s*vT[j]);
1183                 }
1184                 else
1185                 {
1186                     for( j = 0; j < n; j++ )
1187                         x[j*ldx] = (float)(x[j*ldx] + s*vT[j]);
1188                 }
1189             }
1190             else
1191             {
1192                 if( b )
1193                 {
1194                     memset( buffer, 0, nb*sizeof(buffer[0]));
1195                     icvMatrAXPY_32f( m, nb, b, ldb, uT, buffer, 0 );
1196                     for( j = 0; j < nb; j++ )
1197                         buffer[j] = (float)(buffer[j]*wi);
1198                 }
1199                 else
1200                 {
1201                     for( j = 0; j < nb; j++ )
1202                         buffer[j] = (float)(uT[j]*wi);
1203                 }
1204                 icvMatrAXPY_32f( n, nb, buffer, 0, vT, x, ldx );
1205             }
1206         }
1207     }
1208 }
1209 
1210 
1211 CV_IMPL  void
cvSVD(CvArr * aarr,CvArr * warr,CvArr * uarr,CvArr * varr,int flags)1212 cvSVD( CvArr* aarr, CvArr* warr, CvArr* uarr, CvArr* varr, int flags )
1213 {
1214     uchar* buffer = 0;
1215     int local_alloc = 0;
1216 
1217     CV_FUNCNAME( "cvSVD" );
1218 
1219     __BEGIN__;
1220 
1221     CvMat astub, *a = (CvMat*)aarr;
1222     CvMat wstub, *w = (CvMat*)warr;
1223     CvMat ustub, *u;
1224     CvMat vstub, *v;
1225     CvMat tmat;
1226     uchar* tw = 0;
1227     int type;
1228     int a_buf_offset = 0, u_buf_offset = 0, buf_size, pix_size;
1229     int temp_u = 0, /* temporary storage for U is needed */
1230         t_svd; /* special case: a->rows < a->cols */
1231     int m, n;
1232     int w_rows, w_cols;
1233     int u_rows = 0, u_cols = 0;
1234     int w_is_mat = 0;
1235 
1236     if( !CV_IS_MAT( a ))
1237         CV_CALL( a = cvGetMat( a, &astub ));
1238 
1239     if( !CV_IS_MAT( w ))
1240         CV_CALL( w = cvGetMat( w, &wstub ));
1241 
1242     if( !CV_ARE_TYPES_EQ( a, w ))
1243         CV_ERROR( CV_StsUnmatchedFormats, "" );
1244 
1245     if( a->rows >= a->cols )
1246     {
1247         m = a->rows;
1248         n = a->cols;
1249         w_rows = w->rows;
1250         w_cols = w->cols;
1251         t_svd = 0;
1252     }
1253     else
1254     {
1255         CvArr* t;
1256         CV_SWAP( uarr, varr, t );
1257 
1258         flags = (flags & CV_SVD_U_T ? CV_SVD_V_T : 0)|
1259                 (flags & CV_SVD_V_T ? CV_SVD_U_T : 0);
1260         m = a->cols;
1261         n = a->rows;
1262         w_rows = w->cols;
1263         w_cols = w->rows;
1264         t_svd = 1;
1265     }
1266 
1267     u = (CvMat*)uarr;
1268     v = (CvMat*)varr;
1269 
1270     w_is_mat = w_cols > 1 && w_rows > 1;
1271 
1272     if( !w_is_mat && CV_IS_MAT_CONT(w->type) && w_cols + w_rows - 1 == n )
1273         tw = w->data.ptr;
1274 
1275     if( u )
1276     {
1277         if( !CV_IS_MAT( u ))
1278             CV_CALL( u = cvGetMat( u, &ustub ));
1279 
1280         if( !(flags & CV_SVD_U_T) )
1281         {
1282             u_rows = u->rows;
1283             u_cols = u->cols;
1284         }
1285         else
1286         {
1287             u_rows = u->cols;
1288             u_cols = u->rows;
1289         }
1290 
1291         if( !CV_ARE_TYPES_EQ( a, u ))
1292             CV_ERROR( CV_StsUnmatchedFormats, "" );
1293 
1294         if( u_rows != m || (u_cols != m && u_cols != n))
1295             CV_ERROR( CV_StsUnmatchedSizes, !t_svd ? "U matrix has unappropriate size" :
1296                                                      "V matrix has unappropriate size" );
1297 
1298         temp_u = (u_rows != u_cols && !(flags & CV_SVD_U_T)) || u->data.ptr==a->data.ptr;
1299 
1300         if( w_is_mat && u_cols != w_rows )
1301             CV_ERROR( CV_StsUnmatchedSizes, !t_svd ? "U and W have incompatible sizes" :
1302                                                      "V and W have incompatible sizes" );
1303     }
1304     else
1305     {
1306         u = &ustub;
1307         u->data.ptr = 0;
1308         u->step = 0;
1309     }
1310 
1311     if( v )
1312     {
1313         int v_rows, v_cols;
1314 
1315         if( !CV_IS_MAT( v ))
1316             CV_CALL( v = cvGetMat( v, &vstub ));
1317 
1318         if( !(flags & CV_SVD_V_T) )
1319         {
1320             v_rows = v->rows;
1321             v_cols = v->cols;
1322         }
1323         else
1324         {
1325             v_rows = v->cols;
1326             v_cols = v->rows;
1327         }
1328 
1329         if( !CV_ARE_TYPES_EQ( a, v ))
1330             CV_ERROR( CV_StsUnmatchedFormats, "" );
1331 
1332         if( v_rows != n || v_cols != n )
1333             CV_ERROR( CV_StsUnmatchedSizes, t_svd ? "U matrix has unappropriate size" :
1334                                                     "V matrix has unappropriate size" );
1335 
1336         if( w_is_mat && w_cols != v_cols )
1337             CV_ERROR( CV_StsUnmatchedSizes, t_svd ? "U and W have incompatible sizes" :
1338                                                     "V and W have incompatible sizes" );
1339     }
1340     else
1341     {
1342         v = &vstub;
1343         v->data.ptr = 0;
1344         v->step = 0;
1345     }
1346 
1347     type = CV_MAT_TYPE( a->type );
1348     pix_size = CV_ELEM_SIZE(type);
1349     buf_size = n*2 + m;
1350 
1351     if( !(flags & CV_SVD_MODIFY_A) )
1352     {
1353         a_buf_offset = buf_size;
1354         buf_size += a->rows*a->cols;
1355     }
1356 
1357     if( temp_u )
1358     {
1359         u_buf_offset = buf_size;
1360         buf_size += u->rows*u->cols;
1361     }
1362 
1363     buf_size *= pix_size;
1364 
1365     if( buf_size <= CV_MAX_LOCAL_SIZE )
1366     {
1367         buffer = (uchar*)cvStackAlloc( buf_size );
1368         local_alloc = 1;
1369     }
1370     else
1371     {
1372         CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
1373     }
1374 
1375     if( !(flags & CV_SVD_MODIFY_A) )
1376     {
1377         cvInitMatHeader( &tmat, m, n, type,
1378                          buffer + a_buf_offset*pix_size );
1379         if( !t_svd )
1380             cvCopy( a, &tmat );
1381         else
1382             cvT( a, &tmat );
1383         a = &tmat;
1384     }
1385 
1386     if( temp_u )
1387     {
1388         cvInitMatHeader( &ustub, u_cols, u_rows, type, buffer + u_buf_offset*pix_size );
1389         u = &ustub;
1390     }
1391 
1392     if( !tw )
1393         tw = buffer + (n + m)*pix_size;
1394 
1395     if( type == CV_32FC1 )
1396     {
1397         icvSVD_32f( a->data.fl, a->step/sizeof(float), a->rows, a->cols,
1398                    (float*)tw, u->data.fl, u->step/sizeof(float), u_cols,
1399                    v->data.fl, v->step/sizeof(float), (float*)buffer );
1400     }
1401     else if( type == CV_64FC1 )
1402     {
1403         icvSVD_64f( a->data.db, a->step/sizeof(double), a->rows, a->cols,
1404                     (double*)tw, u->data.db, u->step/sizeof(double), u_cols,
1405                     v->data.db, v->step/sizeof(double), (double*)buffer );
1406     }
1407     else
1408     {
1409         CV_ERROR( CV_StsUnsupportedFormat, "" );
1410     }
1411 
1412     if( tw != w->data.ptr )
1413     {
1414         int shift = w->cols != 1;
1415         cvSetZero( w );
1416         if( type == CV_32FC1 )
1417             for( int i = 0; i < n; i++ )
1418                 ((float*)(w->data.ptr + i*w->step))[i*shift] = ((float*)tw)[i];
1419         else
1420             for( int i = 0; i < n; i++ )
1421                 ((double*)(w->data.ptr + i*w->step))[i*shift] = ((double*)tw)[i];
1422     }
1423 
1424     if( uarr )
1425     {
1426         if( !(flags & CV_SVD_U_T))
1427             cvT( u, uarr );
1428         else if( temp_u )
1429             cvCopy( u, uarr );
1430         /*CV_CHECK_NANS( uarr );*/
1431     }
1432 
1433     if( varr )
1434     {
1435         if( !(flags & CV_SVD_V_T))
1436             cvT( v, varr );
1437         /*CV_CHECK_NANS( varr );*/
1438     }
1439 
1440     CV_CHECK_NANS( w );
1441 
1442     __END__;
1443 
1444     if( buffer && !local_alloc )
1445         cvFree( &buffer );
1446 }
1447 
1448 
1449 CV_IMPL void
cvSVBkSb(const CvArr * warr,const CvArr * uarr,const CvArr * varr,const CvArr * barr,CvArr * xarr,int flags)1450 cvSVBkSb( const CvArr* warr, const CvArr* uarr,
1451           const CvArr* varr, const CvArr* barr,
1452           CvArr* xarr, int flags )
1453 {
1454     uchar* buffer = 0;
1455     int local_alloc = 0;
1456 
1457     CV_FUNCNAME( "cvSVBkSb" );
1458 
1459     __BEGIN__;
1460 
1461     CvMat wstub, *w = (CvMat*)warr;
1462     CvMat bstub, *b = (CvMat*)barr;
1463     CvMat xstub, *x = (CvMat*)xarr;
1464     CvMat ustub, ustub2, *u = (CvMat*)uarr;
1465     CvMat vstub, vstub2, *v = (CvMat*)varr;
1466     uchar* tw = 0;
1467     int type;
1468     int temp_u = 0, temp_v = 0;
1469     int u_buf_offset = 0, v_buf_offset = 0, w_buf_offset = 0, t_buf_offset = 0;
1470     int buf_size = 0, pix_size;
1471     int m, n, nm;
1472     int u_rows, u_cols;
1473     int v_rows, v_cols;
1474 
1475     if( !CV_IS_MAT( w ))
1476         CV_CALL( w = cvGetMat( w, &wstub ));
1477 
1478     if( !CV_IS_MAT( u ))
1479         CV_CALL( u = cvGetMat( u, &ustub ));
1480 
1481     if( !CV_IS_MAT( v ))
1482         CV_CALL( v = cvGetMat( v, &vstub ));
1483 
1484     if( !CV_IS_MAT( x ))
1485         CV_CALL( x = cvGetMat( x, &xstub ));
1486 
1487     if( !CV_ARE_TYPES_EQ( w, u ) || !CV_ARE_TYPES_EQ( w, v ) || !CV_ARE_TYPES_EQ( w, x ))
1488         CV_ERROR( CV_StsUnmatchedFormats, "All matrices must have the same type" );
1489 
1490     type = CV_MAT_TYPE( w->type );
1491     pix_size = CV_ELEM_SIZE(type);
1492 
1493     if( !(flags & CV_SVD_U_T) )
1494     {
1495         temp_u = 1;
1496         u_buf_offset = buf_size;
1497         buf_size += u->cols*u->rows*pix_size;
1498         u_rows = u->rows;
1499         u_cols = u->cols;
1500     }
1501     else
1502     {
1503         u_rows = u->cols;
1504         u_cols = u->rows;
1505     }
1506 
1507     if( !(flags & CV_SVD_V_T) )
1508     {
1509         temp_v = 1;
1510         v_buf_offset = buf_size;
1511         buf_size += v->cols*v->rows*pix_size;
1512         v_rows = v->rows;
1513         v_cols = v->cols;
1514     }
1515     else
1516     {
1517         v_rows = v->cols;
1518         v_cols = v->rows;
1519     }
1520 
1521     m = u_rows;
1522     n = v_rows;
1523     nm = MIN(n,m);
1524 
1525     if( (u_rows != u_cols && v_rows != v_cols) || x->rows != v_rows )
1526         CV_ERROR( CV_StsBadSize, "V or U matrix must be square" );
1527 
1528     if( (w->rows == 1 || w->cols == 1) && w->rows + w->cols - 1 == nm )
1529     {
1530         if( CV_IS_MAT_CONT(w->type) )
1531             tw = w->data.ptr;
1532         else
1533         {
1534             w_buf_offset = buf_size;
1535             buf_size += nm*pix_size;
1536         }
1537     }
1538     else
1539     {
1540         if( w->cols != v_cols || w->rows != u_cols )
1541             CV_ERROR( CV_StsBadSize, "W must be 1d array of MIN(m,n) elements or "
1542                                     "matrix which size matches to U and V" );
1543         w_buf_offset = buf_size;
1544         buf_size += nm*pix_size;
1545     }
1546 
1547     if( b )
1548     {
1549         if( !CV_IS_MAT( b ))
1550             CV_CALL( b = cvGetMat( b, &bstub ));
1551         if( !CV_ARE_TYPES_EQ( w, b ))
1552             CV_ERROR( CV_StsUnmatchedFormats, "All matrices must have the same type" );
1553         if( b->cols != x->cols || b->rows != m )
1554             CV_ERROR( CV_StsUnmatchedSizes, "b matrix must have (m x x->cols) size" );
1555     }
1556     else
1557     {
1558         b = &bstub;
1559         memset( b, 0, sizeof(*b));
1560     }
1561 
1562     t_buf_offset = buf_size;
1563     buf_size += (MAX(m,n) + b->cols)*pix_size;
1564 
1565     if( buf_size <= CV_MAX_LOCAL_SIZE )
1566     {
1567         buffer = (uchar*)cvStackAlloc( buf_size );
1568         local_alloc = 1;
1569     }
1570     else
1571         CV_CALL( buffer = (uchar*)cvAlloc( buf_size ));
1572 
1573     if( temp_u )
1574     {
1575         cvInitMatHeader( &ustub2, u_cols, u_rows, type, buffer + u_buf_offset );
1576         cvT( u, &ustub2 );
1577         u = &ustub2;
1578     }
1579 
1580     if( temp_v )
1581     {
1582         cvInitMatHeader( &vstub2, v_cols, v_rows, type, buffer + v_buf_offset );
1583         cvT( v, &vstub2 );
1584         v = &vstub2;
1585     }
1586 
1587     if( !tw )
1588     {
1589         int i, shift = w->cols > 1 ? pix_size : 0;
1590         tw = buffer + w_buf_offset;
1591         for( i = 0; i < nm; i++ )
1592             memcpy( tw + i*pix_size, w->data.ptr + i*(w->step + shift), pix_size );
1593     }
1594 
1595     if( type == CV_32FC1 )
1596     {
1597         icvSVBkSb_32f( m, n, (float*)tw, u->data.fl, u->step/sizeof(float),
1598                        v->data.fl, v->step/sizeof(float),
1599                        b->data.fl, b->step/sizeof(float), b->cols,
1600                        x->data.fl, x->step/sizeof(float),
1601                        (float*)(buffer + t_buf_offset) );
1602     }
1603     else if( type == CV_64FC1 )
1604     {
1605         icvSVBkSb_64f( m, n, (double*)tw, u->data.db, u->step/sizeof(double),
1606                        v->data.db, v->step/sizeof(double),
1607                        b->data.db, b->step/sizeof(double), b->cols,
1608                        x->data.db, x->step/sizeof(double),
1609                        (double*)(buffer + t_buf_offset) );
1610     }
1611     else
1612     {
1613         CV_ERROR( CV_StsUnsupportedFormat, "" );
1614     }
1615 
1616     __END__;
1617 
1618     if( buffer && !local_alloc )
1619         cvFree( &buffer );
1620 }
1621 
1622 /* End of file. */
1623