1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // Intel License Agreement
11 //
12 // Copyright (C) 2000, Intel Corporation, all rights reserved.
13 // Third party copyrights are property of their respective owners.
14 //
15 // Redistribution and use in source and binary forms, with or without modification,
16 // are permitted provided that the following conditions are met:
17 //
18 // * Redistribution's of source code must retain the above copyright notice,
19 // this list of conditions and the following disclaimer.
20 //
21 // * Redistribution's in binary form must reproduce the above copyright notice,
22 // this list of conditions and the following disclaimer in the documentation
23 // and/or other materials provided with the distribution.
24 //
25 // * The name of Intel Corporation may not be used to endorse or promote products
26 // derived from this software without specific prior written permission.
27 //
28 // This software is provided by the copyright holders and contributors "as is" and
29 // any express or implied warranties, including, but not limited to, the implied
30 // warranties of merchantability and fitness for a particular purpose are disclaimed.
31 // In no event shall the Intel Corporation or contributors be liable for any direct,
32 // indirect, incidental, special, exemplary, or consequential damages
33 // (including, but not limited to, procurement of substitute goods or services;
34 // loss of use, data, or profits; or business interruption) however caused
35 // and on any theory of liability, whether in contract, strict liability,
36 // or tort (including negligence or otherwise) arising in any way out of
37 // the use of this software, even if advised of the possibility of such damage.
38 //
39 //M*/
40
41 #include "precomp.hpp"
42
43 namespace cv { namespace ml {
44
45 struct AnnParams
46 {
AnnParamscv::ml::AnnParams47 AnnParams()
48 {
49 termCrit = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 1000, 0.01 );
50 trainMethod = ANN_MLP::RPROP;
51 bpDWScale = bpMomentScale = 0.1;
52 rpDW0 = 0.1; rpDWPlus = 1.2; rpDWMinus = 0.5;
53 rpDWMin = FLT_EPSILON; rpDWMax = 50.;
54 }
55
56 TermCriteria termCrit;
57 int trainMethod;
58
59 double bpDWScale;
60 double bpMomentScale;
61
62 double rpDW0;
63 double rpDWPlus;
64 double rpDWMinus;
65 double rpDWMin;
66 double rpDWMax;
67 };
68
69 template <typename T>
inBounds(T val,T min_val,T max_val)70 inline T inBounds(T val, T min_val, T max_val)
71 {
72 return std::min(std::max(val, min_val), max_val);
73 }
74
75 class ANN_MLPImpl : public ANN_MLP
76 {
77 public:
ANN_MLPImpl()78 ANN_MLPImpl()
79 {
80 clear();
81 setActivationFunction( SIGMOID_SYM, 0, 0 );
82 setLayerSizes(Mat());
83 setTrainMethod(ANN_MLP::RPROP, 0.1, FLT_EPSILON);
84 }
85
~ANN_MLPImpl()86 virtual ~ANN_MLPImpl() {}
87
88 CV_IMPL_PROPERTY(TermCriteria, TermCriteria, params.termCrit)
89 CV_IMPL_PROPERTY(double, BackpropWeightScale, params.bpDWScale)
90 CV_IMPL_PROPERTY(double, BackpropMomentumScale, params.bpMomentScale)
91 CV_IMPL_PROPERTY(double, RpropDW0, params.rpDW0)
92 CV_IMPL_PROPERTY(double, RpropDWPlus, params.rpDWPlus)
93 CV_IMPL_PROPERTY(double, RpropDWMinus, params.rpDWMinus)
94 CV_IMPL_PROPERTY(double, RpropDWMin, params.rpDWMin)
95 CV_IMPL_PROPERTY(double, RpropDWMax, params.rpDWMax)
96
clear()97 void clear()
98 {
99 min_val = max_val = min_val1 = max_val1 = 0.;
100 rng = RNG((uint64)-1);
101 weights.clear();
102 trained = false;
103 max_buf_sz = 1 << 12;
104 }
105
layer_count() const106 int layer_count() const { return (int)layer_sizes.size(); }
107
setTrainMethod(int method,double param1,double param2)108 void setTrainMethod(int method, double param1, double param2)
109 {
110 if (method != ANN_MLP::RPROP && method != ANN_MLP::BACKPROP)
111 method = ANN_MLP::RPROP;
112 params.trainMethod = method;
113 if(method == ANN_MLP::RPROP )
114 {
115 if( param1 < FLT_EPSILON )
116 param1 = 1.;
117 params.rpDW0 = param1;
118 params.rpDWMin = std::max( param2, 0. );
119 }
120 else if(method == ANN_MLP::BACKPROP )
121 {
122 if( param1 <= 0 )
123 param1 = 0.1;
124 params.bpDWScale = inBounds<double>(param1, 1e-3, 1.);
125 if( param2 < 0 )
126 param2 = 0.1;
127 params.bpMomentScale = std::min( param2, 1. );
128 }
129 }
130
getTrainMethod() const131 int getTrainMethod() const
132 {
133 return params.trainMethod;
134 }
135
setActivationFunction(int _activ_func,double _f_param1,double _f_param2)136 void setActivationFunction(int _activ_func, double _f_param1, double _f_param2 )
137 {
138 if( _activ_func < 0 || _activ_func > GAUSSIAN )
139 CV_Error( CV_StsOutOfRange, "Unknown activation function" );
140
141 activ_func = _activ_func;
142
143 switch( activ_func )
144 {
145 case SIGMOID_SYM:
146 max_val = 0.95; min_val = -max_val;
147 max_val1 = 0.98; min_val1 = -max_val1;
148 if( fabs(_f_param1) < FLT_EPSILON )
149 _f_param1 = 2./3;
150 if( fabs(_f_param2) < FLT_EPSILON )
151 _f_param2 = 1.7159;
152 break;
153 case GAUSSIAN:
154 max_val = 1.; min_val = 0.05;
155 max_val1 = 1.; min_val1 = 0.02;
156 if( fabs(_f_param1) < FLT_EPSILON )
157 _f_param1 = 1.;
158 if( fabs(_f_param2) < FLT_EPSILON )
159 _f_param2 = 1.;
160 break;
161 default:
162 min_val = max_val = min_val1 = max_val1 = 0.;
163 _f_param1 = 1.;
164 _f_param2 = 0.;
165 }
166
167 f_param1 = _f_param1;
168 f_param2 = _f_param2;
169 }
170
171
init_weights()172 void init_weights()
173 {
174 int i, j, k, l_count = layer_count();
175
176 for( i = 1; i < l_count; i++ )
177 {
178 int n1 = layer_sizes[i-1];
179 int n2 = layer_sizes[i];
180 double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.;
181 double* w = weights[i].ptr<double>();
182
183 // initialize weights using Nguyen-Widrow algorithm
184 for( j = 0; j < n2; j++ )
185 {
186 double s = 0;
187 for( k = 0; k <= n1; k++ )
188 {
189 val = rng.uniform(0., 1.)*2-1.;
190 w[k*n2 + j] = val;
191 s += fabs(val);
192 }
193
194 if( i < l_count - 1 )
195 {
196 s = 1./(s - fabs(val));
197 for( k = 0; k <= n1; k++ )
198 w[k*n2 + j] *= s;
199 w[n1*n2 + j] *= G*(-1+j*2./n2);
200 }
201 }
202 }
203 }
204
getLayerSizes() const205 Mat getLayerSizes() const
206 {
207 return Mat_<int>(layer_sizes, true);
208 }
209
setLayerSizes(InputArray _layer_sizes)210 void setLayerSizes( InputArray _layer_sizes )
211 {
212 clear();
213
214 _layer_sizes.copyTo(layer_sizes);
215 int l_count = layer_count();
216
217 weights.resize(l_count + 2);
218 max_lsize = 0;
219
220 if( l_count > 0 )
221 {
222 for( int i = 0; i < l_count; i++ )
223 {
224 int n = layer_sizes[i];
225 if( n < 1 + (0 < i && i < l_count-1))
226 CV_Error( CV_StsOutOfRange,
227 "there should be at least one input and one output "
228 "and every hidden layer must have more than 1 neuron" );
229 max_lsize = std::max( max_lsize, n );
230 if( i > 0 )
231 weights[i].create(layer_sizes[i-1]+1, n, CV_64F);
232 }
233
234 int ninputs = layer_sizes.front();
235 int noutputs = layer_sizes.back();
236 weights[0].create(1, ninputs*2, CV_64F);
237 weights[l_count].create(1, noutputs*2, CV_64F);
238 weights[l_count+1].create(1, noutputs*2, CV_64F);
239 }
240 }
241
predict(InputArray _inputs,OutputArray _outputs,int) const242 float predict( InputArray _inputs, OutputArray _outputs, int ) const
243 {
244 if( !trained )
245 CV_Error( CV_StsError, "The network has not been trained or loaded" );
246
247 Mat inputs = _inputs.getMat();
248 int type = inputs.type(), l_count = layer_count();
249 int n = inputs.rows, dn0 = n;
250
251 CV_Assert( (type == CV_32F || type == CV_64F) && inputs.cols == layer_sizes[0] );
252 int noutputs = layer_sizes[l_count-1];
253 Mat outputs;
254
255 int min_buf_sz = 2*max_lsize;
256 int buf_sz = n*min_buf_sz;
257
258 if( buf_sz > max_buf_sz )
259 {
260 dn0 = max_buf_sz/min_buf_sz;
261 dn0 = std::max( dn0, 1 );
262 buf_sz = dn0*min_buf_sz;
263 }
264
265 cv::AutoBuffer<double> _buf(buf_sz+noutputs);
266 double* buf = _buf;
267
268 if( !_outputs.needed() )
269 {
270 CV_Assert( n == 1 );
271 outputs = Mat(n, noutputs, type, buf + buf_sz);
272 }
273 else
274 {
275 _outputs.create(n, noutputs, type);
276 outputs = _outputs.getMat();
277 }
278
279 int dn = 0;
280 for( int i = 0; i < n; i += dn )
281 {
282 dn = std::min( dn0, n - i );
283
284 Mat layer_in = inputs.rowRange(i, i + dn);
285 Mat layer_out( dn, layer_in.cols, CV_64F, buf);
286
287 scale_input( layer_in, layer_out );
288 layer_in = layer_out;
289
290 for( int j = 1; j < l_count; j++ )
291 {
292 double* data = buf + ((j&1) ? max_lsize*dn0 : 0);
293 int cols = layer_sizes[j];
294
295 layer_out = Mat(dn, cols, CV_64F, data);
296 Mat w = weights[j].rowRange(0, layer_in.cols);
297 gemm(layer_in, w, 1, noArray(), 0, layer_out);
298 calc_activ_func( layer_out, weights[j] );
299
300 layer_in = layer_out;
301 }
302
303 layer_out = outputs.rowRange(i, i + dn);
304 scale_output( layer_in, layer_out );
305 }
306
307 if( n == 1 )
308 {
309 int maxIdx[] = {0, 0};
310 minMaxIdx(outputs, 0, 0, 0, maxIdx);
311 return (float)(maxIdx[0] + maxIdx[1]);
312 }
313
314 return 0.f;
315 }
316
scale_input(const Mat & _src,Mat & _dst) const317 void scale_input( const Mat& _src, Mat& _dst ) const
318 {
319 int cols = _src.cols;
320 const double* w = weights[0].ptr<double>();
321
322 if( _src.type() == CV_32F )
323 {
324 for( int i = 0; i < _src.rows; i++ )
325 {
326 const float* src = _src.ptr<float>(i);
327 double* dst = _dst.ptr<double>(i);
328 for( int j = 0; j < cols; j++ )
329 dst[j] = src[j]*w[j*2] + w[j*2+1];
330 }
331 }
332 else
333 {
334 for( int i = 0; i < _src.rows; i++ )
335 {
336 const float* src = _src.ptr<float>(i);
337 double* dst = _dst.ptr<double>(i);
338 for( int j = 0; j < cols; j++ )
339 dst[j] = src[j]*w[j*2] + w[j*2+1];
340 }
341 }
342 }
343
scale_output(const Mat & _src,Mat & _dst) const344 void scale_output( const Mat& _src, Mat& _dst ) const
345 {
346 int cols = _src.cols;
347 const double* w = weights[layer_count()].ptr<double>();
348
349 if( _dst.type() == CV_32F )
350 {
351 for( int i = 0; i < _src.rows; i++ )
352 {
353 const double* src = _src.ptr<double>(i);
354 float* dst = _dst.ptr<float>(i);
355 for( int j = 0; j < cols; j++ )
356 dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]);
357 }
358 }
359 else
360 {
361 for( int i = 0; i < _src.rows; i++ )
362 {
363 const double* src = _src.ptr<double>(i);
364 double* dst = _dst.ptr<double>(i);
365 for( int j = 0; j < cols; j++ )
366 dst[j] = src[j]*w[j*2] + w[j*2+1];
367 }
368 }
369 }
370
calc_activ_func(Mat & sums,const Mat & w) const371 void calc_activ_func( Mat& sums, const Mat& w ) const
372 {
373 const double* bias = w.ptr<double>(w.rows-1);
374 int i, j, n = sums.rows, cols = sums.cols;
375 double scale = 0, scale2 = f_param2;
376
377 switch( activ_func )
378 {
379 case IDENTITY:
380 scale = 1.;
381 break;
382 case SIGMOID_SYM:
383 scale = -f_param1;
384 break;
385 case GAUSSIAN:
386 scale = -f_param1*f_param1;
387 break;
388 default:
389 ;
390 }
391
392 CV_Assert( sums.isContinuous() );
393
394 if( activ_func != GAUSSIAN )
395 {
396 for( i = 0; i < n; i++ )
397 {
398 double* data = sums.ptr<double>(i);
399 for( j = 0; j < cols; j++ )
400 data[j] = (data[j] + bias[j])*scale;
401 }
402
403 if( activ_func == IDENTITY )
404 return;
405 }
406 else
407 {
408 for( i = 0; i < n; i++ )
409 {
410 double* data = sums.ptr<double>(i);
411 for( j = 0; j < cols; j++ )
412 {
413 double t = data[j] + bias[j];
414 data[j] = t*t*scale;
415 }
416 }
417 }
418
419 exp( sums, sums );
420
421 if( sums.isContinuous() )
422 {
423 cols *= n;
424 n = 1;
425 }
426
427 switch( activ_func )
428 {
429 case SIGMOID_SYM:
430 for( i = 0; i < n; i++ )
431 {
432 double* data = sums.ptr<double>(i);
433 for( j = 0; j < cols; j++ )
434 {
435 double t = scale2*(1. - data[j])/(1. + data[j]);
436 data[j] = t;
437 }
438 }
439 break;
440
441 case GAUSSIAN:
442 for( i = 0; i < n; i++ )
443 {
444 double* data = sums.ptr<double>(i);
445 for( j = 0; j < cols; j++ )
446 data[j] = scale2*data[j];
447 }
448 break;
449
450 default:
451 ;
452 }
453 }
454
calc_activ_func_deriv(Mat & _xf,Mat & _df,const Mat & w) const455 void calc_activ_func_deriv( Mat& _xf, Mat& _df, const Mat& w ) const
456 {
457 const double* bias = w.ptr<double>(w.rows-1);
458 int i, j, n = _xf.rows, cols = _xf.cols;
459
460 if( activ_func == IDENTITY )
461 {
462 for( i = 0; i < n; i++ )
463 {
464 double* xf = _xf.ptr<double>(i);
465 double* df = _df.ptr<double>(i);
466
467 for( j = 0; j < cols; j++ )
468 {
469 xf[j] += bias[j];
470 df[j] = 1;
471 }
472 }
473 }
474 else if( activ_func == GAUSSIAN )
475 {
476 double scale = -f_param1*f_param1;
477 double scale2 = scale*f_param2;
478 for( i = 0; i < n; i++ )
479 {
480 double* xf = _xf.ptr<double>(i);
481 double* df = _df.ptr<double>(i);
482
483 for( j = 0; j < cols; j++ )
484 {
485 double t = xf[j] + bias[j];
486 df[j] = t*2*scale2;
487 xf[j] = t*t*scale;
488 }
489 }
490 exp( _xf, _xf );
491
492 for( i = 0; i < n; i++ )
493 {
494 double* xf = _xf.ptr<double>(i);
495 double* df = _df.ptr<double>(i);
496
497 for( j = 0; j < cols; j++ )
498 df[j] *= xf[j];
499 }
500 }
501 else
502 {
503 double scale = f_param1;
504 double scale2 = f_param2;
505
506 for( i = 0; i < n; i++ )
507 {
508 double* xf = _xf.ptr<double>(i);
509 double* df = _df.ptr<double>(i);
510
511 for( j = 0; j < cols; j++ )
512 {
513 xf[j] = (xf[j] + bias[j])*scale;
514 df[j] = -fabs(xf[j]);
515 }
516 }
517
518 exp( _df, _df );
519
520 // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
521 // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
522 // 2*a*exp(-ax)/(1+exp(-ax))^2
523 scale *= 2*f_param2;
524 for( i = 0; i < n; i++ )
525 {
526 double* xf = _xf.ptr<double>(i);
527 double* df = _df.ptr<double>(i);
528
529 for( j = 0; j < cols; j++ )
530 {
531 int s0 = xf[j] > 0 ? 1 : -1;
532 double t0 = 1./(1. + df[j]);
533 double t1 = scale*df[j]*t0*t0;
534 t0 *= scale2*(1. - df[j])*s0;
535 df[j] = t1;
536 xf[j] = t0;
537 }
538 }
539 }
540 }
541
calc_input_scale(const Mat & inputs,int flags)542 void calc_input_scale( const Mat& inputs, int flags )
543 {
544 bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
545 bool no_scale = (flags & NO_INPUT_SCALE) != 0;
546 double* scale = weights[0].ptr<double>();
547 int count = inputs.rows;
548
549 if( reset_weights )
550 {
551 int i, j, vcount = layer_sizes[0];
552 int type = inputs.type();
553 double a = no_scale ? 1. : 0.;
554
555 for( j = 0; j < vcount; j++ )
556 scale[2*j] = a, scale[j*2+1] = 0.;
557
558 if( no_scale )
559 return;
560
561 for( i = 0; i < count; i++ )
562 {
563 const uchar* p = inputs.ptr(i);
564 const float* f = (const float*)p;
565 const double* d = (const double*)p;
566 for( j = 0; j < vcount; j++ )
567 {
568 double t = type == CV_32F ? (double)f[j] : d[j];
569 scale[j*2] += t;
570 scale[j*2+1] += t*t;
571 }
572 }
573
574 for( j = 0; j < vcount; j++ )
575 {
576 double s = scale[j*2], s2 = scale[j*2+1];
577 double m = s/count, sigma2 = s2/count - m*m;
578 scale[j*2] = sigma2 < DBL_EPSILON ? 1 : 1./sqrt(sigma2);
579 scale[j*2+1] = -m*scale[j*2];
580 }
581 }
582 }
583
calc_output_scale(const Mat & outputs,int flags)584 void calc_output_scale( const Mat& outputs, int flags )
585 {
586 int i, j, vcount = layer_sizes.back();
587 int type = outputs.type();
588 double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1;
589 bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
590 bool no_scale = (flags & NO_OUTPUT_SCALE) != 0;
591 int l_count = layer_count();
592 double* scale = weights[l_count].ptr<double>();
593 double* inv_scale = weights[l_count+1].ptr<double>();
594 int count = outputs.rows;
595
596 if( reset_weights )
597 {
598 double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX;
599
600 for( j = 0; j < vcount; j++ )
601 {
602 scale[2*j] = inv_scale[2*j] = a0;
603 scale[j*2+1] = inv_scale[2*j+1] = b0;
604 }
605
606 if( no_scale )
607 return;
608 }
609
610 for( i = 0; i < count; i++ )
611 {
612 const uchar* p = outputs.ptr(i);
613 const float* f = (const float*)p;
614 const double* d = (const double*)p;
615
616 for( j = 0; j < vcount; j++ )
617 {
618 double t = type == CV_32F ? (double)f[j] : d[j];
619
620 if( reset_weights )
621 {
622 double mj = scale[j*2], Mj = scale[j*2+1];
623 if( mj > t ) mj = t;
624 if( Mj < t ) Mj = t;
625
626 scale[j*2] = mj;
627 scale[j*2+1] = Mj;
628 }
629 else if( !no_scale )
630 {
631 t = t*inv_scale[j*2] + inv_scale[2*j+1];
632 if( t < m1 || t > M1 )
633 CV_Error( CV_StsOutOfRange,
634 "Some of new output training vector components run exceed the original range too much" );
635 }
636 }
637 }
638
639 if( reset_weights )
640 for( j = 0; j < vcount; j++ )
641 {
642 // map mj..Mj to m..M
643 double mj = scale[j*2], Mj = scale[j*2+1];
644 double a, b;
645 double delta = Mj - mj;
646 if( delta < DBL_EPSILON )
647 a = 1, b = (M + m - Mj - mj)*0.5;
648 else
649 a = (M - m)/delta, b = m - mj*a;
650 inv_scale[j*2] = a; inv_scale[j*2+1] = b;
651 a = 1./a; b = -b*a;
652 scale[j*2] = a; scale[j*2+1] = b;
653 }
654 }
655
prepare_to_train(const Mat & inputs,const Mat & outputs,Mat & sample_weights,int flags)656 void prepare_to_train( const Mat& inputs, const Mat& outputs,
657 Mat& sample_weights, int flags )
658 {
659 if( layer_sizes.empty() )
660 CV_Error( CV_StsError,
661 "The network has not been created. Use method create or the appropriate constructor" );
662
663 if( (inputs.type() != CV_32F && inputs.type() != CV_64F) ||
664 inputs.cols != layer_sizes[0] )
665 CV_Error( CV_StsBadArg,
666 "input training data should be a floating-point matrix with "
667 "the number of rows equal to the number of training samples and "
668 "the number of columns equal to the size of 0-th (input) layer" );
669
670 if( (outputs.type() != CV_32F && outputs.type() != CV_64F) ||
671 outputs.cols != layer_sizes.back() )
672 CV_Error( CV_StsBadArg,
673 "output training data should be a floating-point matrix with "
674 "the number of rows equal to the number of training samples and "
675 "the number of columns equal to the size of last (output) layer" );
676
677 if( inputs.rows != outputs.rows )
678 CV_Error( CV_StsUnmatchedSizes, "The numbers of input and output samples do not match" );
679
680 Mat temp;
681 double s = sum(sample_weights)[0];
682 sample_weights.convertTo(temp, CV_64F, 1./s);
683 sample_weights = temp;
684
685 calc_input_scale( inputs, flags );
686 calc_output_scale( outputs, flags );
687 }
688
train(const Ptr<TrainData> & trainData,int flags)689 bool train( const Ptr<TrainData>& trainData, int flags )
690 {
691 const int MAX_ITER = 1000;
692 const double DEFAULT_EPSILON = FLT_EPSILON;
693
694 // initialize training data
695 Mat inputs = trainData->getTrainSamples();
696 Mat outputs = trainData->getTrainResponses();
697 Mat sw = trainData->getTrainSampleWeights();
698 prepare_to_train( inputs, outputs, sw, flags );
699
700 // ... and link weights
701 if( !(flags & UPDATE_WEIGHTS) )
702 init_weights();
703
704 TermCriteria termcrit;
705 termcrit.type = TermCriteria::COUNT + TermCriteria::EPS;
706 termcrit.maxCount = std::max((params.termCrit.type & CV_TERMCRIT_ITER ? params.termCrit.maxCount : MAX_ITER), 1);
707 termcrit.epsilon = std::max((params.termCrit.type & CV_TERMCRIT_EPS ? params.termCrit.epsilon : DEFAULT_EPSILON), DBL_EPSILON);
708
709 int iter = params.trainMethod == ANN_MLP::BACKPROP ?
710 train_backprop( inputs, outputs, sw, termcrit ) :
711 train_rprop( inputs, outputs, sw, termcrit );
712
713 trained = iter > 0;
714 return trained;
715 }
716
train_backprop(const Mat & inputs,const Mat & outputs,const Mat & _sw,TermCriteria termCrit)717 int train_backprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
718 {
719 int i, j, k;
720 double prev_E = DBL_MAX*0.5, E = 0;
721 int itype = inputs.type(), otype = outputs.type();
722
723 int count = inputs.rows;
724
725 int iter = -1, max_iter = termCrit.maxCount*count;
726 double epsilon = termCrit.epsilon*count;
727
728 int l_count = layer_count();
729 int ivcount = layer_sizes[0];
730 int ovcount = layer_sizes.back();
731
732 // allocate buffers
733 vector<vector<double> > x(l_count);
734 vector<vector<double> > df(l_count);
735 vector<Mat> dw(l_count);
736
737 for( i = 0; i < l_count; i++ )
738 {
739 int n = layer_sizes[i];
740 x[i].resize(n+1);
741 df[i].resize(n);
742 dw[i] = Mat::zeros(weights[i].size(), CV_64F);
743 }
744
745 Mat _idx_m(1, count, CV_32S);
746 int* _idx = _idx_m.ptr<int>();
747 for( i = 0; i < count; i++ )
748 _idx[i] = i;
749
750 AutoBuffer<double> _buf(max_lsize*2);
751 double* buf[] = { _buf, (double*)_buf + max_lsize };
752
753 const double* sw = _sw.empty() ? 0 : _sw.ptr<double>();
754
755 // run back-propagation loop
756 /*
757 y_i = w_i*x_{i-1}
758 x_i = f(y_i)
759 E = 1/2*||u - x_N||^2
760 grad_N = (x_N - u)*f'(y_i)
761 dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
762 w_i(t+1) = w_i(t) + dw_i(t)
763 grad_{i-1} = w_i^t*grad_i
764 */
765 for( iter = 0; iter < max_iter; iter++ )
766 {
767 int idx = iter % count;
768 double sweight = sw ? count*sw[idx] : 1.;
769
770 if( idx == 0 )
771 {
772 //printf("%d. E = %g\n", iter/count, E);
773 if( fabs(prev_E - E) < epsilon )
774 break;
775 prev_E = E;
776 E = 0;
777
778 // shuffle indices
779 for( i = 0; i < count; i++ )
780 {
781 j = rng.uniform(0, count);
782 k = rng.uniform(0, count);
783 std::swap(_idx[j], _idx[k]);
784 }
785 }
786
787 idx = _idx[idx];
788
789 const uchar* x0data_p = inputs.ptr(idx);
790 const float* x0data_f = (const float*)x0data_p;
791 const double* x0data_d = (const double*)x0data_p;
792
793 double* w = weights[0].ptr<double>();
794 for( j = 0; j < ivcount; j++ )
795 x[0][j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2 + 1];
796
797 Mat x1( 1, ivcount, CV_64F, &x[0][0] );
798
799 // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
800 for( i = 1; i < l_count; i++ )
801 {
802 int n = layer_sizes[i];
803 Mat x2(1, n, CV_64F, &x[i][0] );
804 Mat _w = weights[i].rowRange(0, x1.cols);
805 gemm(x1, _w, 1, noArray(), 0, x2);
806 Mat _df(1, n, CV_64F, &df[i][0] );
807 calc_activ_func_deriv( x2, _df, weights[i] );
808 x1 = x2;
809 }
810
811 Mat grad1( 1, ovcount, CV_64F, buf[l_count&1] );
812 w = weights[l_count+1].ptr<double>();
813
814 // calculate error
815 const uchar* udata_p = outputs.ptr(idx);
816 const float* udata_f = (const float*)udata_p;
817 const double* udata_d = (const double*)udata_p;
818
819 double* gdata = grad1.ptr<double>();
820 for( k = 0; k < ovcount; k++ )
821 {
822 double t = (otype == CV_32F ? (double)udata_f[k] : udata_d[k])*w[k*2] + w[k*2+1] - x[l_count-1][k];
823 gdata[k] = t*sweight;
824 E += t*t;
825 }
826 E *= sweight;
827
828 // backward pass, update weights
829 for( i = l_count-1; i > 0; i-- )
830 {
831 int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
832 Mat _df(1, n2, CV_64F, &df[i][0]);
833 multiply( grad1, _df, grad1 );
834 Mat _x(n1+1, 1, CV_64F, &x[i-1][0]);
835 x[i-1][n1] = 1.;
836 gemm( _x, grad1, params.bpDWScale, dw[i], params.bpMomentScale, dw[i] );
837 add( weights[i], dw[i], weights[i] );
838 if( i > 1 )
839 {
840 Mat grad2(1, n1, CV_64F, buf[i&1]);
841 Mat _w = weights[i].rowRange(0, n1);
842 gemm( grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T );
843 grad1 = grad2;
844 }
845 }
846 }
847
848 iter /= count;
849 return iter;
850 }
851
852 struct RPropLoop : public ParallelLoopBody
853 {
RPropLoopcv::ml::ANN_MLPImpl::RPropLoop854 RPropLoop(ANN_MLPImpl* _ann,
855 const Mat& _inputs, const Mat& _outputs, const Mat& _sw,
856 int _dcount0, vector<Mat>& _dEdw, double* _E)
857 {
858 ann = _ann;
859 inputs = _inputs;
860 outputs = _outputs;
861 sw = _sw.ptr<double>();
862 dcount0 = _dcount0;
863 dEdw = &_dEdw;
864 pE = _E;
865 }
866
867 ANN_MLPImpl* ann;
868 vector<Mat>* dEdw;
869 Mat inputs, outputs;
870 const double* sw;
871 int dcount0;
872 double* pE;
873
operator ()cv::ml::ANN_MLPImpl::RPropLoop874 void operator()( const Range& range ) const
875 {
876 double inv_count = 1./inputs.rows;
877 int ivcount = ann->layer_sizes.front();
878 int ovcount = ann->layer_sizes.back();
879 int itype = inputs.type(), otype = outputs.type();
880 int count = inputs.rows;
881 int i, j, k, l_count = ann->layer_count();
882 vector<vector<double> > x(l_count);
883 vector<vector<double> > df(l_count);
884 vector<double> _buf(ann->max_lsize*dcount0*2);
885 double* buf[] = { &_buf[0], &_buf[ann->max_lsize*dcount0] };
886 double E = 0;
887
888 for( i = 0; i < l_count; i++ )
889 {
890 x[i].resize(ann->layer_sizes[i]*dcount0);
891 df[i].resize(ann->layer_sizes[i]*dcount0);
892 }
893
894 for( int si = range.start; si < range.end; si++ )
895 {
896 int i0 = si*dcount0, i1 = std::min((si + 1)*dcount0, count);
897 int dcount = i1 - i0;
898 const double* w = ann->weights[0].ptr<double>();
899
900 // grab and preprocess input data
901 for( i = 0; i < dcount; i++ )
902 {
903 const uchar* x0data_p = inputs.ptr(i0 + i);
904 const float* x0data_f = (const float*)x0data_p;
905 const double* x0data_d = (const double*)x0data_p;
906
907 double* xdata = &x[0][i*ivcount];
908 for( j = 0; j < ivcount; j++ )
909 xdata[j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2+1];
910 }
911 Mat x1(dcount, ivcount, CV_64F, &x[0][0]);
912
913 // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
914 for( i = 1; i < l_count; i++ )
915 {
916 Mat x2( dcount, ann->layer_sizes[i], CV_64F, &x[i][0] );
917 Mat _w = ann->weights[i].rowRange(0, x1.cols);
918 gemm( x1, _w, 1, noArray(), 0, x2 );
919 Mat _df( x2.size(), CV_64F, &df[i][0] );
920 ann->calc_activ_func_deriv( x2, _df, ann->weights[i] );
921 x1 = x2;
922 }
923
924 Mat grad1(dcount, ovcount, CV_64F, buf[l_count & 1]);
925
926 w = ann->weights[l_count+1].ptr<double>();
927
928 // calculate error
929 for( i = 0; i < dcount; i++ )
930 {
931 const uchar* udata_p = outputs.ptr(i0+i);
932 const float* udata_f = (const float*)udata_p;
933 const double* udata_d = (const double*)udata_p;
934
935 const double* xdata = &x[l_count-1][i*ovcount];
936 double* gdata = grad1.ptr<double>(i);
937 double sweight = sw ? sw[si+i] : inv_count, E1 = 0;
938
939 for( j = 0; j < ovcount; j++ )
940 {
941 double t = (otype == CV_32F ? (double)udata_f[j] : udata_d[j])*w[j*2] + w[j*2+1] - xdata[j];
942 gdata[j] = t*sweight;
943 E1 += t*t;
944 }
945 E += sweight*E1;
946 }
947
948 for( i = l_count-1; i > 0; i-- )
949 {
950 int n1 = ann->layer_sizes[i-1], n2 = ann->layer_sizes[i];
951 Mat _df(dcount, n2, CV_64F, &df[i][0]);
952 multiply(grad1, _df, grad1);
953
954 {
955 AutoLock lock(ann->mtx);
956 Mat _dEdw = dEdw->at(i).rowRange(0, n1);
957 x1 = Mat(dcount, n1, CV_64F, &x[i-1][0]);
958 gemm(x1, grad1, 1, _dEdw, 1, _dEdw, GEMM_1_T);
959
960 // update bias part of dEdw
961 double* dst = dEdw->at(i).ptr<double>(n1);
962 for( k = 0; k < dcount; k++ )
963 {
964 const double* src = grad1.ptr<double>(k);
965 for( j = 0; j < n2; j++ )
966 dst[j] += src[j];
967 }
968 }
969
970 Mat grad2( dcount, n1, CV_64F, buf[i&1] );
971 if( i > 1 )
972 {
973 Mat _w = ann->weights[i].rowRange(0, n1);
974 gemm(grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T);
975 }
976 grad1 = grad2;
977 }
978 }
979 {
980 AutoLock lock(ann->mtx);
981 *pE += E;
982 }
983 }
984 };
985
train_rprop(const Mat & inputs,const Mat & outputs,const Mat & _sw,TermCriteria termCrit)986 int train_rprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
987 {
988 const int max_buf_size = 1 << 16;
989 int i, iter = -1, count = inputs.rows;
990
991 double prev_E = DBL_MAX*0.5;
992
993 int max_iter = termCrit.maxCount;
994 double epsilon = termCrit.epsilon;
995 double dw_plus = params.rpDWPlus;
996 double dw_minus = params.rpDWMinus;
997 double dw_min = params.rpDWMin;
998 double dw_max = params.rpDWMax;
999
1000 int l_count = layer_count();
1001
1002 // allocate buffers
1003 vector<Mat> dw(l_count), dEdw(l_count), prev_dEdw_sign(l_count);
1004
1005 int total = 0;
1006 for( i = 0; i < l_count; i++ )
1007 {
1008 total += layer_sizes[i];
1009 dw[i].create(weights[i].size(), CV_64F);
1010 dw[i].setTo(Scalar::all(params.rpDW0));
1011 prev_dEdw_sign[i] = Mat::zeros(weights[i].size(), CV_8S);
1012 dEdw[i] = Mat::zeros(weights[i].size(), CV_64F);
1013 }
1014
1015 int dcount0 = max_buf_size/(2*total);
1016 dcount0 = std::max( dcount0, 1 );
1017 dcount0 = std::min( dcount0, count );
1018 int chunk_count = (count + dcount0 - 1)/dcount0;
1019
1020 // run rprop loop
1021 /*
1022 y_i(t) = w_i(t)*x_{i-1}(t)
1023 x_i(t) = f(y_i(t))
1024 E = sum_over_all_samples(1/2*||u - x_N||^2)
1025 grad_N = (x_N - u)*f'(y_i)
1026
1027 std::min(dw_i{jk}(t)*dw_plus, dw_max), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
1028 dw_i{jk}(t) = std::max(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
1029 dw_i{jk}(t-1) else
1030
1031 if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0)
1032 dE/dw_i{jk}(t)<-0
1033 else
1034 w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
1035 grad_{i-1}(t) = w_i^t(t)*grad_i(t)
1036 */
1037 for( iter = 0; iter < max_iter; iter++ )
1038 {
1039 double E = 0;
1040
1041 for( i = 0; i < l_count; i++ )
1042 dEdw[i].setTo(Scalar::all(0));
1043
1044 // first, iterate through all the samples and compute dEdw
1045 RPropLoop invoker(this, inputs, outputs, _sw, dcount0, dEdw, &E);
1046 parallel_for_(Range(0, chunk_count), invoker);
1047 //invoker(Range(0, chunk_count));
1048
1049 // now update weights
1050 for( i = 1; i < l_count; i++ )
1051 {
1052 int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
1053 for( int k = 0; k <= n1; k++ )
1054 {
1055 CV_Assert(weights[i].size() == Size(n2, n1+1));
1056 double* wk = weights[i].ptr<double>(k);
1057 double* dwk = dw[i].ptr<double>(k);
1058 double* dEdwk = dEdw[i].ptr<double>(k);
1059 schar* prevEk = prev_dEdw_sign[i].ptr<schar>(k);
1060
1061 for( int j = 0; j < n2; j++ )
1062 {
1063 double Eval = dEdwk[j];
1064 double dval = dwk[j];
1065 double wval = wk[j];
1066 int s = CV_SIGN(Eval);
1067 int ss = prevEk[j]*s;
1068 if( ss > 0 )
1069 {
1070 dval *= dw_plus;
1071 dval = std::min( dval, dw_max );
1072 dwk[j] = dval;
1073 wk[j] = wval + dval*s;
1074 }
1075 else if( ss < 0 )
1076 {
1077 dval *= dw_minus;
1078 dval = std::max( dval, dw_min );
1079 prevEk[j] = 0;
1080 dwk[j] = dval;
1081 wk[j] = wval + dval*s;
1082 }
1083 else
1084 {
1085 prevEk[j] = (schar)s;
1086 wk[j] = wval + dval*s;
1087 }
1088 dEdwk[j] = 0.;
1089 }
1090 }
1091 }
1092
1093 //printf("%d. E = %g\n", iter, E);
1094 if( fabs(prev_E - E) < epsilon )
1095 break;
1096 prev_E = E;
1097 }
1098
1099 return iter;
1100 }
1101
write_params(FileStorage & fs) const1102 void write_params( FileStorage& fs ) const
1103 {
1104 const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
1105 activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
1106 activ_func == GAUSSIAN ? "GAUSSIAN" : 0;
1107
1108 if( activ_func_name )
1109 fs << "activation_function" << activ_func_name;
1110 else
1111 fs << "activation_function_id" << activ_func;
1112
1113 if( activ_func != IDENTITY )
1114 {
1115 fs << "f_param1" << f_param1;
1116 fs << "f_param2" << f_param2;
1117 }
1118
1119 fs << "min_val" << min_val << "max_val" << max_val << "min_val1" << min_val1 << "max_val1" << max_val1;
1120
1121 fs << "training_params" << "{";
1122 if( params.trainMethod == ANN_MLP::BACKPROP )
1123 {
1124 fs << "train_method" << "BACKPROP";
1125 fs << "dw_scale" << params.bpDWScale;
1126 fs << "moment_scale" << params.bpMomentScale;
1127 }
1128 else if( params.trainMethod == ANN_MLP::RPROP )
1129 {
1130 fs << "train_method" << "RPROP";
1131 fs << "dw0" << params.rpDW0;
1132 fs << "dw_plus" << params.rpDWPlus;
1133 fs << "dw_minus" << params.rpDWMinus;
1134 fs << "dw_min" << params.rpDWMin;
1135 fs << "dw_max" << params.rpDWMax;
1136 }
1137 else
1138 CV_Error(CV_StsError, "Unknown training method");
1139
1140 fs << "term_criteria" << "{";
1141 if( params.termCrit.type & TermCriteria::EPS )
1142 fs << "epsilon" << params.termCrit.epsilon;
1143 if( params.termCrit.type & TermCriteria::COUNT )
1144 fs << "iterations" << params.termCrit.maxCount;
1145 fs << "}" << "}";
1146 }
1147
write(FileStorage & fs) const1148 void write( FileStorage& fs ) const
1149 {
1150 if( layer_sizes.empty() )
1151 return;
1152 int i, l_count = layer_count();
1153
1154 fs << "layer_sizes" << layer_sizes;
1155
1156 write_params( fs );
1157
1158 size_t esz = weights[0].elemSize();
1159
1160 fs << "input_scale" << "[";
1161 fs.writeRaw("d", weights[0].ptr(), weights[0].total()*esz);
1162
1163 fs << "]" << "output_scale" << "[";
1164 fs.writeRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz);
1165
1166 fs << "]" << "inv_output_scale" << "[";
1167 fs.writeRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz);
1168
1169 fs << "]" << "weights" << "[";
1170 for( i = 1; i < l_count; i++ )
1171 {
1172 fs << "[";
1173 fs.writeRaw("d", weights[i].ptr(), weights[i].total()*esz);
1174 fs << "]";
1175 }
1176 fs << "]";
1177 }
1178
read_params(const FileNode & fn)1179 void read_params( const FileNode& fn )
1180 {
1181 String activ_func_name = (String)fn["activation_function"];
1182 if( !activ_func_name.empty() )
1183 {
1184 activ_func = activ_func_name == "SIGMOID_SYM" ? SIGMOID_SYM :
1185 activ_func_name == "IDENTITY" ? IDENTITY :
1186 activ_func_name == "GAUSSIAN" ? GAUSSIAN : -1;
1187 CV_Assert( activ_func >= 0 );
1188 }
1189 else
1190 activ_func = (int)fn["activation_function_id"];
1191
1192 f_param1 = (double)fn["f_param1"];
1193 f_param2 = (double)fn["f_param2"];
1194
1195 setActivationFunction( activ_func, f_param1, f_param2 );
1196
1197 min_val = (double)fn["min_val"];
1198 max_val = (double)fn["max_val"];
1199 min_val1 = (double)fn["min_val1"];
1200 max_val1 = (double)fn["max_val1"];
1201
1202 FileNode tpn = fn["training_params"];
1203 params = AnnParams();
1204
1205 if( !tpn.empty() )
1206 {
1207 String tmethod_name = (String)tpn["train_method"];
1208
1209 if( tmethod_name == "BACKPROP" )
1210 {
1211 params.trainMethod = ANN_MLP::BACKPROP;
1212 params.bpDWScale = (double)tpn["dw_scale"];
1213 params.bpMomentScale = (double)tpn["moment_scale"];
1214 }
1215 else if( tmethod_name == "RPROP" )
1216 {
1217 params.trainMethod = ANN_MLP::RPROP;
1218 params.rpDW0 = (double)tpn["dw0"];
1219 params.rpDWPlus = (double)tpn["dw_plus"];
1220 params.rpDWMinus = (double)tpn["dw_minus"];
1221 params.rpDWMin = (double)tpn["dw_min"];
1222 params.rpDWMax = (double)tpn["dw_max"];
1223 }
1224 else
1225 CV_Error(CV_StsParseError, "Unknown training method (should be BACKPROP or RPROP)");
1226
1227 FileNode tcn = tpn["term_criteria"];
1228 if( !tcn.empty() )
1229 {
1230 FileNode tcn_e = tcn["epsilon"];
1231 FileNode tcn_i = tcn["iterations"];
1232 params.termCrit.type = 0;
1233 if( !tcn_e.empty() )
1234 {
1235 params.termCrit.type |= TermCriteria::EPS;
1236 params.termCrit.epsilon = (double)tcn_e;
1237 }
1238 if( !tcn_i.empty() )
1239 {
1240 params.termCrit.type |= TermCriteria::COUNT;
1241 params.termCrit.maxCount = (int)tcn_i;
1242 }
1243 }
1244 }
1245 }
1246
read(const FileNode & fn)1247 void read( const FileNode& fn )
1248 {
1249 clear();
1250
1251 vector<int> _layer_sizes;
1252 readVectorOrMat(fn["layer_sizes"], _layer_sizes);
1253 setLayerSizes( _layer_sizes );
1254
1255 int i, l_count = layer_count();
1256 read_params(fn);
1257
1258 size_t esz = weights[0].elemSize();
1259
1260 FileNode w = fn["input_scale"];
1261 w.readRaw("d", weights[0].ptr(), weights[0].total()*esz);
1262
1263 w = fn["output_scale"];
1264 w.readRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz);
1265
1266 w = fn["inv_output_scale"];
1267 w.readRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz);
1268
1269 FileNodeIterator w_it = fn["weights"].begin();
1270
1271 for( i = 1; i < l_count; i++, ++w_it )
1272 (*w_it).readRaw("d", weights[i].ptr(), weights[i].total()*esz);
1273 trained = true;
1274 }
1275
getWeights(int layerIdx) const1276 Mat getWeights(int layerIdx) const
1277 {
1278 CV_Assert( 0 <= layerIdx && layerIdx < (int)weights.size() );
1279 return weights[layerIdx];
1280 }
1281
isTrained() const1282 bool isTrained() const
1283 {
1284 return trained;
1285 }
1286
isClassifier() const1287 bool isClassifier() const
1288 {
1289 return false;
1290 }
1291
getVarCount() const1292 int getVarCount() const
1293 {
1294 return layer_sizes.empty() ? 0 : layer_sizes[0];
1295 }
1296
getDefaultName() const1297 String getDefaultName() const
1298 {
1299 return "opencv_ml_ann_mlp";
1300 }
1301
1302 vector<int> layer_sizes;
1303 vector<Mat> weights;
1304 double f_param1, f_param2;
1305 double min_val, max_val, min_val1, max_val1;
1306 int activ_func;
1307 int max_lsize, max_buf_sz;
1308 AnnParams params;
1309 RNG rng;
1310 Mutex mtx;
1311 bool trained;
1312 };
1313
1314
create()1315 Ptr<ANN_MLP> ANN_MLP::create()
1316 {
1317 return makePtr<ANN_MLPImpl>();
1318 }
1319
1320 }}
1321
1322 /* End of file. */
1323