1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                        Intel License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
14 // Third party copyrights are property of their respective owners.
15 //
16 // Redistribution and use in source and binary forms, with or without modification,
17 // are permitted provided that the following conditions are met:
18 //
19 //   * Redistribution's of source code must retain the above copyright notice,
20 //     this list of conditions and the following disclaimer.
21 //
22 //   * Redistribution's in binary form must reproduce the above copyright notice,
23 //     this list of conditions and the following disclaimer in the documentation
24 //     and/or other materials provided with the distribution.
25 //
26 //   * The name of Intel Corporation may not be used to endorse or promote products
27 //     derived from this software without specific prior written permission.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the Intel Corporation or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 //M*/
41 
42 #include "_cv.h"
43 
44 /*
45  * This file includes the code, contributed by Simon Perreault
46  * (the function icvMedianBlur_8u_CnR_O1)
47  *
48  * Constant-time median filtering -- http://nomis80.org/ctmf.html
49  * Copyright (C) 2006 Simon Perreault
50  *
51  * Contact:
52  *  Laboratoire de vision et systemes numeriques
53  *  Pavillon Adrien-Pouliot
54  *  Universite Laval
55  *  Sainte-Foy, Quebec, Canada
56  *  G1K 7P4
57  *
58  *  perreaul@gel.ulaval.ca
59  */
60 
61 // uncomment the line below to force SSE2 mode
62 //#define CV_SSE2 1
63 
64 /****************************************************************************************\
65                                          Box Filter
66 \****************************************************************************************/
67 
68 static void icvSumRow_8u32s( const uchar* src0, int* dst, void* params );
69 static void icvSumRow_32f64f( const float* src0, double* dst, void* params );
70 static void icvSumCol_32s8u( const int** src, uchar* dst, int dst_step,
71                              int count, void* params );
72 static void icvSumCol_32s16s( const int** src, short* dst, int dst_step,
73                              int count, void* params );
74 static void icvSumCol_32s32s( const int** src, int* dst, int dst_step,
75                              int count, void* params );
76 static void icvSumCol_64f32f( const double** src, float* dst, int dst_step,
77                               int count, void* params );
78 
CvBoxFilter()79 CvBoxFilter::CvBoxFilter()
80 {
81     min_depth = CV_32S;
82     sum = 0;
83     sum_count = 0;
84     normalized = false;
85 }
86 
87 
CvBoxFilter(int _max_width,int _src_type,int _dst_type,bool _normalized,CvSize _ksize,CvPoint _anchor,int _border_mode,CvScalar _border_value)88 CvBoxFilter::CvBoxFilter( int _max_width, int _src_type, int _dst_type,
89                           bool _normalized, CvSize _ksize,
90                           CvPoint _anchor, int _border_mode,
91                           CvScalar _border_value )
92 {
93     min_depth = CV_32S;
94     sum = 0;
95     sum_count = 0;
96     normalized = false;
97     init( _max_width, _src_type, _dst_type, _normalized,
98           _ksize, _anchor, _border_mode, _border_value );
99 }
100 
101 
~CvBoxFilter()102 CvBoxFilter::~CvBoxFilter()
103 {
104     clear();
105 }
106 
107 
init(int _max_width,int _src_type,int _dst_type,bool _normalized,CvSize _ksize,CvPoint _anchor,int _border_mode,CvScalar _border_value)108 void CvBoxFilter::init( int _max_width, int _src_type, int _dst_type,
109                         bool _normalized, CvSize _ksize,
110                         CvPoint _anchor, int _border_mode,
111                         CvScalar _border_value )
112 {
113     CV_FUNCNAME( "CvBoxFilter::init" );
114 
115     __BEGIN__;
116 
117     sum = 0;
118     normalized = _normalized;
119 
120     if( (normalized && CV_MAT_TYPE(_src_type) != CV_MAT_TYPE(_dst_type)) ||
121         (!normalized && CV_MAT_CN(_src_type) != CV_MAT_CN(_dst_type)))
122         CV_ERROR( CV_StsUnmatchedFormats,
123         "In case of normalized box filter input and output must have the same type.\n"
124         "In case of unnormalized box filter the number of input and output channels must be the same" );
125 
126     min_depth = CV_MAT_DEPTH(_src_type) == CV_8U ? CV_32S : CV_64F;
127 
128     CvBaseImageFilter::init( _max_width, _src_type, _dst_type, 1, _ksize,
129                              _anchor, _border_mode, _border_value );
130 
131     scale = normalized ? 1./(ksize.width*ksize.height) : 1;
132 
133     if( CV_MAT_DEPTH(src_type) == CV_8U )
134         x_func = (CvRowFilterFunc)icvSumRow_8u32s;
135     else if( CV_MAT_DEPTH(src_type) == CV_32F )
136         x_func = (CvRowFilterFunc)icvSumRow_32f64f;
137     else
138         CV_ERROR( CV_StsUnsupportedFormat, "Unknown/unsupported input image format" );
139 
140     if( CV_MAT_DEPTH(dst_type) == CV_8U )
141     {
142         if( !normalized )
143             CV_ERROR( CV_StsBadArg, "Only normalized box filter can be used for 8u->8u transformation" );
144         y_func = (CvColumnFilterFunc)icvSumCol_32s8u;
145     }
146     else if( CV_MAT_DEPTH(dst_type) == CV_16S )
147     {
148         if( normalized || CV_MAT_DEPTH(src_type) != CV_8U )
149             CV_ERROR( CV_StsBadArg, "Only 8u->16s unnormalized box filter is supported in case of 16s output" );
150         y_func = (CvColumnFilterFunc)icvSumCol_32s16s;
151     }
152 	else if( CV_MAT_DEPTH(dst_type) == CV_32S )
153 	{
154 		if( normalized || CV_MAT_DEPTH(src_type) != CV_8U )
155 			CV_ERROR( CV_StsBadArg, "Only 8u->32s unnormalized box filter is supported in case of 32s output");
156 
157 		y_func = (CvColumnFilterFunc)icvSumCol_32s32s;
158 	}
159     else if( CV_MAT_DEPTH(dst_type) == CV_32F )
160     {
161         if( CV_MAT_DEPTH(src_type) != CV_32F )
162             CV_ERROR( CV_StsBadArg, "Only 32f->32f box filter (normalized or not) is supported in case of 32f output" );
163         y_func = (CvColumnFilterFunc)icvSumCol_64f32f;
164     }
165 	else{
166 		CV_ERROR( CV_StsBadArg, "Unknown/unsupported destination image format" );
167 	}
168 
169     __END__;
170 }
171 
172 
start_process(CvSlice x_range,int width)173 void CvBoxFilter::start_process( CvSlice x_range, int width )
174 {
175     CvBaseImageFilter::start_process( x_range, width );
176     int i, psz = CV_ELEM_SIZE(work_type);
177     uchar* s;
178     buf_end -= buf_step;
179     buf_max_count--;
180     assert( buf_max_count >= max_ky*2 + 1 );
181     s = sum = buf_end + cvAlign((width + ksize.width - 1)*CV_ELEM_SIZE(src_type), ALIGN);
182     sum_count = 0;
183 
184     width *= psz;
185     for( i = 0; i < width; i++ )
186         s[i] = (uchar)0;
187 }
188 
189 
190 static void
icvSumRow_8u32s(const uchar * src,int * dst,void * params)191 icvSumRow_8u32s( const uchar* src, int* dst, void* params )
192 {
193     const CvBoxFilter* state = (const CvBoxFilter*)params;
194     int ksize = state->get_kernel_size().width;
195     int width = state->get_width();
196     int cn = CV_MAT_CN(state->get_src_type());
197     int i, k;
198 
199     width = (width - 1)*cn; ksize *= cn;
200 
201     for( k = 0; k < cn; k++, src++, dst++ )
202     {
203         int s = 0;
204         for( i = 0; i < ksize; i += cn )
205             s += src[i];
206         dst[0] = s;
207         for( i = 0; i < width; i += cn )
208         {
209             s += src[i+ksize] - src[i];
210             dst[i+cn] = s;
211         }
212     }
213 }
214 
215 
216 static void
icvSumRow_32f64f(const float * src,double * dst,void * params)217 icvSumRow_32f64f( const float* src, double* dst, void* params )
218 {
219     const CvBoxFilter* state = (const CvBoxFilter*)params;
220     int ksize = state->get_kernel_size().width;
221     int width = state->get_width();
222     int cn = CV_MAT_CN(state->get_src_type());
223     int i, k;
224 
225     width = (width - 1)*cn; ksize *= cn;
226 
227     for( k = 0; k < cn; k++, src++, dst++ )
228     {
229         double s = 0;
230         for( i = 0; i < ksize; i += cn )
231             s += src[i];
232         dst[0] = s;
233         for( i = 0; i < width; i += cn )
234         {
235             s += (double)src[i+ksize] - src[i];
236             dst[i+cn] = s;
237         }
238     }
239 }
240 
241 
242 static void
icvSumCol_32s8u(const int ** src,uchar * dst,int dst_step,int count,void * params)243 icvSumCol_32s8u( const int** src, uchar* dst,
244                  int dst_step, int count, void* params )
245 {
246 #define BLUR_SHIFT 24
247     CvBoxFilter* state = (CvBoxFilter*)params;
248     int ksize = state->get_kernel_size().height;
249     int i, width = state->get_width();
250     int cn = CV_MAT_CN(state->get_src_type());
251     double scale = state->get_scale();
252     int iscale = cvFloor(scale*(1 << BLUR_SHIFT));
253     int* sum = (int*)state->get_sum_buf();
254     int* _sum_count = state->get_sum_count_ptr();
255     int sum_count = *_sum_count;
256 
257     width *= cn;
258     src += sum_count;
259     count += ksize - 1 - sum_count;
260 
261     for( ; count--; src++ )
262     {
263         const int* sp = src[0];
264         if( sum_count+1 < ksize )
265         {
266             for( i = 0; i <= width - 2; i += 2 )
267             {
268                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
269                 sum[i] = s0; sum[i+1] = s1;
270             }
271 
272             for( ; i < width; i++ )
273                 sum[i] += sp[i];
274 
275             sum_count++;
276         }
277         else
278         {
279             const int* sm = src[-ksize+1];
280             for( i = 0; i <= width - 2; i += 2 )
281             {
282                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
283                 int t0 = CV_DESCALE(s0*iscale, BLUR_SHIFT), t1 = CV_DESCALE(s1*iscale, BLUR_SHIFT);
284                 s0 -= sm[i]; s1 -= sm[i+1];
285                 sum[i] = s0; sum[i+1] = s1;
286                 dst[i] = (uchar)t0; dst[i+1] = (uchar)t1;
287             }
288 
289             for( ; i < width; i++ )
290             {
291                 int s0 = sum[i] + sp[i], t0 = CV_DESCALE(s0*iscale, BLUR_SHIFT);
292                 sum[i] = s0 - sm[i]; dst[i] = (uchar)t0;
293             }
294             dst += dst_step;
295         }
296     }
297 
298     *_sum_count = sum_count;
299 #undef BLUR_SHIFT
300 }
301 
302 
303 static void
icvSumCol_32s16s(const int ** src,short * dst,int dst_step,int count,void * params)304 icvSumCol_32s16s( const int** src, short* dst,
305                   int dst_step, int count, void* params )
306 {
307     CvBoxFilter* state = (CvBoxFilter*)params;
308     int ksize = state->get_kernel_size().height;
309     int ktotal = ksize*state->get_kernel_size().width;
310     int i, width = state->get_width();
311     int cn = CV_MAT_CN(state->get_src_type());
312     int* sum = (int*)state->get_sum_buf();
313     int* _sum_count = state->get_sum_count_ptr();
314     int sum_count = *_sum_count;
315 
316     dst_step /= sizeof(dst[0]);
317     width *= cn;
318     src += sum_count;
319     count += ksize - 1 - sum_count;
320 
321     for( ; count--; src++ )
322     {
323         const int* sp = src[0];
324         if( sum_count+1 < ksize )
325         {
326             for( i = 0; i <= width - 2; i += 2 )
327             {
328                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
329                 sum[i] = s0; sum[i+1] = s1;
330             }
331 
332             for( ; i < width; i++ )
333                 sum[i] += sp[i];
334 
335             sum_count++;
336         }
337         else if( ktotal < 128 )
338         {
339             const int* sm = src[-ksize+1];
340             for( i = 0; i <= width - 2; i += 2 )
341             {
342                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
343                 dst[i] = (short)s0; dst[i+1] = (short)s1;
344                 s0 -= sm[i]; s1 -= sm[i+1];
345                 sum[i] = s0; sum[i+1] = s1;
346             }
347 
348             for( ; i < width; i++ )
349             {
350                 int s0 = sum[i] + sp[i];
351                 dst[i] = (short)s0;
352                 sum[i] = s0 - sm[i];
353             }
354             dst += dst_step;
355         }
356         else
357         {
358             const int* sm = src[-ksize+1];
359             for( i = 0; i <= width - 2; i += 2 )
360             {
361                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
362                 dst[i] = CV_CAST_16S(s0); dst[i+1] = CV_CAST_16S(s1);
363                 s0 -= sm[i]; s1 -= sm[i+1];
364                 sum[i] = s0; sum[i+1] = s1;
365             }
366 
367             for( ; i < width; i++ )
368             {
369                 int s0 = sum[i] + sp[i];
370                 dst[i] = CV_CAST_16S(s0);
371                 sum[i] = s0 - sm[i];
372             }
373             dst += dst_step;
374         }
375     }
376 
377     *_sum_count = sum_count;
378 }
379 
380 static void
icvSumCol_32s32s(const int ** src,int * dst,int dst_step,int count,void * params)381 icvSumCol_32s32s( const int** src, int * dst,
382                   int dst_step, int count, void* params )
383 {
384     CvBoxFilter* state = (CvBoxFilter*)params;
385     int ksize = state->get_kernel_size().height;
386     int i, width = state->get_width();
387     int cn = CV_MAT_CN(state->get_src_type());
388     int* sum = (int*)state->get_sum_buf();
389     int* _sum_count = state->get_sum_count_ptr();
390     int sum_count = *_sum_count;
391 
392     dst_step /= sizeof(dst[0]);
393     width *= cn;
394     src += sum_count;
395     count += ksize - 1 - sum_count;
396 
397     for( ; count--; src++ )
398     {
399         const int* sp = src[0];
400         if( sum_count+1 < ksize )
401         {
402             for( i = 0; i <= width - 2; i += 2 )
403             {
404                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
405                 sum[i] = s0; sum[i+1] = s1;
406             }
407 
408             for( ; i < width; i++ )
409                 sum[i] += sp[i];
410 
411             sum_count++;
412         }
413         else
414         {
415             const int* sm = src[-ksize+1];
416             for( i = 0; i <= width - 2; i += 2 )
417             {
418                 int s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
419                 dst[i] = s0; dst[i+1] = s1;
420                 s0 -= sm[i]; s1 -= sm[i+1];
421                 sum[i] = s0; sum[i+1] = s1;
422             }
423 
424             for( ; i < width; i++ )
425             {
426                 int s0 = sum[i] + sp[i];
427                 dst[i] = s0;
428                 sum[i] = s0 - sm[i];
429             }
430             dst += dst_step;
431         }
432     }
433 
434     *_sum_count = sum_count;
435 }
436 
437 
438 static void
icvSumCol_64f32f(const double ** src,float * dst,int dst_step,int count,void * params)439 icvSumCol_64f32f( const double** src, float* dst,
440                   int dst_step, int count, void* params )
441 {
442     CvBoxFilter* state = (CvBoxFilter*)params;
443     int ksize = state->get_kernel_size().height;
444     int i, width = state->get_width();
445     int cn = CV_MAT_CN(state->get_src_type());
446     double scale = state->get_scale();
447     bool normalized = state->is_normalized();
448     double* sum = (double*)state->get_sum_buf();
449     int* _sum_count = state->get_sum_count_ptr();
450     int sum_count = *_sum_count;
451 
452     dst_step /= sizeof(dst[0]);
453     width *= cn;
454     src += sum_count;
455     count += ksize - 1 - sum_count;
456 
457     for( ; count--; src++ )
458     {
459         const double* sp = src[0];
460         if( sum_count+1 < ksize )
461         {
462             for( i = 0; i <= width - 2; i += 2 )
463             {
464                 double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
465                 sum[i] = s0; sum[i+1] = s1;
466             }
467 
468             for( ; i < width; i++ )
469                 sum[i] += sp[i];
470 
471             sum_count++;
472         }
473         else
474         {
475             const double* sm = src[-ksize+1];
476             if( normalized )
477                 for( i = 0; i <= width - 2; i += 2 )
478                 {
479                     double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
480                     double t0 = s0*scale, t1 = s1*scale;
481                     s0 -= sm[i]; s1 -= sm[i+1];
482                     dst[i] = (float)t0; dst[i+1] = (float)t1;
483                     sum[i] = s0; sum[i+1] = s1;
484                 }
485             else
486                 for( i = 0; i <= width - 2; i += 2 )
487                 {
488                     double s0 = sum[i] + sp[i], s1 = sum[i+1] + sp[i+1];
489                     dst[i] = (float)s0; dst[i+1] = (float)s1;
490                     s0 -= sm[i]; s1 -= sm[i+1];
491                     sum[i] = s0; sum[i+1] = s1;
492                 }
493 
494             for( ; i < width; i++ )
495             {
496                 double s0 = sum[i] + sp[i], t0 = s0*scale;
497                 sum[i] = s0 - sm[i]; dst[i] = (float)t0;
498             }
499             dst += dst_step;
500         }
501     }
502 
503     *_sum_count = sum_count;
504 }
505 
506 
507 /****************************************************************************************\
508                                       Median Filter
509 \****************************************************************************************/
510 
511 #define CV_MINMAX_8U(a,b) \
512     (t = CV_FAST_CAST_8U((a) - (b)), (b) += t, a -= t)
513 
514 #if CV_SSE2 && !defined __SSE2__
515 #define __SSE2__ 1
516 #include "emmintrin.h"
517 #endif
518 
519 #if defined(__VEC__) || defined(__ALTIVEC__)
520 #include <altivec.h>
521 #undef bool
522 #endif
523 
524 #if defined(__GNUC__)
525 #define align(x) __attribute__ ((aligned (x)))
526 #elif CV_SSE2 && (defined(__ICL) || (_MSC_VER >= 1300))
527 #define align(x) __declspec(align(x))
528 #else
529 #define align(x)
530 #endif
531 
532 #if _MSC_VER >= 1200
533 #pragma warning( disable: 4244 )
534 #endif
535 
536 /**
537  * This structure represents a two-tier histogram. The first tier (known as the
538  * "coarse" level) is 4 bit wide and the second tier (known as the "fine" level)
539  * is 8 bit wide. Pixels inserted in the fine level also get inserted into the
540  * coarse bucket designated by the 4 MSBs of the fine bucket value.
541  *
542  * The structure is aligned on 16 bits, which is a prerequisite for SIMD
543  * instructions. Each bucket is 16 bit wide, which means that extra care must be
544  * taken to prevent overflow.
545  */
546 typedef struct align(16)
547 {
548     ushort coarse[16];
549     ushort fine[16][16];
550 } Histogram;
551 
552 /**
553  * HOP is short for Histogram OPeration. This macro makes an operation \a op on
554  * histogram \a h for pixel value \a x. It takes care of handling both levels.
555  */
556 #define HOP(h,x,op) \
557     h.coarse[x>>4] op; \
558     *((ushort*) h.fine + x) op;
559 
560 #define COP(c,j,x,op) \
561     h_coarse[ 16*(n*c+j) + (x>>4) ] op; \
562     h_fine[ 16 * (n*(16*c+(x>>4)) + j) + (x & 0xF) ] op;
563 
564 #if defined __SSE2__ || defined __MMX__ || defined __ALTIVEC__
565 #define MEDIAN_HAVE_SIMD 1
566 #else
567 #define MEDIAN_HAVE_SIMD 0
568 #endif
569 
570 /**
571  * Adds histograms \a x and \a y and stores the result in \a y. Makes use of
572  * SSE2, MMX or Altivec, if available.
573  */
574 #if defined(__SSE2__)
histogram_add(const ushort x[16],ushort y[16])575 static inline void histogram_add( const ushort x[16], ushort y[16] )
576 {
577     _mm_store_si128( (__m128i*) &y[0], _mm_add_epi16(
578         _mm_load_si128((__m128i*) &y[0]), _mm_load_si128((__m128i*) &x[0] )));
579     _mm_store_si128( (__m128i*) &y[8], _mm_add_epi16(
580         _mm_load_si128((__m128i*) &y[8]), _mm_load_si128((__m128i*) &x[8] )));
581 }
582 #elif defined(__MMX__)
histogram_add(const ushort x[16],ushort y[16])583 static inline void histogram_add( const ushort x[16], ushort y[16] )
584 {
585     *(__m64*) &y[0]  = _mm_add_pi16( *(__m64*) &y[0],  *(__m64*) &x[0]  );
586     *(__m64*) &y[4]  = _mm_add_pi16( *(__m64*) &y[4],  *(__m64*) &x[4]  );
587     *(__m64*) &y[8]  = _mm_add_pi16( *(__m64*) &y[8],  *(__m64*) &x[8]  );
588     *(__m64*) &y[12] = _mm_add_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
589 }
590 #elif defined(__ALTIVEC__)
histogram_add(const ushort x[16],ushort y[16])591 static inline void histogram_add( const ushort x[16], ushort y[16] )
592 {
593     *(vector ushort*) &y[0] = vec_add( *(vector ushort*) &y[0], *(vector ushort*) &x[0] );
594     *(vector ushort*) &y[8] = vec_add( *(vector ushort*) &y[8], *(vector ushort*) &x[8] );
595 }
596 #else
histogram_add(const ushort x[16],ushort y[16])597 static inline void histogram_add( const ushort x[16], ushort y[16] )
598 {
599     int i;
600     for( i = 0; i < 16; ++i )
601         y[i] = (ushort)(y[i] + x[i]);
602 }
603 #endif
604 
605 /**
606  * Subtracts histogram \a x from \a y and stores the result in \a y. Makes use
607  * of SSE2, MMX or Altivec, if available.
608  */
609 #if defined(__SSE2__)
histogram_sub(const ushort x[16],ushort y[16])610 static inline void histogram_sub( const ushort x[16], ushort y[16] )
611 {
612     _mm_store_si128( (__m128i*) &y[0], _mm_sub_epi16(
613         _mm_load_si128((__m128i*) &y[0]), _mm_load_si128((__m128i*) &x[0] )));
614     _mm_store_si128( (__m128i*) &y[8], _mm_sub_epi16(
615         _mm_load_si128((__m128i*) &y[8]), _mm_load_si128((__m128i*) &x[8] )));
616 }
617 #elif defined(__MMX__)
histogram_sub(const ushort x[16],ushort y[16])618 static inline void histogram_sub( const ushort x[16], ushort y[16] )
619 {
620     *(__m64*) &y[0]  = _mm_sub_pi16( *(__m64*) &y[0],  *(__m64*) &x[0]  );
621     *(__m64*) &y[4]  = _mm_sub_pi16( *(__m64*) &y[4],  *(__m64*) &x[4]  );
622     *(__m64*) &y[8]  = _mm_sub_pi16( *(__m64*) &y[8],  *(__m64*) &x[8]  );
623     *(__m64*) &y[12] = _mm_sub_pi16( *(__m64*) &y[12], *(__m64*) &x[12] );
624 }
625 #elif defined(__ALTIVEC__)
histogram_sub(const ushort x[16],ushort y[16])626 static inline void histogram_sub( const ushort x[16], ushort y[16] )
627 {
628     *(vector ushort*) &y[0] = vec_sub( *(vector ushort*) &y[0], *(vector ushort*) &x[0] );
629     *(vector ushort*) &y[8] = vec_sub( *(vector ushort*) &y[8], *(vector ushort*) &x[8] );
630 }
631 #else
histogram_sub(const ushort x[16],ushort y[16])632 static inline void histogram_sub( const ushort x[16], ushort y[16] )
633 {
634     int i;
635     for( i = 0; i < 16; ++i )
636         y[i] = (ushort)(y[i] - x[i]);
637 }
638 #endif
639 
histogram_muladd(int a,const ushort x[16],ushort y[16])640 static inline void histogram_muladd( int a, const ushort x[16],
641         ushort y[16] )
642 {
643     int i;
644     for ( i = 0; i < 16; ++i )
645         y[i] = (ushort)(y[i] + a * x[i]);
646 }
647 
648 static CvStatus CV_STDCALL
icvMedianBlur_8u_CnR_O1(uchar * src,int src_step,uchar * dst,int dst_step,CvSize size,int kernel_size,int cn,int pad_left,int pad_right)649 icvMedianBlur_8u_CnR_O1( uchar* src, int src_step, uchar* dst, int dst_step,
650                          CvSize size, int kernel_size, int cn, int pad_left, int pad_right )
651 {
652     int r = (kernel_size-1)/2;
653     const int m = size.height, n = size.width;
654     int i, j, k, c;
655     const unsigned char *p, *q;
656     Histogram H[4];
657     ushort *h_coarse, *h_fine, luc[4][16];
658 
659     if( size.height < r || size.width < r )
660         return CV_BADSIZE_ERR;
661 
662     assert( src );
663     assert( dst );
664     assert( r >= 0 );
665     assert( size.width >= 2*r+1 );
666     assert( size.height >= 2*r+1 );
667     assert( src_step != 0 );
668     assert( dst_step != 0 );
669 
670     h_coarse = (ushort*) cvAlloc(  1 * 16 * n * cn * sizeof(ushort) );
671     h_fine   = (ushort*) cvAlloc( 16 * 16 * n * cn * sizeof(ushort) );
672     memset( h_coarse, 0,  1 * 16 * n * cn * sizeof(ushort) );
673     memset( h_fine,   0, 16 * 16 * n * cn * sizeof(ushort) );
674 
675     /* First row initialization */
676     for ( j = 0; j < n; ++j ) {
677         for ( c = 0; c < cn; ++c ) {
678             COP( c, j, src[cn*j+c], += r+1 );
679         }
680     }
681     for ( i = 0; i < r; ++i ) {
682         for ( j = 0; j < n; ++j ) {
683             for ( c = 0; c < cn; ++c ) {
684                 COP( c, j, src[src_step*i+cn*j+c], ++ );
685             }
686         }
687     }
688 
689     for ( i = 0; i < m; ++i ) {
690 
691         /* Update column histograms for entire row. */
692         p = src + src_step * MAX( 0, i-r-1 );
693         q = p + cn * n;
694         for ( j = 0; p != q; ++j ) {
695             for ( c = 0; c < cn; ++c, ++p ) {
696                 COP( c, j, *p, -- );
697             }
698         }
699 
700         p = src + src_step * MIN( m-1, i+r );
701         q = p + cn * n;
702         for ( j = 0; p != q; ++j ) {
703             for ( c = 0; c < cn; ++c, ++p ) {
704                 COP( c, j, *p, ++ );
705             }
706         }
707 
708         /* First column initialization */
709         memset( H, 0, cn*sizeof(H[0]) );
710         memset( luc, 0, cn*sizeof(luc[0]) );
711         if ( pad_left ) {
712             for ( c = 0; c < cn; ++c ) {
713                 histogram_muladd( r, &h_coarse[16*n*c], H[c].coarse );
714             }
715         }
716         for ( j = 0; j < (pad_left ? r : 2*r); ++j ) {
717             for ( c = 0; c < cn; ++c ) {
718                 histogram_add( &h_coarse[16*(n*c+j)], H[c].coarse );
719             }
720         }
721         for ( c = 0; c < cn; ++c ) {
722             for ( k = 0; k < 16; ++k ) {
723                 histogram_muladd( 2*r+1, &h_fine[16*n*(16*c+k)], &H[c].fine[k][0] );
724             }
725         }
726 
727         for ( j = pad_left ? 0 : r; j < (pad_right ? n : n-r); ++j ) {
728             for ( c = 0; c < cn; ++c ) {
729                 int t = 2*r*r + 2*r, b, sum = 0;
730                 ushort* segment;
731 
732                 histogram_add( &h_coarse[16*(n*c + MIN(j+r,n-1))], H[c].coarse );
733 
734                 /* Find median at coarse level */
735                 for ( k = 0; k < 16 ; ++k ) {
736                     sum += H[c].coarse[k];
737                     if ( sum > t ) {
738                         sum -= H[c].coarse[k];
739                         break;
740                     }
741                 }
742                 assert( k < 16 );
743 
744                 /* Update corresponding histogram segment */
745                 if ( luc[c][k] <= j-r ) {
746                     memset( &H[c].fine[k], 0, 16 * sizeof(ushort) );
747                     for ( luc[c][k] = j-r; luc[c][k] < MIN(j+r+1,n); ++luc[c][k] ) {
748                         histogram_add( &h_fine[16*(n*(16*c+k)+luc[c][k])], H[c].fine[k] );
749                     }
750                     if ( luc[c][k] < j+r+1 ) {
751                         histogram_muladd( j+r+1 - n, &h_fine[16*(n*(16*c+k)+(n-1))], &H[c].fine[k][0] );
752                         luc[c][k] = (ushort)(j+r+1);
753                     }
754                 }
755                 else {
756                     for ( ; luc[c][k] < j+r+1; ++luc[c][k] ) {
757                         histogram_sub( &h_fine[16*(n*(16*c+k)+MAX(luc[c][k]-2*r-1,0))], H[c].fine[k] );
758                         histogram_add( &h_fine[16*(n*(16*c+k)+MIN(luc[c][k],n-1))], H[c].fine[k] );
759                     }
760                 }
761 
762                 histogram_sub( &h_coarse[16*(n*c+MAX(j-r,0))], H[c].coarse );
763 
764                 /* Find median in segment */
765                 segment = H[c].fine[k];
766                 for ( b = 0; b < 16 ; ++b ) {
767                     sum += segment[b];
768                     if ( sum > t ) {
769                         dst[dst_step*i+cn*j+c] = (uchar)(16*k + b);
770                         break;
771                     }
772                 }
773                 assert( b < 16 );
774             }
775         }
776     }
777 
778 #if defined(__MMX__)
779     _mm_empty();
780 #endif
781 
782     cvFree(&h_coarse);
783     cvFree(&h_fine);
784 
785 #undef HOP
786 #undef COP
787     return CV_OK;
788 }
789 
790 
791 #if _MSC_VER >= 1200
792 #pragma warning( default: 4244 )
793 #endif
794 
795 
796 static CvStatus CV_STDCALL
icvMedianBlur_8u_CnR_Om(uchar * src,int src_step,uchar * dst,int dst_step,CvSize size,int m,int cn)797 icvMedianBlur_8u_CnR_Om( uchar* src, int src_step, uchar* dst, int dst_step,
798                          CvSize size, int m, int cn )
799 {
800     #define N  16
801     int     zone0[4][N];
802     int     zone1[4][N*N];
803     int     x, y;
804     int     n2 = m*m/2;
805     int     nx = (m + 1)/2 - 1;
806     uchar*  src_max = src + size.height*src_step;
807     uchar*  src_right = src + size.width*cn;
808 
809     #define UPDATE_ACC01( pix, cn, op ) \
810     {                                   \
811         int p = (pix);                  \
812         zone1[cn][p] op;                \
813         zone0[cn][p >> 4] op;           \
814     }
815 
816     if( size.height < nx || size.width < nx )
817         return CV_BADSIZE_ERR;
818 
819     if( m == 3 )
820     {
821         size.width *= cn;
822 
823         for( y = 0; y < size.height; y++, dst += dst_step )
824         {
825             const uchar* src0 = src + src_step*(y-1);
826             const uchar* src1 = src0 + src_step;
827             const uchar* src2 = src1 + src_step;
828             if( y == 0 )
829                 src0 = src1;
830             else if( y == size.height - 1 )
831                 src2 = src1;
832 
833             for( x = 0; x < 2*cn; x++ )
834             {
835                 int x0 = x < cn ? x : size.width - 3*cn + x;
836                 int x2 = x < cn ? x + cn : size.width - 2*cn + x;
837                 int x1 = x < cn ? x0 : x2, t;
838 
839                 int p0 = src0[x0], p1 = src0[x1], p2 = src0[x2];
840                 int p3 = src1[x0], p4 = src1[x1], p5 = src1[x2];
841                 int p6 = src2[x0], p7 = src2[x1], p8 = src2[x2];
842 
843                 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
844                 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p1);
845                 CV_MINMAX_8U(p3, p4); CV_MINMAX_8U(p6, p7);
846                 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
847                 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p3);
848                 CV_MINMAX_8U(p5, p8); CV_MINMAX_8U(p4, p7);
849                 CV_MINMAX_8U(p3, p6); CV_MINMAX_8U(p1, p4);
850                 CV_MINMAX_8U(p2, p5); CV_MINMAX_8U(p4, p7);
851                 CV_MINMAX_8U(p4, p2); CV_MINMAX_8U(p6, p4);
852                 CV_MINMAX_8U(p4, p2);
853                 dst[x1] = (uchar)p4;
854             }
855 
856             for( x = cn; x < size.width - cn; x++ )
857             {
858                 int p0 = src0[x-cn], p1 = src0[x], p2 = src0[x+cn];
859                 int p3 = src1[x-cn], p4 = src1[x], p5 = src1[x+cn];
860                 int p6 = src2[x-cn], p7 = src2[x], p8 = src2[x+cn];
861                 int t;
862 
863                 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
864                 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p1);
865                 CV_MINMAX_8U(p3, p4); CV_MINMAX_8U(p6, p7);
866                 CV_MINMAX_8U(p1, p2); CV_MINMAX_8U(p4, p5);
867                 CV_MINMAX_8U(p7, p8); CV_MINMAX_8U(p0, p3);
868                 CV_MINMAX_8U(p5, p8); CV_MINMAX_8U(p4, p7);
869                 CV_MINMAX_8U(p3, p6); CV_MINMAX_8U(p1, p4);
870                 CV_MINMAX_8U(p2, p5); CV_MINMAX_8U(p4, p7);
871                 CV_MINMAX_8U(p4, p2); CV_MINMAX_8U(p6, p4);
872                 CV_MINMAX_8U(p4, p2);
873 
874                 dst[x] = (uchar)p4;
875             }
876         }
877 
878         return CV_OK;
879     }
880 
881     for( x = 0; x < size.width; x++, dst += cn )
882     {
883         uchar* dst_cur = dst;
884         uchar* src_top = src;
885         uchar* src_bottom = src;
886         int    k, c;
887         int    x0 = -1;
888         int    src_step1 = src_step, dst_step1 = dst_step;
889 
890         if( x % 2 != 0 )
891         {
892             src_bottom = src_top += src_step*(size.height-1);
893             dst_cur += dst_step*(size.height-1);
894             src_step1 = -src_step1;
895             dst_step1 = -dst_step1;
896         }
897 
898         if( x <= m/2 )
899             nx++;
900 
901         if( nx < m )
902             x0 = x < m/2 ? 0 : (nx-1)*cn;
903 
904         // init accumulator
905         memset( zone0, 0, sizeof(zone0[0])*cn );
906         memset( zone1, 0, sizeof(zone1[0])*cn );
907 
908         for( y = 0; y <= m/2; y++ )
909         {
910             for( c = 0; c < cn; c++ )
911             {
912                 if( y > 0 )
913                 {
914                     if( x0 >= 0 )
915                         UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx) );
916                     for( k = 0; k < nx*cn; k += cn )
917                         UPDATE_ACC01( src_bottom[k+c], c, ++ );
918                 }
919                 else
920                 {
921                     if( x0 >= 0 )
922                         UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx)*(m/2+1) );
923                     for( k = 0; k < nx*cn; k += cn )
924                         UPDATE_ACC01( src_bottom[k+c], c, += m/2+1 );
925                 }
926             }
927 
928             if( (src_step1 > 0 && y < size.height-1) ||
929                 (src_step1 < 0 && size.height-y-1 > 0) )
930                 src_bottom += src_step1;
931         }
932 
933         for( y = 0; y < size.height; y++, dst_cur += dst_step1 )
934         {
935             // find median
936             for( c = 0; c < cn; c++ )
937             {
938                 int s = 0;
939                 for( k = 0; ; k++ )
940                 {
941                     int t = s + zone0[c][k];
942                     if( t > n2 ) break;
943                     s = t;
944                 }
945 
946                 for( k *= N; ;k++ )
947                 {
948                     s += zone1[c][k];
949                     if( s > n2 ) break;
950                 }
951 
952                 dst_cur[c] = (uchar)k;
953             }
954 
955             if( y+1 == size.height )
956                 break;
957 
958             if( cn == 1 )
959             {
960                 for( k = 0; k < nx; k++ )
961                 {
962                     int p = src_top[k];
963                     int q = src_bottom[k];
964                     zone1[0][p]--;
965                     zone0[0][p>>4]--;
966                     zone1[0][q]++;
967                     zone0[0][q>>4]++;
968                 }
969             }
970             else if( cn == 3 )
971             {
972                 for( k = 0; k < nx*3; k += 3 )
973                 {
974                     UPDATE_ACC01( src_top[k], 0, -- );
975                     UPDATE_ACC01( src_top[k+1], 1, -- );
976                     UPDATE_ACC01( src_top[k+2], 2, -- );
977 
978                     UPDATE_ACC01( src_bottom[k], 0, ++ );
979                     UPDATE_ACC01( src_bottom[k+1], 1, ++ );
980                     UPDATE_ACC01( src_bottom[k+2], 2, ++ );
981                 }
982             }
983             else
984             {
985                 assert( cn == 4 );
986                 for( k = 0; k < nx*4; k += 4 )
987                 {
988                     UPDATE_ACC01( src_top[k], 0, -- );
989                     UPDATE_ACC01( src_top[k+1], 1, -- );
990                     UPDATE_ACC01( src_top[k+2], 2, -- );
991                     UPDATE_ACC01( src_top[k+3], 3, -- );
992 
993                     UPDATE_ACC01( src_bottom[k], 0, ++ );
994                     UPDATE_ACC01( src_bottom[k+1], 1, ++ );
995                     UPDATE_ACC01( src_bottom[k+2], 2, ++ );
996                     UPDATE_ACC01( src_bottom[k+3], 3, ++ );
997                 }
998             }
999 
1000             if( x0 >= 0 )
1001             {
1002                 for( c = 0; c < cn; c++ )
1003                 {
1004                     UPDATE_ACC01( src_top[x0+c], c, -= (m - nx) );
1005                     UPDATE_ACC01( src_bottom[x0+c], c, += (m - nx) );
1006                 }
1007             }
1008 
1009             if( (src_step1 > 0 && src_bottom + src_step1 < src_max) ||
1010                 (src_step1 < 0 && src_bottom + src_step1 >= src) )
1011                 src_bottom += src_step1;
1012 
1013             if( y >= m/2 )
1014                 src_top += src_step1;
1015         }
1016 
1017         if( x >= m/2 )
1018             src += cn;
1019         if( src + nx*cn > src_right ) nx--;
1020     }
1021 #undef N
1022 #undef UPDATE_ACC
1023     return CV_OK;
1024 }
1025 
1026 
1027 /****************************************************************************************\
1028                                    Bilateral Filtering
1029 \****************************************************************************************/
1030 
1031 static void
icvBilateralFiltering_8u(const CvMat * src,CvMat * dst,int d,double sigma_color,double sigma_space)1032 icvBilateralFiltering_8u( const CvMat* src, CvMat* dst, int d,
1033                           double sigma_color, double sigma_space )
1034 {
1035     CvMat* temp = 0;
1036     float* color_weight = 0;
1037     float* space_weight = 0;
1038     int* space_ofs = 0;
1039 
1040     CV_FUNCNAME( "icvBilateralFiltering_8u" );
1041 
1042     __BEGIN__;
1043 
1044     double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
1045     double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
1046     int cn = CV_MAT_CN(src->type);
1047     int i, j, k, maxk, radius;
1048     CvSize size = cvGetMatSize(src);
1049 
1050     if( (CV_MAT_TYPE(src->type) != CV_8UC1 &&
1051         CV_MAT_TYPE(src->type) != CV_8UC3) ||
1052         !CV_ARE_TYPES_EQ(src, dst) )
1053         CV_ERROR( CV_StsUnsupportedFormat,
1054         "Both source and destination must be 8-bit, single-channel or 3-channel images" );
1055 
1056     if( sigma_color <= 0 )
1057         sigma_color = 1;
1058     if( sigma_space <= 0 )
1059         sigma_space = 1;
1060 
1061     if( d == 0 )
1062         radius = cvRound(sigma_space*1.5);
1063     else
1064         radius = d/2;
1065     radius = MAX(radius, 1);
1066     d = radius*2 + 1;
1067 
1068     CV_CALL( temp = cvCreateMat( src->rows + radius*2,
1069         src->cols + radius*2, src->type ));
1070     CV_CALL( cvCopyMakeBorder( src, temp, cvPoint(radius,radius), IPL_BORDER_REPLICATE ));
1071     CV_CALL( color_weight = (float*)cvAlloc(cn*256*sizeof(color_weight[0])));
1072     CV_CALL( space_weight = (float*)cvAlloc(d*d*sizeof(space_weight[0])));
1073     CV_CALL( space_ofs = (int*)cvAlloc(d*d*sizeof(space_ofs[0])));
1074 
1075     // initialize color-related bilateral filter coefficients
1076     for( i = 0; i < 256*cn; i++ )
1077         color_weight[i] = (float)exp(i*i*gauss_color_coeff);
1078 
1079     // initialize space-related bilateral filter coefficients
1080     for( i = -radius, maxk = 0; i <= radius; i++ )
1081         for( j = -radius; j <= radius; j++ )
1082         {
1083             double r = sqrt((double)i*i + (double)j*j);
1084             if( r > radius )
1085                 continue;
1086             space_weight[maxk] = (float)exp(r*r*gauss_space_coeff);
1087             space_ofs[maxk++] = i*temp->step + j*cn;
1088         }
1089 
1090     for( i = 0; i < size.height; i++ )
1091     {
1092         const uchar* sptr = temp->data.ptr + (i+radius)*temp->step + radius*cn;
1093         uchar* dptr = dst->data.ptr + i*dst->step;
1094 
1095         if( cn == 1 )
1096         {
1097             for( j = 0; j < size.width; j++ )
1098             {
1099                 float sum = 0, wsum = 0;
1100                 int val0 = sptr[j];
1101                 for( k = 0; k < maxk; k++ )
1102                 {
1103                     int val = sptr[j + space_ofs[k]];
1104                     float w = space_weight[k]*color_weight[abs(val - val0)];
1105                     sum += val*w;
1106                     wsum += w;
1107                 }
1108                 // overflow is not possible here => there is no need to use CV_CAST_8U
1109                 dptr[j] = (uchar)cvRound(sum/wsum);
1110             }
1111         }
1112         else
1113         {
1114             assert( cn == 3 );
1115             for( j = 0; j < size.width*3; j += 3 )
1116             {
1117                 float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
1118                 int b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
1119                 for( k = 0; k < maxk; k++ )
1120                 {
1121                     const uchar* sptr_k = sptr + j + space_ofs[k];
1122                     int b = sptr_k[0], g = sptr_k[1], r = sptr_k[2];
1123                     float w = space_weight[k]*color_weight[abs(b - b0) +
1124                         abs(g - g0) + abs(r - r0)];
1125                     sum_b += b*w; sum_g += g*w; sum_r += r*w;
1126                     wsum += w;
1127                 }
1128                 wsum = 1.f/wsum;
1129                 b0 = cvRound(sum_b*wsum);
1130                 g0 = cvRound(sum_g*wsum);
1131                 r0 = cvRound(sum_r*wsum);
1132                 dptr[j] = (uchar)b0; dptr[j+1] = (uchar)g0; dptr[j+2] = (uchar)r0;
1133             }
1134         }
1135     }
1136 
1137     __END__;
1138 
1139     cvReleaseMat( &temp );
1140     cvFree( &color_weight );
1141     cvFree( &space_weight );
1142     cvFree( &space_ofs );
1143 }
1144 
1145 
icvBilateralFiltering_32f(const CvMat * src,CvMat * dst,int d,double sigma_color,double sigma_space)1146 static void icvBilateralFiltering_32f( const CvMat* src, CvMat* dst, int d,
1147                                        double sigma_color, double sigma_space )
1148 {
1149   	CvMat* temp = 0;
1150     float* space_weight = 0;
1151     int* space_ofs = 0;
1152     float *expLUT = 0;
1153 
1154     CV_FUNCNAME( "icvBilateralFiltering_32f" );
1155 
1156     __BEGIN__;
1157 
1158     double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
1159     double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
1160     int cn = CV_MAT_CN(src->type);
1161     int i, j, k, maxk, radius;
1162     double minValSrc=-1, maxValSrc=1;
1163     const int kExpNumBinsPerChannel = 1 << 12;
1164     int kExpNumBins = 0;
1165     float lastExpVal = 1.f;
1166     int temp_step;
1167     float len, scale_index;
1168     CvMat src_reshaped;
1169 
1170     CvSize size = cvGetMatSize(src);
1171 
1172     if( (CV_MAT_TYPE(src->type) != CV_32FC1 &&
1173         CV_MAT_TYPE(src->type) != CV_32FC3) ||
1174         !CV_ARE_TYPES_EQ(src, dst) )
1175         CV_ERROR( CV_StsUnsupportedFormat,
1176         "Both source and destination must be 32-bit float, single-channel or 3-channel images" );
1177 
1178     if( sigma_color <= 0 )
1179         sigma_color = 1;
1180     if( sigma_space <= 0 )
1181         sigma_space = 1;
1182 
1183     if( d == 0 )
1184         radius = cvRound(sigma_space*1.5);
1185     else
1186         radius = d/2;
1187     radius = MAX(radius, 1);
1188     d = radius*2 + 1;
1189     // compute the min/max range for the input image (even if multichannel)
1190 
1191     CV_CALL( cvReshape( src, &src_reshaped, 1 ) );
1192     CV_CALL( cvMinMaxLoc(&src_reshaped, &minValSrc, &maxValSrc) );
1193 
1194     // temporary copy of the image with borders for easy processing
1195     CV_CALL( temp = cvCreateMat( src->rows + radius*2,
1196         src->cols + radius*2, src->type ));
1197     temp_step = temp->step/sizeof(float);
1198     CV_CALL( cvCopyMakeBorder( src, temp, cvPoint(radius,radius), IPL_BORDER_REPLICATE ));
1199     // allocate lookup tables
1200     CV_CALL( space_weight = (float*)cvAlloc(d*d*sizeof(space_weight[0])));
1201     CV_CALL( space_ofs = (int*)cvAlloc(d*d*sizeof(space_ofs[0])));
1202 
1203     // assign a length which is slightly more than needed
1204     len = (float)(maxValSrc - minValSrc) * cn;
1205     kExpNumBins = kExpNumBinsPerChannel * cn;
1206     CV_CALL( expLUT = (float*)cvAlloc((kExpNumBins+2) * sizeof(expLUT[0])));
1207     scale_index = kExpNumBins/len;
1208 
1209     // initialize the exp LUT
1210     for( i = 0; i < kExpNumBins+2; i++ )
1211     {
1212         if( lastExpVal > 0.f )
1213         {
1214             double val =  i / scale_index;
1215             expLUT[i] = (float)exp(val * val * gauss_color_coeff);
1216             lastExpVal = expLUT[i];
1217         }
1218         else
1219             expLUT[i] = 0.f;
1220     }
1221 
1222     // initialize space-related bilateral filter coefficients
1223     for( i = -radius, maxk = 0; i <= radius; i++ )
1224         for( j = -radius; j <= radius; j++ )
1225         {
1226             double r = sqrt((double)i*i + (double)j*j);
1227             if( r > radius )
1228                 continue;
1229             space_weight[maxk] = (float)exp(r*r*gauss_space_coeff);
1230             space_ofs[maxk++] = i*temp_step + j*cn;
1231         }
1232 
1233     for( i = 0; i < size.height; i++ )
1234     {
1235 	    const float* sptr = temp->data.fl + (i+radius)*temp_step + radius*cn;
1236         float* dptr = (float*)(dst->data.ptr + i*dst->step);
1237 
1238         if( cn == 1 )
1239         {
1240             for( j = 0; j < size.width; j++ )
1241             {
1242                 float sum = 0, wsum = 0;
1243                 float val0 = sptr[j];
1244                 for( k = 0; k < maxk; k++ )
1245                 {
1246                     float val = sptr[j + space_ofs[k]];
1247 					float alpha = (float)(fabs(val - val0)*scale_index);
1248                     int idx = cvFloor(alpha);
1249                     alpha -= idx;
1250                     float w = space_weight[k]*(expLUT[idx] + alpha*(expLUT[idx+1] - expLUT[idx]));
1251 	                sum += val*w;
1252                     wsum += w;
1253                 }
1254                 dptr[j] = (float)(sum/wsum);
1255             }
1256         }
1257         else
1258         {
1259             assert( cn == 3 );
1260             for( j = 0; j < size.width*3; j += 3 )
1261             {
1262                 float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
1263                 float b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
1264                 for( k = 0; k < maxk; k++ )
1265                 {
1266                     const float* sptr_k = sptr + j + space_ofs[k];
1267                     float b = sptr_k[0], g = sptr_k[1], r = sptr_k[2];
1268 					float alpha = (float)((fabs(b - b0) + fabs(g - g0) + fabs(r - r0))*scale_index);
1269                     int idx = cvFloor(alpha);
1270                     alpha -= idx;
1271                     float w = space_weight[k]*(expLUT[idx] + alpha*(expLUT[idx+1] - expLUT[idx]));
1272                     sum_b += b*w; sum_g += g*w; sum_r += r*w;
1273                     wsum += w;
1274                 }
1275                 wsum = 1.f/wsum;
1276                 b0 = sum_b*wsum;
1277                 g0 = sum_g*wsum;
1278                 r0 = sum_r*wsum;
1279                 dptr[j] = b0; dptr[j+1] = g0; dptr[j+2] = r0;
1280             }
1281         }
1282     }
1283 
1284     __END__;
1285 
1286     cvReleaseMat( &temp );
1287     cvFree( &space_weight );
1288     cvFree( &space_ofs );
1289     cvFree( &expLUT );
1290 }
1291 
1292 //////////////////////////////// IPP smoothing functions /////////////////////////////////
1293 
1294 icvFilterMedian_8u_C1R_t icvFilterMedian_8u_C1R_p = 0;
1295 icvFilterMedian_8u_C3R_t icvFilterMedian_8u_C3R_p = 0;
1296 icvFilterMedian_8u_C4R_t icvFilterMedian_8u_C4R_p = 0;
1297 
1298 icvFilterBox_8u_C1R_t icvFilterBox_8u_C1R_p = 0;
1299 icvFilterBox_8u_C3R_t icvFilterBox_8u_C3R_p = 0;
1300 icvFilterBox_8u_C4R_t icvFilterBox_8u_C4R_p = 0;
1301 icvFilterBox_32f_C1R_t icvFilterBox_32f_C1R_p = 0;
1302 icvFilterBox_32f_C3R_t icvFilterBox_32f_C3R_p = 0;
1303 icvFilterBox_32f_C4R_t icvFilterBox_32f_C4R_p = 0;
1304 
1305 typedef CvStatus (CV_STDCALL * CvSmoothFixedIPPFunc)
1306 ( const void* src, int srcstep, void* dst, int dststep,
1307   CvSize size, CvSize ksize, CvPoint anchor );
1308 
1309 //////////////////////////////////////////////////////////////////////////////////////////
1310 
1311 CV_IMPL void
cvSmooth(const void * srcarr,void * dstarr,int smooth_type,int param1,int param2,double param3,double param4)1312 cvSmooth( const void* srcarr, void* dstarr, int smooth_type,
1313           int param1, int param2, double param3, double param4 )
1314 {
1315     CvBoxFilter box_filter;
1316     CvSepFilter gaussian_filter;
1317 
1318     CvMat* temp = 0;
1319 
1320     CV_FUNCNAME( "cvSmooth" );
1321 
1322     __BEGIN__;
1323 
1324     int coi1 = 0, coi2 = 0;
1325     CvMat srcstub, *src = (CvMat*)srcarr;
1326     CvMat dststub, *dst = (CvMat*)dstarr;
1327     CvSize size;
1328     int src_type, dst_type, depth, cn;
1329     double sigma1 = 0, sigma2 = 0;
1330     bool have_ipp = icvFilterMedian_8u_C1R_p != 0;
1331 
1332     CV_CALL( src = cvGetMat( src, &srcstub, &coi1 ));
1333     CV_CALL( dst = cvGetMat( dst, &dststub, &coi2 ));
1334 
1335     if( coi1 != 0 || coi2 != 0 )
1336         CV_ERROR( CV_BadCOI, "" );
1337 
1338     src_type = CV_MAT_TYPE( src->type );
1339     dst_type = CV_MAT_TYPE( dst->type );
1340     depth = CV_MAT_DEPTH(src_type);
1341     cn = CV_MAT_CN(src_type);
1342     size = cvGetMatSize(src);
1343 
1344     if( !CV_ARE_SIZES_EQ( src, dst ))
1345         CV_ERROR( CV_StsUnmatchedSizes, "" );
1346 
1347     if( smooth_type != CV_BLUR_NO_SCALE && !CV_ARE_TYPES_EQ( src, dst ))
1348         CV_ERROR( CV_StsUnmatchedFormats,
1349         "The specified smoothing algorithm requires input and ouput arrays be of the same type" );
1350 
1351     if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE ||
1352         smooth_type == CV_GAUSSIAN || smooth_type == CV_MEDIAN )
1353     {
1354         // automatic detection of kernel size from sigma
1355         if( smooth_type == CV_GAUSSIAN )
1356         {
1357             sigma1 = param3;
1358             sigma2 = param4 ? param4 : param3;
1359 
1360             if( param1 == 0 && sigma1 > 0 )
1361                 param1 = cvRound(sigma1*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
1362             if( param2 == 0 && sigma2 > 0 )
1363                 param2 = cvRound(sigma2*(depth == CV_8U ? 3 : 4)*2 + 1)|1;
1364         }
1365 
1366         if( param2 == 0 )
1367             param2 = size.height == 1 ? 1 : param1;
1368         if( param1 < 1 || (param1 & 1) == 0 || param2 < 1 || (param2 & 1) == 0 )
1369             CV_ERROR( CV_StsOutOfRange,
1370                 "Both mask width and height must be >=1 and odd" );
1371 
1372         if( param1 == 1 && param2 == 1 )
1373         {
1374             cvConvert( src, dst );
1375             EXIT;
1376         }
1377     }
1378 
1379     if( have_ipp && (smooth_type == CV_BLUR || (smooth_type == CV_MEDIAN && param1 <= 15)) &&
1380         size.width >= param1 && size.height >= param2 && param1 > 1 && param2 > 1 )
1381     {
1382         CvSmoothFixedIPPFunc ipp_median_box_func = 0;
1383 
1384         if( smooth_type == CV_BLUR )
1385         {
1386             ipp_median_box_func =
1387                 src_type == CV_8UC1 ? icvFilterBox_8u_C1R_p :
1388                 src_type == CV_8UC3 ? icvFilterBox_8u_C3R_p :
1389                 src_type == CV_8UC4 ? icvFilterBox_8u_C4R_p :
1390                 src_type == CV_32FC1 ? icvFilterBox_32f_C1R_p :
1391                 src_type == CV_32FC3 ? icvFilterBox_32f_C3R_p :
1392                 src_type == CV_32FC4 ? icvFilterBox_32f_C4R_p : 0;
1393         }
1394         else if( smooth_type == CV_MEDIAN )
1395         {
1396             ipp_median_box_func =
1397                 src_type == CV_8UC1 ? icvFilterMedian_8u_C1R_p :
1398                 src_type == CV_8UC3 ? icvFilterMedian_8u_C3R_p :
1399                 src_type == CV_8UC4 ? icvFilterMedian_8u_C4R_p : 0;
1400         }
1401 
1402         if( ipp_median_box_func )
1403         {
1404             CvSize el_size = { param1, param2 };
1405             CvPoint el_anchor = { param1/2, param2/2 };
1406             int stripe_size = 1 << 14; // the optimal value may depend on CPU cache,
1407                                        // overhead of the current IPP code etc.
1408             const uchar* shifted_ptr;
1409             int y, dy = 0;
1410             int temp_step, dst_step = dst->step;
1411 
1412             CV_CALL( temp = icvIPPFilterInit( src, stripe_size, el_size ));
1413 
1414             shifted_ptr = temp->data.ptr +
1415                 el_anchor.y*temp->step + el_anchor.x*CV_ELEM_SIZE(src_type);
1416             temp_step = temp->step ? temp->step : CV_STUB_STEP;
1417 
1418             for( y = 0; y < src->rows; y += dy )
1419             {
1420                 dy = icvIPPFilterNextStripe( src, temp, y, el_size, el_anchor );
1421                 IPPI_CALL( ipp_median_box_func( shifted_ptr, temp_step,
1422                     dst->data.ptr + y*dst_step, dst_step, cvSize(src->cols, dy),
1423                     el_size, el_anchor ));
1424             }
1425             EXIT;
1426         }
1427     }
1428 
1429     if( smooth_type == CV_BLUR || smooth_type == CV_BLUR_NO_SCALE )
1430     {
1431         CV_CALL( box_filter.init( src->cols, src_type, dst_type,
1432             smooth_type == CV_BLUR, cvSize(param1, param2) ));
1433         CV_CALL( box_filter.process( src, dst ));
1434     }
1435     else if( smooth_type == CV_MEDIAN )
1436     {
1437         int img_size_mp = size.width*size.height;
1438         img_size_mp = (img_size_mp + (1<<19)) >> 20;
1439 
1440         if( depth != CV_8U || (cn != 1 && cn != 3 && cn != 4) )
1441             CV_ERROR( CV_StsUnsupportedFormat,
1442             "Median filter only supports 8uC1, 8uC3 and 8uC4 images" );
1443 
1444         if( size.width < param1*2 || size.height < param1*2 ||
1445             param1 <= 3 + (img_size_mp < 1 ? 12 : img_size_mp < 4 ? 6 : 2)*(MEDIAN_HAVE_SIMD ? 1 : 3))
1446         {
1447             // Special case optimized for 3x3
1448             IPPI_CALL( icvMedianBlur_8u_CnR_Om( src->data.ptr, src->step,
1449                 dst->data.ptr, dst->step, size, param1, cn ));
1450         }
1451         else
1452         {
1453             const int r = (param1 - 1) / 2;
1454             const int CACHE_SIZE = (int) ( 0.95 * 256 * 1024 / cn );  // assume a 256 kB cache size
1455             const int STRIPES = (int) cvCeil( (double) (size.width - 2*r) /
1456                     (CACHE_SIZE / sizeof(Histogram) - 2*r) );
1457             const int STRIPE_SIZE = (int) cvCeil(
1458                     (double) ( size.width + STRIPES*2*r - 2*r ) / STRIPES );
1459 
1460             for( int i = 0; i < size.width; i += STRIPE_SIZE - 2*r )
1461             {
1462                 int stripe = STRIPE_SIZE;
1463                 // Make sure that the filter kernel fits into one stripe.
1464                 if( i + STRIPE_SIZE - 2*r >= size.width ||
1465                     size.width - (i + STRIPE_SIZE - 2*r) < 2*r+1 )
1466                     stripe = size.width - i;
1467 
1468                 IPPI_CALL( icvMedianBlur_8u_CnR_O1( src->data.ptr + cn*i, src->step,
1469                     dst->data.ptr + cn*i, dst->step, cvSize(stripe, size.height),
1470                     param1, cn, i == 0, stripe == size.width - i ));
1471 
1472                 if( stripe == size.width - i )
1473                     break;
1474             }
1475         }
1476     }
1477     else if( smooth_type == CV_GAUSSIAN )
1478     {
1479         CvSize ksize = { param1, param2 };
1480         float* kx = (float*)cvStackAlloc( ksize.width*sizeof(kx[0]) );
1481         float* ky = (float*)cvStackAlloc( ksize.height*sizeof(ky[0]) );
1482         CvMat KX = cvMat( 1, ksize.width, CV_32F, kx );
1483         CvMat KY = cvMat( 1, ksize.height, CV_32F, ky );
1484 
1485         CvSepFilter::init_gaussian_kernel( &KX, sigma1 );
1486         if( ksize.width != ksize.height || fabs(sigma1 - sigma2) > FLT_EPSILON )
1487             CvSepFilter::init_gaussian_kernel( &KY, sigma2 );
1488         else
1489             KY.data.fl = kx;
1490 
1491         if( have_ipp && size.width >= param1*3 &&
1492             size.height >= param2 && param1 > 1 && param2 > 1 )
1493         {
1494             int done;
1495             CV_CALL( done = icvIPPSepFilter( src, dst, &KX, &KY,
1496                         cvPoint(ksize.width/2,ksize.height/2)));
1497             if( done )
1498                 EXIT;
1499         }
1500 
1501         CV_CALL( gaussian_filter.init( src->cols, src_type, dst_type, &KX, &KY ));
1502         CV_CALL( gaussian_filter.process( src, dst ));
1503     }
1504     else if( smooth_type == CV_BILATERAL )
1505     {
1506         if( param2 != 0 && (param2 != param1 || param1 % 2 == 0) )
1507             CV_ERROR( CV_StsBadSize, "Bilateral filter only supports square windows of odd size" );
1508 
1509         switch( src_type )
1510         {
1511         case CV_32FC1:
1512         case CV_32FC3:
1513             CV_CALL( icvBilateralFiltering_32f( src, dst, param1, param3, param4 ));
1514             break;
1515         case CV_8UC1:
1516         case CV_8UC3:
1517             CV_CALL( icvBilateralFiltering_8u( src, dst, param1, param3, param4 ));
1518             break;
1519         default:
1520             CV_ERROR( CV_StsUnsupportedFormat,
1521                 "Unknown/unsupported format: bilateral filter only supports 8uC1, 8uC3, 32fC1 and 32fC3 formats" );
1522         }
1523     }
1524 
1525     __END__;
1526 
1527     cvReleaseMat( &temp );
1528 }
1529 
1530 /* End of file. */
1531