• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #include "precomp.hpp"
44 
45 #undef HAVE_IPP
46 
47 namespace cv { namespace hal {
48 
49 ///////////////////////////////////// ATAN2 ////////////////////////////////////
50 static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
51 static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI);
52 static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
53 static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
54 
55 #if CV_NEON
cv_vrecpq_f32(float32x4_t val)56 static inline float32x4_t cv_vrecpq_f32(float32x4_t val)
57 {
58     float32x4_t reciprocal = vrecpeq_f32(val);
59     reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
60     reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
61     return reciprocal;
62 }
63 #endif
64 
fastAtan2(const float * Y,const float * X,float * angle,int len,bool angleInDegrees)65 void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
66 {
67     int i = 0;
68     float scale = angleInDegrees ? 1 : (float)(CV_PI/180);
69 
70 #ifdef HAVE_TEGRA_OPTIMIZATION
71     if (tegra::useTegra() && tegra::FastAtan2_32f(Y, X, angle, len, scale))
72         return;
73 #endif
74 
75 #if CV_SSE2
76     Cv32suf iabsmask; iabsmask.i = 0x7fffffff;
77     __m128 eps = _mm_set1_ps((float)DBL_EPSILON), absmask = _mm_set1_ps(iabsmask.f);
78     __m128 _90 = _mm_set1_ps(90.f), _180 = _mm_set1_ps(180.f), _360 = _mm_set1_ps(360.f);
79     __m128 z = _mm_setzero_ps(), scale4 = _mm_set1_ps(scale);
80     __m128 p1 = _mm_set1_ps(atan2_p1), p3 = _mm_set1_ps(atan2_p3);
81     __m128 p5 = _mm_set1_ps(atan2_p5), p7 = _mm_set1_ps(atan2_p7);
82 
83     for( ; i <= len - 4; i += 4 )
84     {
85         __m128 x = _mm_loadu_ps(X + i), y = _mm_loadu_ps(Y + i);
86         __m128 ax = _mm_and_ps(x, absmask), ay = _mm_and_ps(y, absmask);
87         __m128 mask = _mm_cmplt_ps(ax, ay);
88         __m128 tmin = _mm_min_ps(ax, ay), tmax = _mm_max_ps(ax, ay);
89         __m128 c = _mm_div_ps(tmin, _mm_add_ps(tmax, eps));
90         __m128 c2 = _mm_mul_ps(c, c);
91         __m128 a = _mm_mul_ps(c2, p7);
92         a = _mm_mul_ps(_mm_add_ps(a, p5), c2);
93         a = _mm_mul_ps(_mm_add_ps(a, p3), c2);
94         a = _mm_mul_ps(_mm_add_ps(a, p1), c);
95 
96         __m128 b = _mm_sub_ps(_90, a);
97         a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
98 
99         b = _mm_sub_ps(_180, a);
100         mask = _mm_cmplt_ps(x, z);
101         a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
102 
103         b = _mm_sub_ps(_360, a);
104         mask = _mm_cmplt_ps(y, z);
105         a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
106 
107         a = _mm_mul_ps(a, scale4);
108         _mm_storeu_ps(angle + i, a);
109     }
110 #elif CV_NEON
111     float32x4_t eps = vdupq_n_f32((float)DBL_EPSILON);
112     float32x4_t _90 = vdupq_n_f32(90.f), _180 = vdupq_n_f32(180.f), _360 = vdupq_n_f32(360.f);
113     float32x4_t z = vdupq_n_f32(0.0f), scale4 = vdupq_n_f32(scale);
114     float32x4_t p1 = vdupq_n_f32(atan2_p1), p3 = vdupq_n_f32(atan2_p3);
115     float32x4_t p5 = vdupq_n_f32(atan2_p5), p7 = vdupq_n_f32(atan2_p7);
116 
117     for( ; i <= len - 4; i += 4 )
118     {
119         float32x4_t x = vld1q_f32(X + i), y = vld1q_f32(Y + i);
120         float32x4_t ax = vabsq_f32(x), ay = vabsq_f32(y);
121         float32x4_t tmin = vminq_f32(ax, ay), tmax = vmaxq_f32(ax, ay);
122         float32x4_t c = vmulq_f32(tmin, cv_vrecpq_f32(vaddq_f32(tmax, eps)));
123         float32x4_t c2 = vmulq_f32(c, c);
124         float32x4_t a = vmulq_f32(c2, p7);
125         a = vmulq_f32(vaddq_f32(a, p5), c2);
126         a = vmulq_f32(vaddq_f32(a, p3), c2);
127         a = vmulq_f32(vaddq_f32(a, p1), c);
128 
129         a = vbslq_f32(vcgeq_f32(ax, ay), a, vsubq_f32(_90, a));
130         a = vbslq_f32(vcltq_f32(x, z), vsubq_f32(_180, a), a);
131         a = vbslq_f32(vcltq_f32(y, z), vsubq_f32(_360, a), a);
132 
133         vst1q_f32(angle + i, vmulq_f32(a, scale4));
134     }
135 #endif
136 
137     for( ; i < len; i++ )
138     {
139         float x = X[i], y = Y[i];
140         float ax = std::abs(x), ay = std::abs(y);
141         float a, c, c2;
142         if( ax >= ay )
143         {
144             c = ay/(ax + (float)DBL_EPSILON);
145             c2 = c*c;
146             a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
147         }
148         else
149         {
150             c = ax/(ay + (float)DBL_EPSILON);
151             c2 = c*c;
152             a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
153         }
154         if( x < 0 )
155             a = 180.f - a;
156         if( y < 0 )
157             a = 360.f - a;
158         angle[i] = (float)(a*scale);
159     }
160 }
161 
162 
magnitude(const float * x,const float * y,float * mag,int len)163 void magnitude(const float* x, const float* y, float* mag, int len)
164 {
165 #if defined HAVE_IPP
166     CV_IPP_CHECK()
167     {
168         IppStatus status = ippsMagnitude_32f(x, y, mag, len);
169         if (status >= 0)
170         {
171             CV_IMPL_ADD(CV_IMPL_IPP);
172             return;
173         }
174         setIppErrorStatus();
175     }
176 #endif
177 
178     int i = 0;
179 
180 #if CV_SIMD128
181     for( ; i <= len - 8; i += 8 )
182     {
183         v_float32x4 x0 = v_load(x + i), x1 = v_load(x + i + 4);
184         v_float32x4 y0 = v_load(y + i), y1 = v_load(y + i + 4);
185         x0 = v_sqrt(v_muladd(x0, x0, y0*y0));
186         x1 = v_sqrt(v_muladd(x1, x1, y1*y1));
187         v_store(mag + i, x0);
188         v_store(mag + i + 4, x1);
189     }
190 #endif
191 
192     for( ; i < len; i++ )
193     {
194         float x0 = x[i], y0 = y[i];
195         mag[i] = std::sqrt(x0*x0 + y0*y0);
196     }
197 }
198 
magnitude(const double * x,const double * y,double * mag,int len)199 void magnitude(const double* x, const double* y, double* mag, int len)
200 {
201 #if defined(HAVE_IPP)
202     CV_IPP_CHECK()
203     {
204         IppStatus status = ippsMagnitude_64f(x, y, mag, len);
205         if (status >= 0)
206         {
207             CV_IMPL_ADD(CV_IMPL_IPP);
208             return;
209         }
210         setIppErrorStatus();
211     }
212 #endif
213 
214     int i = 0;
215 
216 #if CV_SIMD128_64F
217     for( ; i <= len - 4; i += 4 )
218     {
219         v_float64x2 x0 = v_load(x + i), x1 = v_load(x + i + 2);
220         v_float64x2 y0 = v_load(y + i), y1 = v_load(y + i + 2);
221         x0 = v_sqrt(v_muladd(x0, x0, y0*y0));
222         x1 = v_sqrt(v_muladd(x1, x1, y1*y1));
223         v_store(mag + i, x0);
224         v_store(mag + i + 2, x1);
225     }
226 #endif
227 
228     for( ; i < len; i++ )
229     {
230         double x0 = x[i], y0 = y[i];
231         mag[i] = std::sqrt(x0*x0 + y0*y0);
232     }
233 }
234 
235 
invSqrt(const float * src,float * dst,int len)236 void invSqrt(const float* src, float* dst, int len)
237 {
238 #if defined(HAVE_IPP)
239     CV_IPP_CHECK()
240     {
241         if (ippsInvSqrt_32f_A21(src, dst, len) >= 0)
242         {
243             CV_IMPL_ADD(CV_IMPL_IPP);
244             return;
245         }
246         setIppErrorStatus();
247     }
248 #endif
249 
250     int i = 0;
251 
252 #if CV_SIMD128
253     for( ; i <= len - 8; i += 8 )
254     {
255         v_float32x4 t0 = v_load(src + i), t1 = v_load(src + i + 4);
256         t0 = v_invsqrt(t0);
257         t1 = v_invsqrt(t1);
258         v_store(dst + i, t0); v_store(dst + i + 4, t1);
259     }
260 #endif
261 
262     for( ; i < len; i++ )
263         dst[i] = 1/std::sqrt(src[i]);
264 }
265 
266 
invSqrt(const double * src,double * dst,int len)267 void invSqrt(const double* src, double* dst, int len)
268 {
269     int i = 0;
270 
271 #if CV_SSE2
272     __m128d v_1 = _mm_set1_pd(1.0);
273     for ( ; i <= len - 2; i += 2)
274         _mm_storeu_pd(dst + i, _mm_div_pd(v_1, _mm_sqrt_pd(_mm_loadu_pd(src + i))));
275 #endif
276 
277     for( ; i < len; i++ )
278         dst[i] = 1/std::sqrt(src[i]);
279 }
280 
281 
sqrt(const float * src,float * dst,int len)282 void sqrt(const float* src, float* dst, int len)
283 {
284 #if defined(HAVE_IPP)
285     CV_IPP_CHECK()
286     {
287         if (ippsSqrt_32f_A21(src, dst, len) >= 0)
288         {
289             CV_IMPL_ADD(CV_IMPL_IPP);
290             return;
291         }
292         setIppErrorStatus();
293     }
294 #endif
295 
296     int i = 0;
297 
298 #if CV_SIMD128
299     for( ; i <= len - 8; i += 8 )
300     {
301         v_float32x4 t0 = v_load(src + i), t1 = v_load(src + i + 4);
302         t0 = v_sqrt(t0);
303         t1 = v_sqrt(t1);
304         v_store(dst + i, t0); v_store(dst + i + 4, t1);
305     }
306 #endif
307 
308     for( ; i < len; i++ )
309         dst[i] = std::sqrt(src[i]);
310 }
311 
312 
sqrt(const double * src,double * dst,int len)313 void sqrt(const double* src, double* dst, int len)
314 {
315 #if defined(HAVE_IPP)
316     CV_IPP_CHECK()
317     {
318         if (ippsSqrt_64f_A50(src, dst, len) >= 0)
319         {
320             CV_IMPL_ADD(CV_IMPL_IPP);
321             return;
322         }
323         setIppErrorStatus();
324     }
325 #endif
326 
327     int i = 0;
328 
329 #if CV_SIMD128_64F
330     for( ; i <= len - 4; i += 4 )
331     {
332         v_float64x2 t0 = v_load(src + i), t1 = v_load(src + i + 2);
333         t0 = v_sqrt(t0);
334         t1 = v_sqrt(t1);
335         v_store(dst + i, t0); v_store(dst + i + 2, t1);
336     }
337 #endif
338 
339     for( ; i < len; i++ )
340         dst[i] = std::sqrt(src[i]);
341 }
342 
343 ////////////////////////////////////// EXP /////////////////////////////////////
344 
345 typedef union
346 {
347     struct {
348 #if ( defined( WORDS_BIGENDIAN ) && !defined( OPENCV_UNIVERSAL_BUILD ) ) || defined( __BIG_ENDIAN__ )
349         int hi;
350         int lo;
351 #else
352         int lo;
353         int hi;
354 #endif
355     } i;
356     double d;
357 }
358 DBLINT;
359 
360 #define EXPTAB_SCALE 6
361 #define EXPTAB_MASK  ((1 << EXPTAB_SCALE) - 1)
362 
363 #define EXPPOLY_32F_A0 .9670371139572337719125840413672004409288e-2
364 
365 static const double expTab[] = {
366     1.0 * EXPPOLY_32F_A0,
367     1.0108892860517004600204097905619 * EXPPOLY_32F_A0,
368     1.0218971486541166782344801347833 * EXPPOLY_32F_A0,
369     1.0330248790212284225001082839705 * EXPPOLY_32F_A0,
370     1.0442737824274138403219664787399 * EXPPOLY_32F_A0,
371     1.0556451783605571588083413251529 * EXPPOLY_32F_A0,
372     1.0671404006768236181695211209928 * EXPPOLY_32F_A0,
373     1.0787607977571197937406800374385 * EXPPOLY_32F_A0,
374     1.0905077326652576592070106557607 * EXPPOLY_32F_A0,
375     1.1023825833078409435564142094256 * EXPPOLY_32F_A0,
376     1.1143867425958925363088129569196 * EXPPOLY_32F_A0,
377     1.126521618608241899794798643787 * EXPPOLY_32F_A0,
378     1.1387886347566916537038302838415 * EXPPOLY_32F_A0,
379     1.151189229952982705817759635202 * EXPPOLY_32F_A0,
380     1.1637248587775775138135735990922 * EXPPOLY_32F_A0,
381     1.1763969916502812762846457284838 * EXPPOLY_32F_A0,
382     1.1892071150027210667174999705605 * EXPPOLY_32F_A0,
383     1.2021567314527031420963969574978 * EXPPOLY_32F_A0,
384     1.2152473599804688781165202513388 * EXPPOLY_32F_A0,
385     1.2284805361068700056940089577928 * EXPPOLY_32F_A0,
386     1.2418578120734840485936774687266 * EXPPOLY_32F_A0,
387     1.2553807570246910895793906574423 * EXPPOLY_32F_A0,
388     1.2690509571917332225544190810323 * EXPPOLY_32F_A0,
389     1.2828700160787782807266697810215 * EXPPOLY_32F_A0,
390     1.2968395546510096659337541177925 * EXPPOLY_32F_A0,
391     1.3109612115247643419229917863308 * EXPPOLY_32F_A0,
392     1.3252366431597412946295370954987 * EXPPOLY_32F_A0,
393     1.3396675240533030053600306697244 * EXPPOLY_32F_A0,
394     1.3542555469368927282980147401407 * EXPPOLY_32F_A0,
395     1.3690024229745906119296011329822 * EXPPOLY_32F_A0,
396     1.3839098819638319548726595272652 * EXPPOLY_32F_A0,
397     1.3989796725383111402095281367152 * EXPPOLY_32F_A0,
398     1.4142135623730950488016887242097 * EXPPOLY_32F_A0,
399     1.4296133383919700112350657782751 * EXPPOLY_32F_A0,
400     1.4451808069770466200370062414717 * EXPPOLY_32F_A0,
401     1.4609177941806469886513028903106 * EXPPOLY_32F_A0,
402     1.476826145939499311386907480374 * EXPPOLY_32F_A0,
403     1.4929077282912648492006435314867 * EXPPOLY_32F_A0,
404     1.5091644275934227397660195510332 * EXPPOLY_32F_A0,
405     1.5255981507445383068512536895169 * EXPPOLY_32F_A0,
406     1.5422108254079408236122918620907 * EXPPOLY_32F_A0,
407     1.5590044002378369670337280894749 * EXPPOLY_32F_A0,
408     1.5759808451078864864552701601819 * EXPPOLY_32F_A0,
409     1.5931421513422668979372486431191 * EXPPOLY_32F_A0,
410     1.6104903319492543081795206673574 * EXPPOLY_32F_A0,
411     1.628027421857347766848218522014 * EXPPOLY_32F_A0,
412     1.6457554781539648445187567247258 * EXPPOLY_32F_A0,
413     1.6636765803267364350463364569764 * EXPPOLY_32F_A0,
414     1.6817928305074290860622509524664 * EXPPOLY_32F_A0,
415     1.7001063537185234695013625734975 * EXPPOLY_32F_A0,
416     1.7186192981224779156293443764563 * EXPPOLY_32F_A0,
417     1.7373338352737062489942020818722 * EXPPOLY_32F_A0,
418     1.7562521603732994831121606193753 * EXPPOLY_32F_A0,
419     1.7753764925265212525505592001993 * EXPPOLY_32F_A0,
420     1.7947090750031071864277032421278 * EXPPOLY_32F_A0,
421     1.8142521755003987562498346003623 * EXPPOLY_32F_A0,
422     1.8340080864093424634870831895883 * EXPPOLY_32F_A0,
423     1.8539791250833855683924530703377 * EXPPOLY_32F_A0,
424     1.8741676341102999013299989499544 * EXPPOLY_32F_A0,
425     1.8945759815869656413402186534269 * EXPPOLY_32F_A0,
426     1.9152065613971472938726112702958 * EXPPOLY_32F_A0,
427     1.9360617934922944505980559045667 * EXPPOLY_32F_A0,
428     1.9571441241754002690183222516269 * EXPPOLY_32F_A0,
429     1.9784560263879509682582499181312 * EXPPOLY_32F_A0,
430 };
431 
432 
433 // the code below uses _mm_cast* intrinsics, which are not avialable on VS2005
434 #if (defined _MSC_VER && _MSC_VER < 1500) || \
435 (!defined __APPLE__ && defined __GNUC__ && __GNUC__*100 + __GNUC_MINOR__ < 402)
436 #undef CV_SSE2
437 #define CV_SSE2 0
438 #endif
439 
440 static const double exp_prescale = 1.4426950408889634073599246810019 * (1 << EXPTAB_SCALE);
441 static const double exp_postscale = 1./(1 << EXPTAB_SCALE);
442 static const double exp_max_val = 3000.*(1 << EXPTAB_SCALE); // log10(DBL_MAX) < 3000
443 
exp(const float * _x,float * y,int n)444 void exp( const float *_x, float *y, int n )
445 {
446     static const float
447     A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0),
448     A3 = (float)(.6931471805521448196800669615864773144641 / EXPPOLY_32F_A0),
449     A2 = (float)(.2402265109513301490103372422686535526573 / EXPPOLY_32F_A0),
450     A1 = (float)(.5550339366753125211915322047004666939128e-1 / EXPPOLY_32F_A0);
451 
452 #undef EXPPOLY
453 #define EXPPOLY(x)  \
454 (((((x) + A1)*(x) + A2)*(x) + A3)*(x) + A4)
455 
456     int i = 0;
457     const Cv32suf* x = (const Cv32suf*)_x;
458     Cv32suf buf[4];
459 
460 #if CV_SSE2
461     if( n >= 8 )
462     {
463         static const __m128d prescale2 = _mm_set1_pd(exp_prescale);
464         static const __m128 postscale4 = _mm_set1_ps((float)exp_postscale);
465         static const __m128 maxval4 = _mm_set1_ps((float)(exp_max_val/exp_prescale));
466         static const __m128 minval4 = _mm_set1_ps((float)(-exp_max_val/exp_prescale));
467 
468         static const __m128 mA1 = _mm_set1_ps(A1);
469         static const __m128 mA2 = _mm_set1_ps(A2);
470         static const __m128 mA3 = _mm_set1_ps(A3);
471         static const __m128 mA4 = _mm_set1_ps(A4);
472         bool y_aligned = (size_t)(void*)y % 16 == 0;
473 
474         ushort CV_DECL_ALIGNED(16) tab_idx[8];
475 
476         for( ; i <= n - 8; i += 8 )
477         {
478             __m128 xf0, xf1;
479             xf0 = _mm_loadu_ps(&x[i].f);
480             xf1 = _mm_loadu_ps(&x[i+4].f);
481             __m128i xi0, xi1, xi2, xi3;
482 
483             xf0 = _mm_min_ps(_mm_max_ps(xf0, minval4), maxval4);
484             xf1 = _mm_min_ps(_mm_max_ps(xf1, minval4), maxval4);
485 
486             __m128d xd0 = _mm_cvtps_pd(xf0);
487             __m128d xd2 = _mm_cvtps_pd(_mm_movehl_ps(xf0, xf0));
488             __m128d xd1 = _mm_cvtps_pd(xf1);
489             __m128d xd3 = _mm_cvtps_pd(_mm_movehl_ps(xf1, xf1));
490 
491             xd0 = _mm_mul_pd(xd0, prescale2);
492             xd2 = _mm_mul_pd(xd2, prescale2);
493             xd1 = _mm_mul_pd(xd1, prescale2);
494             xd3 = _mm_mul_pd(xd3, prescale2);
495 
496             xi0 = _mm_cvtpd_epi32(xd0);
497             xi2 = _mm_cvtpd_epi32(xd2);
498 
499             xi1 = _mm_cvtpd_epi32(xd1);
500             xi3 = _mm_cvtpd_epi32(xd3);
501 
502             xd0 = _mm_sub_pd(xd0, _mm_cvtepi32_pd(xi0));
503             xd2 = _mm_sub_pd(xd2, _mm_cvtepi32_pd(xi2));
504             xd1 = _mm_sub_pd(xd1, _mm_cvtepi32_pd(xi1));
505             xd3 = _mm_sub_pd(xd3, _mm_cvtepi32_pd(xi3));
506 
507             xf0 = _mm_movelh_ps(_mm_cvtpd_ps(xd0), _mm_cvtpd_ps(xd2));
508             xf1 = _mm_movelh_ps(_mm_cvtpd_ps(xd1), _mm_cvtpd_ps(xd3));
509 
510             xf0 = _mm_mul_ps(xf0, postscale4);
511             xf1 = _mm_mul_ps(xf1, postscale4);
512 
513             xi0 = _mm_unpacklo_epi64(xi0, xi2);
514             xi1 = _mm_unpacklo_epi64(xi1, xi3);
515             xi0 = _mm_packs_epi32(xi0, xi1);
516 
517             _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi16(EXPTAB_MASK)));
518 
519             xi0 = _mm_add_epi16(_mm_srai_epi16(xi0, EXPTAB_SCALE), _mm_set1_epi16(127));
520             xi0 = _mm_max_epi16(xi0, _mm_setzero_si128());
521             xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(255));
522             xi1 = _mm_unpackhi_epi16(xi0, _mm_setzero_si128());
523             xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128());
524 
525             __m128d yd0 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[0]), _mm_load_sd(expTab + tab_idx[1]));
526             __m128d yd1 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[2]), _mm_load_sd(expTab + tab_idx[3]));
527             __m128d yd2 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[4]), _mm_load_sd(expTab + tab_idx[5]));
528             __m128d yd3 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[6]), _mm_load_sd(expTab + tab_idx[7]));
529 
530             __m128 yf0 = _mm_movelh_ps(_mm_cvtpd_ps(yd0), _mm_cvtpd_ps(yd1));
531             __m128 yf1 = _mm_movelh_ps(_mm_cvtpd_ps(yd2), _mm_cvtpd_ps(yd3));
532 
533             yf0 = _mm_mul_ps(yf0, _mm_castsi128_ps(_mm_slli_epi32(xi0, 23)));
534             yf1 = _mm_mul_ps(yf1, _mm_castsi128_ps(_mm_slli_epi32(xi1, 23)));
535 
536             __m128 zf0 = _mm_add_ps(xf0, mA1);
537             __m128 zf1 = _mm_add_ps(xf1, mA1);
538 
539             zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA2);
540             zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA2);
541 
542             zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA3);
543             zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA3);
544 
545             zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA4);
546             zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA4);
547 
548             zf0 = _mm_mul_ps(zf0, yf0);
549             zf1 = _mm_mul_ps(zf1, yf1);
550 
551             if( y_aligned )
552             {
553                 _mm_store_ps(y + i, zf0);
554                 _mm_store_ps(y + i + 4, zf1);
555             }
556             else
557             {
558                 _mm_storeu_ps(y + i, zf0);
559                 _mm_storeu_ps(y + i + 4, zf1);
560             }
561         }
562     }
563     else
564 #endif
565         for( ; i <= n - 4; i += 4 )
566         {
567             double x0 = x[i].f * exp_prescale;
568             double x1 = x[i + 1].f * exp_prescale;
569             double x2 = x[i + 2].f * exp_prescale;
570             double x3 = x[i + 3].f * exp_prescale;
571             int val0, val1, val2, val3, t;
572 
573             if( ((x[i].i >> 23) & 255) > 127 + 10 )
574                 x0 = x[i].i < 0 ? -exp_max_val : exp_max_val;
575 
576             if( ((x[i+1].i >> 23) & 255) > 127 + 10 )
577                 x1 = x[i+1].i < 0 ? -exp_max_val : exp_max_val;
578 
579             if( ((x[i+2].i >> 23) & 255) > 127 + 10 )
580                 x2 = x[i+2].i < 0 ? -exp_max_val : exp_max_val;
581 
582             if( ((x[i+3].i >> 23) & 255) > 127 + 10 )
583                 x3 = x[i+3].i < 0 ? -exp_max_val : exp_max_val;
584 
585             val0 = cvRound(x0);
586             val1 = cvRound(x1);
587             val2 = cvRound(x2);
588             val3 = cvRound(x3);
589 
590             x0 = (x0 - val0)*exp_postscale;
591             x1 = (x1 - val1)*exp_postscale;
592             x2 = (x2 - val2)*exp_postscale;
593             x3 = (x3 - val3)*exp_postscale;
594 
595             t = (val0 >> EXPTAB_SCALE) + 127;
596             t = !(t & ~255) ? t : t < 0 ? 0 : 255;
597             buf[0].i = t << 23;
598 
599             t = (val1 >> EXPTAB_SCALE) + 127;
600             t = !(t & ~255) ? t : t < 0 ? 0 : 255;
601             buf[1].i = t << 23;
602 
603             t = (val2 >> EXPTAB_SCALE) + 127;
604             t = !(t & ~255) ? t : t < 0 ? 0 : 255;
605             buf[2].i = t << 23;
606 
607             t = (val3 >> EXPTAB_SCALE) + 127;
608             t = !(t & ~255) ? t : t < 0 ? 0 : 255;
609             buf[3].i = t << 23;
610 
611             x0 = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
612             x1 = buf[1].f * expTab[val1 & EXPTAB_MASK] * EXPPOLY( x1 );
613 
614             y[i] = (float)x0;
615             y[i + 1] = (float)x1;
616 
617             x2 = buf[2].f * expTab[val2 & EXPTAB_MASK] * EXPPOLY( x2 );
618             x3 = buf[3].f * expTab[val3 & EXPTAB_MASK] * EXPPOLY( x3 );
619 
620             y[i + 2] = (float)x2;
621             y[i + 3] = (float)x3;
622         }
623 
624     for( ; i < n; i++ )
625     {
626         double x0 = x[i].f * exp_prescale;
627         int val0, t;
628 
629         if( ((x[i].i >> 23) & 255) > 127 + 10 )
630             x0 = x[i].i < 0 ? -exp_max_val : exp_max_val;
631 
632         val0 = cvRound(x0);
633         t = (val0 >> EXPTAB_SCALE) + 127;
634         t = !(t & ~255) ? t : t < 0 ? 0 : 255;
635 
636         buf[0].i = t << 23;
637         x0 = (x0 - val0)*exp_postscale;
638 
639         y[i] = (float)(buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY(x0));
640     }
641 }
642 
exp(const double * _x,double * y,int n)643 void exp( const double *_x, double *y, int n )
644 {
645     static const double
646     A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0,
647     A4 = .69314718055994546743029643825322 / EXPPOLY_32F_A0,
648     A3 = .24022650695886477918181338054308 / EXPPOLY_32F_A0,
649     A2 = .55504108793649567998466049042729e-1 / EXPPOLY_32F_A0,
650     A1 = .96180973140732918010002372686186e-2 / EXPPOLY_32F_A0,
651     A0 = .13369713757180123244806654839424e-2 / EXPPOLY_32F_A0;
652 
653 #undef EXPPOLY
654 #define EXPPOLY(x)  (((((A0*(x) + A1)*(x) + A2)*(x) + A3)*(x) + A4)*(x) + A5)
655 
656     int i = 0;
657     Cv64suf buf[4];
658     const Cv64suf* x = (const Cv64suf*)_x;
659 
660 #if CV_SSE2
661     static const __m128d prescale2 = _mm_set1_pd(exp_prescale);
662     static const __m128d postscale2 = _mm_set1_pd(exp_postscale);
663     static const __m128d maxval2 = _mm_set1_pd(exp_max_val);
664     static const __m128d minval2 = _mm_set1_pd(-exp_max_val);
665 
666     static const __m128d mA0 = _mm_set1_pd(A0);
667     static const __m128d mA1 = _mm_set1_pd(A1);
668     static const __m128d mA2 = _mm_set1_pd(A2);
669     static const __m128d mA3 = _mm_set1_pd(A3);
670     static const __m128d mA4 = _mm_set1_pd(A4);
671     static const __m128d mA5 = _mm_set1_pd(A5);
672 
673     int CV_DECL_ALIGNED(16) tab_idx[4];
674 
675     for( ; i <= n - 4; i += 4 )
676     {
677         __m128d xf0 = _mm_loadu_pd(&x[i].f), xf1 = _mm_loadu_pd(&x[i+2].f);
678         __m128i xi0, xi1;
679         xf0 = _mm_min_pd(_mm_max_pd(xf0, minval2), maxval2);
680         xf1 = _mm_min_pd(_mm_max_pd(xf1, minval2), maxval2);
681         xf0 = _mm_mul_pd(xf0, prescale2);
682         xf1 = _mm_mul_pd(xf1, prescale2);
683 
684         xi0 = _mm_cvtpd_epi32(xf0);
685         xi1 = _mm_cvtpd_epi32(xf1);
686         xf0 = _mm_mul_pd(_mm_sub_pd(xf0, _mm_cvtepi32_pd(xi0)), postscale2);
687         xf1 = _mm_mul_pd(_mm_sub_pd(xf1, _mm_cvtepi32_pd(xi1)), postscale2);
688 
689         xi0 = _mm_unpacklo_epi64(xi0, xi1);
690         _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi32(EXPTAB_MASK)));
691 
692         xi0 = _mm_add_epi32(_mm_srai_epi32(xi0, EXPTAB_SCALE), _mm_set1_epi32(1023));
693         xi0 = _mm_packs_epi32(xi0, xi0);
694         xi0 = _mm_max_epi16(xi0, _mm_setzero_si128());
695         xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(2047));
696         xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128());
697         xi1 = _mm_unpackhi_epi32(xi0, _mm_setzero_si128());
698         xi0 = _mm_unpacklo_epi32(xi0, _mm_setzero_si128());
699 
700         __m128d yf0 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[0]), _mm_load_sd(expTab + tab_idx[1]));
701         __m128d yf1 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[2]), _mm_load_sd(expTab + tab_idx[3]));
702         yf0 = _mm_mul_pd(yf0, _mm_castsi128_pd(_mm_slli_epi64(xi0, 52)));
703         yf1 = _mm_mul_pd(yf1, _mm_castsi128_pd(_mm_slli_epi64(xi1, 52)));
704 
705         __m128d zf0 = _mm_add_pd(_mm_mul_pd(mA0, xf0), mA1);
706         __m128d zf1 = _mm_add_pd(_mm_mul_pd(mA0, xf1), mA1);
707 
708         zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA2);
709         zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA2);
710 
711         zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA3);
712         zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA3);
713 
714         zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA4);
715         zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA4);
716 
717         zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA5);
718         zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA5);
719 
720         zf0 = _mm_mul_pd(zf0, yf0);
721         zf1 = _mm_mul_pd(zf1, yf1);
722 
723         _mm_storeu_pd(y + i, zf0);
724         _mm_storeu_pd(y + i + 2, zf1);
725     }
726 #endif
727     for( ; i <= n - 4; i += 4 )
728     {
729         double x0 = x[i].f * exp_prescale;
730         double x1 = x[i + 1].f * exp_prescale;
731         double x2 = x[i + 2].f * exp_prescale;
732         double x3 = x[i + 3].f * exp_prescale;
733 
734         double y0, y1, y2, y3;
735         int val0, val1, val2, val3, t;
736 
737         t = (int)(x[i].i >> 52);
738         if( (t & 2047) > 1023 + 10 )
739             x0 = t < 0 ? -exp_max_val : exp_max_val;
740 
741         t = (int)(x[i+1].i >> 52);
742         if( (t & 2047) > 1023 + 10 )
743             x1 = t < 0 ? -exp_max_val : exp_max_val;
744 
745         t = (int)(x[i+2].i >> 52);
746         if( (t & 2047) > 1023 + 10 )
747             x2 = t < 0 ? -exp_max_val : exp_max_val;
748 
749         t = (int)(x[i+3].i >> 52);
750         if( (t & 2047) > 1023 + 10 )
751             x3 = t < 0 ? -exp_max_val : exp_max_val;
752 
753         val0 = cvRound(x0);
754         val1 = cvRound(x1);
755         val2 = cvRound(x2);
756         val3 = cvRound(x3);
757 
758         x0 = (x0 - val0)*exp_postscale;
759         x1 = (x1 - val1)*exp_postscale;
760         x2 = (x2 - val2)*exp_postscale;
761         x3 = (x3 - val3)*exp_postscale;
762 
763         t = (val0 >> EXPTAB_SCALE) + 1023;
764         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
765         buf[0].i = (int64)t << 52;
766 
767         t = (val1 >> EXPTAB_SCALE) + 1023;
768         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
769         buf[1].i = (int64)t << 52;
770 
771         t = (val2 >> EXPTAB_SCALE) + 1023;
772         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
773         buf[2].i = (int64)t << 52;
774 
775         t = (val3 >> EXPTAB_SCALE) + 1023;
776         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
777         buf[3].i = (int64)t << 52;
778 
779         y0 = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
780         y1 = buf[1].f * expTab[val1 & EXPTAB_MASK] * EXPPOLY( x1 );
781 
782         y[i] = y0;
783         y[i + 1] = y1;
784 
785         y2 = buf[2].f * expTab[val2 & EXPTAB_MASK] * EXPPOLY( x2 );
786         y3 = buf[3].f * expTab[val3 & EXPTAB_MASK] * EXPPOLY( x3 );
787 
788         y[i + 2] = y2;
789         y[i + 3] = y3;
790     }
791 
792     for( ; i < n; i++ )
793     {
794         double x0 = x[i].f * exp_prescale;
795         int val0, t;
796 
797         t = (int)(x[i].i >> 52);
798         if( (t & 2047) > 1023 + 10 )
799             x0 = t < 0 ? -exp_max_val : exp_max_val;
800 
801         val0 = cvRound(x0);
802         t = (val0 >> EXPTAB_SCALE) + 1023;
803         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
804 
805         buf[0].i = (int64)t << 52;
806         x0 = (x0 - val0)*exp_postscale;
807 
808         y[i] = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
809     }
810 }
811 
812 #undef EXPTAB_SCALE
813 #undef EXPTAB_MASK
814 #undef EXPPOLY_32F_A0
815 
816 /////////////////////////////////////////// LOG ///////////////////////////////////////
817 
818 #define LOGTAB_SCALE    8
819 #define LOGTAB_MASK         ((1 << LOGTAB_SCALE) - 1)
820 #define LOGTAB_MASK2        ((1 << (20 - LOGTAB_SCALE)) - 1)
821 #define LOGTAB_MASK2_32F    ((1 << (23 - LOGTAB_SCALE)) - 1)
822 
823 static const double CV_DECL_ALIGNED(16) icvLogTab[] = {
824     0.0000000000000000000000000000000000000000,    1.000000000000000000000000000000000000000,
825     .00389864041565732288852075271279318258166,    .9961089494163424124513618677042801556420,
826     .00778214044205494809292034119607706088573,    .9922480620155038759689922480620155038760,
827     .01165061721997527263705585198749759001657,    .9884169884169884169884169884169884169884,
828     .01550418653596525274396267235488267033361,    .9846153846153846153846153846153846153846,
829     .01934296284313093139406447562578250654042,    .9808429118773946360153256704980842911877,
830     .02316705928153437593630670221500622574241,    .9770992366412213740458015267175572519084,
831     .02697658769820207233514075539915211265906,    .9733840304182509505703422053231939163498,
832     .03077165866675368732785500469617545604706,    .9696969696969696969696969696969696969697,
833     .03455238150665972812758397481047722976656,    .9660377358490566037735849056603773584906,
834     .03831886430213659461285757856785494368522,    .9624060150375939849624060150375939849624,
835     .04207121392068705056921373852674150839447,    .9588014981273408239700374531835205992509,
836     .04580953603129420126371940114040626212953,    .9552238805970149253731343283582089552239,
837     .04953393512227662748292900118940451648088,    .9516728624535315985130111524163568773234,
838     .05324451451881227759255210685296333394944,    .9481481481481481481481481481481481481481,
839     .05694137640013842427411105973078520037234,    .9446494464944649446494464944649446494465,
840     .06062462181643483993820353816772694699466,    .9411764705882352941176470588235294117647,
841     .06429435070539725460836422143984236754475,    .9377289377289377289377289377289377289377,
842     .06795066190850773679699159401934593915938,    .9343065693430656934306569343065693430657,
843     .07159365318700880442825962290953611955044,    .9309090909090909090909090909090909090909,
844     .07522342123758751775142172846244648098944,    .9275362318840579710144927536231884057971,
845     .07884006170777602129362549021607264876369,    .9241877256317689530685920577617328519856,
846     .08244366921107458556772229485432035289706,    .9208633093525179856115107913669064748201,
847     .08603433734180314373940490213499288074675,    .9175627240143369175627240143369175627240,
848     .08961215868968712416897659522874164395031,    .9142857142857142857142857142857142857143,
849     .09317722485418328259854092721070628613231,    .9110320284697508896797153024911032028470,
850     .09672962645855109897752299730200320482256,    .9078014184397163120567375886524822695035,
851     .10026945316367513738597949668474029749630,    .9045936395759717314487632508833922261484,
852     .10379679368164355934833764649738441221420,    .9014084507042253521126760563380281690141,
853     .10731173578908805021914218968959175981580,    .8982456140350877192982456140350877192982,
854     .11081436634029011301105782649756292812530,    .8951048951048951048951048951048951048951,
855     .11430477128005862852422325204315711744130,    .8919860627177700348432055749128919860627,
856     .11778303565638344185817487641543266363440,    .8888888888888888888888888888888888888889,
857     .12124924363286967987640707633545389398930,    .8858131487889273356401384083044982698962,
858     .12470347850095722663787967121606925502420,    .8827586206896551724137931034482758620690,
859     .12814582269193003360996385708858724683530,    .8797250859106529209621993127147766323024,
860     .13157635778871926146571524895989568904040,    .8767123287671232876712328767123287671233,
861     .13499516453750481925766280255629681050780,    .8737201365187713310580204778156996587031,
862     .13840232285911913123754857224412262439730,    .8707482993197278911564625850340136054422,
863     .14179791186025733629172407290752744302150,    .8677966101694915254237288135593220338983,
864     .14518200984449788903951628071808954700830,    .8648648648648648648648648648648648648649,
865     .14855469432313711530824207329715136438610,    .8619528619528619528619528619528619528620,
866     .15191604202584196858794030049466527998450,    .8590604026845637583892617449664429530201,
867     .15526612891112392955683674244937719777230,    .8561872909698996655518394648829431438127,
868     .15860503017663857283636730244325008243330,    .8533333333333333333333333333333333333333,
869     .16193282026931324346641360989451641216880,    .8504983388704318936877076411960132890365,
870     .16524957289530714521497145597095368430010,    .8476821192052980132450331125827814569536,
871     .16855536102980664403538924034364754334090,    .8448844884488448844884488448844884488449,
872     .17185025692665920060697715143760433420540,    .8421052631578947368421052631578947368421,
873     .17513433212784912385018287750426679849630,    .8393442622950819672131147540983606557377,
874     .17840765747281828179637841458315961062910,    .8366013071895424836601307189542483660131,
875     .18167030310763465639212199675966985523700,    .8338762214983713355048859934853420195440,
876     .18492233849401198964024217730184318497780,    .8311688311688311688311688311688311688312,
877     .18816383241818296356839823602058459073300,    .8284789644012944983818770226537216828479,
878     .19139485299962943898322009772527962923050,    .8258064516129032258064516129032258064516,
879     .19461546769967164038916962454095482826240,    .8231511254019292604501607717041800643087,
880     .19782574332991986754137769821682013571260,    .8205128205128205128205128205128205128205,
881     .20102574606059073203390141770796617493040,    .8178913738019169329073482428115015974441,
882     .20421554142869088876999228432396193966280,    .8152866242038216560509554140127388535032,
883     .20739519434607056602715147164417430758480,    .8126984126984126984126984126984126984127,
884     .21056476910734961416338251183333341032260,    .8101265822784810126582278481012658227848,
885     .21372432939771812687723695489694364368910,    .8075709779179810725552050473186119873817,
886     .21687393830061435506806333251006435602900,    .8050314465408805031446540880503144654088,
887     .22001365830528207823135744547471404075630,    .8025078369905956112852664576802507836991,
888     .22314355131420973710199007200571941211830,    .8000000000000000000000000000000000000000,
889     .22626367865045338145790765338460914790630,    .7975077881619937694704049844236760124611,
890     .22937410106484582006380890106811420992010,    .7950310559006211180124223602484472049689,
891     .23247487874309405442296849741978803649550,    .7925696594427244582043343653250773993808,
892     .23556607131276688371634975283086532726890,    .7901234567901234567901234567901234567901,
893     .23864773785017498464178231643018079921600,    .7876923076923076923076923076923076923077,
894     .24171993688714515924331749374687206000090,    .7852760736196319018404907975460122699387,
895     .24478272641769091566565919038112042471760,    .7828746177370030581039755351681957186544,
896     .24783616390458124145723672882013488560910,    .7804878048780487804878048780487804878049,
897     .25088030628580937353433455427875742316250,    .7781155015197568389057750759878419452888,
898     .25391520998096339667426946107298135757450,    .7757575757575757575757575757575757575758,
899     .25694093089750041913887912414793390780680,    .7734138972809667673716012084592145015106,
900     .25995752443692604627401010475296061486000,    .7710843373493975903614457831325301204819,
901     .26296504550088134477547896494797896593800,    .7687687687687687687687687687687687687688,
902     .26596354849713793599974565040611196309330,    .7664670658682634730538922155688622754491,
903     .26895308734550393836570947314612567424780,    .7641791044776119402985074626865671641791,
904     .27193371548364175804834985683555714786050,    .7619047619047619047619047619047619047619,
905     .27490548587279922676529508862586226314300,    .7596439169139465875370919881305637982196,
906     .27786845100345625159121709657483734190480,    .7573964497041420118343195266272189349112,
907     .28082266290088775395616949026589281857030,    .7551622418879056047197640117994100294985,
908     .28376817313064456316240580235898960381750,    .7529411764705882352941176470588235294118,
909     .28670503280395426282112225635501090437180,    .7507331378299120234604105571847507331378,
910     .28963329258304265634293983566749375313530,    .7485380116959064327485380116959064327485,
911     .29255300268637740579436012922087684273730,    .7463556851311953352769679300291545189504,
912     .29546421289383584252163927885703742504130,    .7441860465116279069767441860465116279070,
913     .29836697255179722709783618483925238251680,    .7420289855072463768115942028985507246377,
914     .30126133057816173455023545102449133992200,    .7398843930635838150289017341040462427746,
915     .30414733546729666446850615102448500692850,    .7377521613832853025936599423631123919308,
916     .30702503529491181888388950937951449304830,    .7356321839080459770114942528735632183908,
917     .30989447772286465854207904158101882785550,    .7335243553008595988538681948424068767908,
918     .31275571000389684739317885942000430077330,    .7314285714285714285714285714285714285714,
919     .31560877898630329552176476681779604405180,    .7293447293447293447293447293447293447293,
920     .31845373111853458869546784626436419785030,    .7272727272727272727272727272727272727273,
921     .32129061245373424782201254856772720813750,    .7252124645892351274787535410764872521246,
922     .32411946865421192853773391107097268104550,    .7231638418079096045197740112994350282486,
923     .32694034499585328257253991068864706903700,    .7211267605633802816901408450704225352113,
924     .32975328637246797969240219572384376078850,    .7191011235955056179775280898876404494382,
925     .33255833730007655635318997155991382896900,    .7170868347338935574229691876750700280112,
926     .33535554192113781191153520921943709254280,    .7150837988826815642458100558659217877095,
927     .33814494400871636381467055798566434532400,    .7130919220055710306406685236768802228412,
928     .34092658697059319283795275623560883104800,    .7111111111111111111111111111111111111111,
929     .34370051385331840121395430287520866841080,    .7091412742382271468144044321329639889197,
930     .34646676734620857063262633346312213689100,    .7071823204419889502762430939226519337017,
931     .34922538978528827602332285096053965389730,    .7052341597796143250688705234159779614325,
932     .35197642315717814209818925519357435405250,    .7032967032967032967032967032967032967033,
933     .35471990910292899856770532096561510115850,    .7013698630136986301369863013698630136986,
934     .35745588892180374385176833129662554711100,    .6994535519125683060109289617486338797814,
935     .36018440357500774995358483465679455548530,    .6975476839237057220708446866485013623978,
936     .36290549368936841911903457003063522279280,    .6956521739130434782608695652173913043478,
937     .36561919956096466943762379742111079394830,    .6937669376693766937669376693766937669377,
938     .36832556115870762614150635272380895912650,    .6918918918918918918918918918918918918919,
939     .37102461812787262962487488948681857436900,    .6900269541778975741239892183288409703504,
940     .37371640979358405898480555151763837784530,    .6881720430107526881720430107526881720430,
941     .37640097516425302659470730759494472295050,    .6863270777479892761394101876675603217158,
942     .37907835293496944251145919224654790014030,    .6844919786096256684491978609625668449198,
943     .38174858149084833769393299007788300514230,    .6826666666666666666666666666666666666667,
944     .38441169891033200034513583887019194662580,    .6808510638297872340425531914893617021277,
945     .38706774296844825844488013899535872042180,    .6790450928381962864721485411140583554377,
946     .38971675114002518602873692543653305619950,    .6772486772486772486772486772486772486772,
947     .39235876060286384303665840889152605086580,    .6754617414248021108179419525065963060686,
948     .39499380824086893770896722344332374632350,    .6736842105263157894736842105263157894737,
949     .39762193064713846624158577469643205404280,    .6719160104986876640419947506561679790026,
950     .40024316412701266276741307592601515352730,    .6701570680628272251308900523560209424084,
951     .40285754470108348090917615991202183067800,    .6684073107049608355091383812010443864230,
952     .40546510810816432934799991016916465014230,    .6666666666666666666666666666666666666667,
953     .40806588980822172674223224930756259709600,    .6649350649350649350649350649350649350649,
954     .41065992498526837639616360320360399782650,    .6632124352331606217616580310880829015544,
955     .41324724855021932601317757871584035456180,    .6614987080103359173126614987080103359173,
956     .41582789514371093497757669865677598863850,    .6597938144329896907216494845360824742268,
957     .41840189913888381489925905043492093682300,    .6580976863753213367609254498714652956298,
958     .42096929464412963239894338585145305842150,    .6564102564102564102564102564102564102564,
959     .42353011550580327293502591601281892508280,    .6547314578005115089514066496163682864450,
960     .42608439531090003260516141381231136620050,    .6530612244897959183673469387755102040816,
961     .42863216738969872610098832410585600882780,    .6513994910941475826972010178117048346056,
962     .43117346481837132143866142541810404509300,    .6497461928934010152284263959390862944162,
963     .43370832042155937902094819946796633303180,    .6481012658227848101265822784810126582278,
964     .43623676677491801667585491486534010618930,    .6464646464646464646464646464646464646465,
965     .43875883620762790027214350629947148263450,    .6448362720403022670025188916876574307305,
966     .44127456080487520440058801796112675219780,    .6432160804020100502512562814070351758794,
967     .44378397241030093089975139264424797147500,    .6416040100250626566416040100250626566416,
968     .44628710262841947420398014401143882423650,    .6400000000000000000000000000000000000000,
969     .44878398282700665555822183705458883196130,    .6384039900249376558603491271820448877805,
970     .45127464413945855836729492693848442286250,    .6368159203980099502487562189054726368159,
971     .45375911746712049854579618113348260521900,    .6352357320099255583126550868486352357320,
972     .45623743348158757315857769754074979573500,    .6336633663366336633663366336633663366337,
973     .45870962262697662081833982483658473938700,    .6320987654320987654320987654320987654321,
974     .46117571512217014895185229761409573256980,    .6305418719211822660098522167487684729064,
975     .46363574096303250549055974261136725544930,    .6289926289926289926289926289926289926290,
976     .46608972992459918316399125615134835243230,    .6274509803921568627450980392156862745098,
977     .46853771156323925639597405279346276074650,    .6259168704156479217603911980440097799511,
978     .47097971521879100631480241645476780831830,    .6243902439024390243902439024390243902439,
979     .47341577001667212165614273544633761048330,    .6228710462287104622871046228710462287105,
980     .47584590486996386493601107758877333253630,    .6213592233009708737864077669902912621359,
981     .47827014848147025860569669930555392056700,    .6198547215496368038740920096852300242131,
982     .48068852934575190261057286988943815231330,    .6183574879227053140096618357487922705314,
983     .48310107575113581113157579238759353756900,    .6168674698795180722891566265060240963855,
984     .48550781578170076890899053978500887751580,    .6153846153846153846153846153846153846154,
985     .48790877731923892879351001283794175833480,    .6139088729016786570743405275779376498801,
986     .49030398804519381705802061333088204264650,    .6124401913875598086124401913875598086124,
987     .49269347544257524607047571407747454941280,    .6109785202863961813842482100238663484487,
988     .49507726679785146739476431321236304938800,    .6095238095238095238095238095238095238095,
989     .49745538920281889838648226032091770321130,    .6080760095011876484560570071258907363420,
990     .49982786955644931126130359189119189977650,    .6066350710900473933649289099526066350711,
991     .50219473456671548383667413872899487614650,    .6052009456264775413711583924349881796690,
992     .50455601075239520092452494282042607665050,    .6037735849056603773584905660377358490566,
993     .50691172444485432801997148999362252652650,    .6023529411764705882352941176470588235294,
994     .50926190178980790257412536448100581765150,    .6009389671361502347417840375586854460094,
995     .51160656874906207391973111953120678663250,    .5995316159250585480093676814988290398126,
996     .51394575110223428282552049495279788970950,    .5981308411214953271028037383177570093458,
997     .51627947444845445623684554448118433356300,    .5967365967365967365967365967365967365967,
998     .51860776420804555186805373523384332656850,    .5953488372093023255813953488372093023256,
999     .52093064562418522900344441950437612831600,    .5939675174013921113689095127610208816705,
1000     .52324814376454775732838697877014055848100,    .5925925925925925925925925925925925925926,
1001     .52556028352292727401362526507000438869000,    .5912240184757505773672055427251732101617,
1002     .52786708962084227803046587723656557500350,    .5898617511520737327188940092165898617512,
1003     .53016858660912158374145519701414741575700,    .5885057471264367816091954022988505747126,
1004     .53246479886947173376654518506256863474850,    .5871559633027522935779816513761467889908,
1005     .53475575061602764748158733709715306758900,    .5858123569794050343249427917620137299771,
1006     .53704146589688361856929077475797384977350,    .5844748858447488584474885844748858447489,
1007     .53932196859560876944783558428753167390800,    .5831435079726651480637813211845102505695,
1008     .54159728243274429804188230264117009937750,    .5818181818181818181818181818181818181818,
1009     .54386743096728351609669971367111429572100,    .5804988662131519274376417233560090702948,
1010     .54613243759813556721383065450936555862450,    .5791855203619909502262443438914027149321,
1011     .54839232556557315767520321969641372561450,    .5778781038374717832957110609480812641084,
1012     .55064711795266219063194057525834068655950,    .5765765765765765765765765765765765765766,
1013     .55289683768667763352766542084282264113450,    .5752808988764044943820224719101123595506,
1014     .55514150754050151093110798683483153581600,    .5739910313901345291479820627802690582960,
1015     .55738115013400635344709144192165695130850,    .5727069351230425055928411633109619686801,
1016     .55961578793542265941596269840374588966350,    .5714285714285714285714285714285714285714,
1017     .56184544326269181269140062795486301183700,    .5701559020044543429844097995545657015590,
1018     .56407013828480290218436721261241473257550,    .5688888888888888888888888888888888888889,
1019     .56628989502311577464155334382667206227800,    .5676274944567627494456762749445676274945,
1020     .56850473535266865532378233183408156037350,    .5663716814159292035398230088495575221239,
1021     .57071468100347144680739575051120482385150,    .5651214128035320088300220750551876379691,
1022     .57291975356178548306473885531886480748650,    .5638766519823788546255506607929515418502,
1023     .57511997447138785144460371157038025558000,    .5626373626373626373626373626373626373626,
1024     .57731536503482350219940144597785547375700,    .5614035087719298245614035087719298245614,
1025     .57950594641464214795689713355386629700650,    .5601750547045951859956236323851203501094,
1026     .58169173963462239562716149521293118596100,    .5589519650655021834061135371179039301310,
1027     .58387276558098266665552955601015128195300,    .5577342047930283224400871459694989106754,
1028     .58604904500357812846544902640744112432000,    .5565217391304347826086956521739130434783,
1029     .58822059851708596855957011939608491957200,    .5553145336225596529284164859002169197397,
1030     .59038744660217634674381770309992134571100,    .5541125541125541125541125541125541125541,
1031     .59254960960667157898740242671919986605650,    .5529157667386609071274298056155507559395,
1032     .59470710774669277576265358220553025603300,    .5517241379310344827586206896551724137931,
1033     .59685996110779382384237123915227130055450,    .5505376344086021505376344086021505376344,
1034     .59900818964608337768851242799428291618800,    .5493562231759656652360515021459227467811,
1035     .60115181318933474940990890900138765573500,    .5481798715203426124197002141327623126338,
1036     .60329085143808425240052883964381180703650,    .5470085470085470085470085470085470085470,
1037     .60542532396671688843525771517306566238400,    .5458422174840085287846481876332622601279,
1038     .60755525022454170969155029524699784815300,    .5446808510638297872340425531914893617021,
1039     .60968064953685519036241657886421307921400,    .5435244161358811040339702760084925690021,
1040     .61180154110599282990534675263916142284850,    .5423728813559322033898305084745762711864,
1041     .61391794401237043121710712512140162289150,    .5412262156448202959830866807610993657505,
1042     .61602987721551394351138242200249806046500,    .5400843881856540084388185654008438818565,
1043     .61813735955507864705538167982012964785100,    .5389473684210526315789473684210526315789,
1044     .62024040975185745772080281312810257077200,    .5378151260504201680672268907563025210084,
1045     .62233904640877868441606324267922900617100,    .5366876310272536687631027253668763102725,
1046     .62443328801189346144440150965237990021700,    .5355648535564853556485355648535564853556,
1047     .62652315293135274476554741340805776417250,    .5344467640918580375782881002087682672234,
1048     .62860865942237409420556559780379757285100,    .5333333333333333333333333333333333333333,
1049     .63068982562619868570408243613201193511500,    .5322245322245322245322245322245322245322,
1050     .63276666957103777644277897707070223987100,    .5311203319502074688796680497925311203320,
1051     .63483920917301017716738442686619237065300,    .5300207039337474120082815734989648033126,
1052     .63690746223706917739093569252872839570050,    .5289256198347107438016528925619834710744,
1053     .63897144645792069983514238629140891134750,    .5278350515463917525773195876288659793814,
1054     .64103117942093124081992527862894348800200,    .5267489711934156378600823045267489711934,
1055     .64308667860302726193566513757104985415950,    .5256673511293634496919917864476386036961,
1056     .64513796137358470073053240412264131009600,    .5245901639344262295081967213114754098361,
1057     .64718504499530948859131740391603671014300,    .5235173824130879345603271983640081799591,
1058     .64922794662510974195157587018911726772800,    .5224489795918367346938775510204081632653,
1059     .65126668331495807251485530287027359008800,    .5213849287169042769857433808553971486762,
1060     .65330127201274557080523663898929953575150,    .5203252032520325203252032520325203252033,
1061     .65533172956312757406749369692988693714150,    .5192697768762677484787018255578093306288,
1062     .65735807270835999727154330685152672231200,    .5182186234817813765182186234817813765182,
1063     .65938031808912778153342060249997302889800,    .5171717171717171717171717171717171717172,
1064     .66139848224536490484126716182800009846700,    .5161290322580645161290322580645161290323,
1065     .66341258161706617713093692145776003599150,    .5150905432595573440643863179074446680080,
1066     .66542263254509037562201001492212526500250,    .5140562248995983935742971887550200803213,
1067     .66742865127195616370414654738851822912700,    .5130260521042084168336673346693386773547,
1068     .66943065394262923906154583164607174694550,    .5120000000000000000000000000000000000000,
1069     .67142865660530226534774556057527661323550,    .5109780439121756487025948103792415169661,
1070     .67342267521216669923234121597488410770900,    .5099601593625498007968127490039840637450,
1071     .67541272562017662384192817626171745359900,    .5089463220675944333996023856858846918489,
1072     .67739882359180603188519853574689477682100,    .5079365079365079365079365079365079365079,
1073     .67938098479579733801614338517538271844400,    .5069306930693069306930693069306930693069,
1074     .68135922480790300781450241629499942064300,    .5059288537549407114624505928853754940711,
1075     .68333355911162063645036823800182901322850,    .5049309664694280078895463510848126232742,
1076     .68530400309891936760919861626462079584600,    .5039370078740157480314960629921259842520,
1077     .68727057207096020619019327568821609020250,    .5029469548133595284872298624754420432220,
1078     .68923328123880889251040571252815425395950,    .5019607843137254901960784313725490196078,
1079     .69314718055994530941723212145818, 5.0e-01,
1080 };
1081 
1082 
1083 
1084 #define LOGTAB_TRANSLATE(x,h) (((x) - 1.)*icvLogTab[(h)+1])
1085 static const double ln_2 = 0.69314718055994530941723212145818;
1086 
log(const float * _x,float * y,int n)1087 void log( const float *_x, float *y, int n )
1088 {
1089     static const float shift[] = { 0, -1.f/512 };
1090     static const float
1091     A0 = 0.3333333333333333333333333f,
1092     A1 = -0.5f,
1093     A2 = 1.f;
1094 
1095 #undef LOGPOLY
1096 #define LOGPOLY(x) (((A0*(x) + A1)*(x) + A2)*(x))
1097 
1098     int i = 0;
1099     Cv32suf buf[4];
1100     const int* x = (const int*)_x;
1101 
1102 #if CV_SSE2
1103     static const __m128d ln2_2 = _mm_set1_pd(ln_2);
1104     static const __m128 _1_4 = _mm_set1_ps(1.f);
1105     static const __m128 shift4 = _mm_set1_ps(-1.f/512);
1106 
1107     static const __m128 mA0 = _mm_set1_ps(A0);
1108     static const __m128 mA1 = _mm_set1_ps(A1);
1109     static const __m128 mA2 = _mm_set1_ps(A2);
1110 
1111     int CV_DECL_ALIGNED(16) idx[4];
1112 
1113     for( ; i <= n - 4; i += 4 )
1114     {
1115         __m128i h0 = _mm_loadu_si128((const __m128i*)(x + i));
1116         __m128i yi0 = _mm_sub_epi32(_mm_and_si128(_mm_srli_epi32(h0, 23), _mm_set1_epi32(255)), _mm_set1_epi32(127));
1117         __m128d yd0 = _mm_mul_pd(_mm_cvtepi32_pd(yi0), ln2_2);
1118         __m128d yd1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(yi0,yi0)), ln2_2);
1119 
1120         __m128i xi0 = _mm_or_si128(_mm_and_si128(h0, _mm_set1_epi32(LOGTAB_MASK2_32F)), _mm_set1_epi32(127 << 23));
1121 
1122         h0 = _mm_and_si128(_mm_srli_epi32(h0, 23 - LOGTAB_SCALE - 1), _mm_set1_epi32(LOGTAB_MASK*2));
1123         _mm_store_si128((__m128i*)idx, h0);
1124         h0 = _mm_cmpeq_epi32(h0, _mm_set1_epi32(510));
1125 
1126         __m128d t0, t1, t2, t3, t4;
1127         t0 = _mm_load_pd(icvLogTab + idx[0]);
1128         t2 = _mm_load_pd(icvLogTab + idx[1]);
1129         t1 = _mm_unpackhi_pd(t0, t2);
1130         t0 = _mm_unpacklo_pd(t0, t2);
1131         t2 = _mm_load_pd(icvLogTab + idx[2]);
1132         t4 = _mm_load_pd(icvLogTab + idx[3]);
1133         t3 = _mm_unpackhi_pd(t2, t4);
1134         t2 = _mm_unpacklo_pd(t2, t4);
1135 
1136         yd0 = _mm_add_pd(yd0, t0);
1137         yd1 = _mm_add_pd(yd1, t2);
1138 
1139         __m128 yf0 = _mm_movelh_ps(_mm_cvtpd_ps(yd0), _mm_cvtpd_ps(yd1));
1140 
1141         __m128 xf0 = _mm_sub_ps(_mm_castsi128_ps(xi0), _1_4);
1142         xf0 = _mm_mul_ps(xf0, _mm_movelh_ps(_mm_cvtpd_ps(t1), _mm_cvtpd_ps(t3)));
1143         xf0 = _mm_add_ps(xf0, _mm_and_ps(_mm_castsi128_ps(h0), shift4));
1144 
1145         __m128 zf0 = _mm_mul_ps(xf0, mA0);
1146         zf0 = _mm_mul_ps(_mm_add_ps(zf0, mA1), xf0);
1147         zf0 = _mm_mul_ps(_mm_add_ps(zf0, mA2), xf0);
1148         yf0 = _mm_add_ps(yf0, zf0);
1149 
1150         _mm_storeu_ps(y + i, yf0);
1151     }
1152 #endif
1153     for( ; i <= n - 4; i += 4 )
1154     {
1155         double x0, x1, x2, x3;
1156         double y0, y1, y2, y3;
1157         int h0, h1, h2, h3;
1158 
1159         h0 = x[i];
1160         h1 = x[i+1];
1161         buf[0].i = (h0 & LOGTAB_MASK2_32F) | (127 << 23);
1162         buf[1].i = (h1 & LOGTAB_MASK2_32F) | (127 << 23);
1163 
1164         y0 = (((h0 >> 23) & 0xff) - 127) * ln_2;
1165         y1 = (((h1 >> 23) & 0xff) - 127) * ln_2;
1166 
1167         h0 = (h0 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1168         h1 = (h1 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1169 
1170         y0 += icvLogTab[h0];
1171         y1 += icvLogTab[h1];
1172 
1173         h2 = x[i+2];
1174         h3 = x[i+3];
1175 
1176         x0 = LOGTAB_TRANSLATE( buf[0].f, h0 );
1177         x1 = LOGTAB_TRANSLATE( buf[1].f, h1 );
1178 
1179         buf[2].i = (h2 & LOGTAB_MASK2_32F) | (127 << 23);
1180         buf[3].i = (h3 & LOGTAB_MASK2_32F) | (127 << 23);
1181 
1182         y2 = (((h2 >> 23) & 0xff) - 127) * ln_2;
1183         y3 = (((h3 >> 23) & 0xff) - 127) * ln_2;
1184 
1185         h2 = (h2 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1186         h3 = (h3 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1187 
1188         y2 += icvLogTab[h2];
1189         y3 += icvLogTab[h3];
1190 
1191         x2 = LOGTAB_TRANSLATE( buf[2].f, h2 );
1192         x3 = LOGTAB_TRANSLATE( buf[3].f, h3 );
1193 
1194         x0 += shift[h0 == 510];
1195         x1 += shift[h1 == 510];
1196         y0 += LOGPOLY( x0 );
1197         y1 += LOGPOLY( x1 );
1198 
1199         y[i] = (float) y0;
1200         y[i + 1] = (float) y1;
1201 
1202         x2 += shift[h2 == 510];
1203         x3 += shift[h3 == 510];
1204         y2 += LOGPOLY( x2 );
1205         y3 += LOGPOLY( x3 );
1206 
1207         y[i + 2] = (float) y2;
1208         y[i + 3] = (float) y3;
1209     }
1210 
1211     for( ; i < n; i++ )
1212     {
1213         int h0 = x[i];
1214         double y0;
1215         float x0;
1216 
1217         y0 = (((h0 >> 23) & 0xff) - 127) * ln_2;
1218 
1219         buf[0].i = (h0 & LOGTAB_MASK2_32F) | (127 << 23);
1220         h0 = (h0 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1221 
1222         y0 += icvLogTab[h0];
1223         x0 = (float)LOGTAB_TRANSLATE( buf[0].f, h0 );
1224         x0 += shift[h0 == 510];
1225         y0 += LOGPOLY( x0 );
1226 
1227         y[i] = (float)y0;
1228     }
1229 }
1230 
log(const double * x,double * y,int n)1231 void log( const double *x, double *y, int n )
1232 {
1233     static const double shift[] = { 0, -1./512 };
1234     static const double
1235     A7 = 1.0,
1236     A6 = -0.5,
1237     A5 = 0.333333333333333314829616256247390992939472198486328125,
1238     A4 = -0.25,
1239     A3 = 0.2,
1240     A2 = -0.1666666666666666574148081281236954964697360992431640625,
1241     A1 = 0.1428571428571428769682682968777953647077083587646484375,
1242     A0 = -0.125;
1243 
1244 #undef LOGPOLY
1245 #define LOGPOLY(x,k) ((x)+=shift[k], xq = (x)*(x),\
1246 (((A0*xq + A2)*xq + A4)*xq + A6)*xq + \
1247 (((A1*xq + A3)*xq + A5)*xq + A7)*(x))
1248 
1249     int i = 0;
1250     DBLINT buf[4];
1251     DBLINT *X = (DBLINT *) x;
1252 
1253 #if CV_SSE2
1254     static const __m128d ln2_2 = _mm_set1_pd(ln_2);
1255     static const __m128d _1_2 = _mm_set1_pd(1.);
1256     static const __m128d shift2 = _mm_set1_pd(-1./512);
1257 
1258     static const __m128i log_and_mask2 = _mm_set_epi32(LOGTAB_MASK2, 0xffffffff, LOGTAB_MASK2, 0xffffffff);
1259     static const __m128i log_or_mask2 = _mm_set_epi32(1023 << 20, 0, 1023 << 20, 0);
1260 
1261     static const __m128d mA0 = _mm_set1_pd(A0);
1262     static const __m128d mA1 = _mm_set1_pd(A1);
1263     static const __m128d mA2 = _mm_set1_pd(A2);
1264     static const __m128d mA3 = _mm_set1_pd(A3);
1265     static const __m128d mA4 = _mm_set1_pd(A4);
1266     static const __m128d mA5 = _mm_set1_pd(A5);
1267     static const __m128d mA6 = _mm_set1_pd(A6);
1268     static const __m128d mA7 = _mm_set1_pd(A7);
1269 
1270     int CV_DECL_ALIGNED(16) idx[4];
1271 
1272     for( ; i <= n - 4; i += 4 )
1273     {
1274         __m128i h0 = _mm_loadu_si128((const __m128i*)(x + i));
1275         __m128i h1 = _mm_loadu_si128((const __m128i*)(x + i + 2));
1276 
1277         __m128d xd0 = _mm_castsi128_pd(_mm_or_si128(_mm_and_si128(h0, log_and_mask2), log_or_mask2));
1278         __m128d xd1 = _mm_castsi128_pd(_mm_or_si128(_mm_and_si128(h1, log_and_mask2), log_or_mask2));
1279 
1280         h0 = _mm_unpackhi_epi32(_mm_unpacklo_epi32(h0, h1), _mm_unpackhi_epi32(h0, h1));
1281 
1282         __m128i yi0 = _mm_sub_epi32(_mm_and_si128(_mm_srli_epi32(h0, 20),
1283                                                   _mm_set1_epi32(2047)), _mm_set1_epi32(1023));
1284         __m128d yd0 = _mm_mul_pd(_mm_cvtepi32_pd(yi0), ln2_2);
1285         __m128d yd1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(yi0, yi0)), ln2_2);
1286 
1287         h0 = _mm_and_si128(_mm_srli_epi32(h0, 20 - LOGTAB_SCALE - 1), _mm_set1_epi32(LOGTAB_MASK * 2));
1288         _mm_store_si128((__m128i*)idx, h0);
1289         h0 = _mm_cmpeq_epi32(h0, _mm_set1_epi32(510));
1290 
1291         __m128d t0, t1, t2, t3, t4;
1292         t0 = _mm_load_pd(icvLogTab + idx[0]);
1293         t2 = _mm_load_pd(icvLogTab + idx[1]);
1294         t1 = _mm_unpackhi_pd(t0, t2);
1295         t0 = _mm_unpacklo_pd(t0, t2);
1296         t2 = _mm_load_pd(icvLogTab + idx[2]);
1297         t4 = _mm_load_pd(icvLogTab + idx[3]);
1298         t3 = _mm_unpackhi_pd(t2, t4);
1299         t2 = _mm_unpacklo_pd(t2, t4);
1300 
1301         yd0 = _mm_add_pd(yd0, t0);
1302         yd1 = _mm_add_pd(yd1, t2);
1303 
1304         xd0 = _mm_mul_pd(_mm_sub_pd(xd0, _1_2), t1);
1305         xd1 = _mm_mul_pd(_mm_sub_pd(xd1, _1_2), t3);
1306 
1307         xd0 = _mm_add_pd(xd0, _mm_and_pd(_mm_castsi128_pd(_mm_unpacklo_epi32(h0, h0)), shift2));
1308         xd1 = _mm_add_pd(xd1, _mm_and_pd(_mm_castsi128_pd(_mm_unpackhi_epi32(h0, h0)), shift2));
1309 
1310         __m128d zd0 = _mm_mul_pd(xd0, mA0);
1311         __m128d zd1 = _mm_mul_pd(xd1, mA0);
1312         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA1), xd0);
1313         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA1), xd1);
1314         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA2), xd0);
1315         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA2), xd1);
1316         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA3), xd0);
1317         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA3), xd1);
1318         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA4), xd0);
1319         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA4), xd1);
1320         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA5), xd0);
1321         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA5), xd1);
1322         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA6), xd0);
1323         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA6), xd1);
1324         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA7), xd0);
1325         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA7), xd1);
1326 
1327         yd0 = _mm_add_pd(yd0, zd0);
1328         yd1 = _mm_add_pd(yd1, zd1);
1329 
1330         _mm_storeu_pd(y + i, yd0);
1331         _mm_storeu_pd(y + i + 2, yd1);
1332     }
1333 #endif
1334     for( ; i <= n - 4; i += 4 )
1335     {
1336         double xq;
1337         double x0, x1, x2, x3;
1338         double y0, y1, y2, y3;
1339         int h0, h1, h2, h3;
1340 
1341         h0 = X[i].i.lo;
1342         h1 = X[i + 1].i.lo;
1343         buf[0].i.lo = h0;
1344         buf[1].i.lo = h1;
1345 
1346         h0 = X[i].i.hi;
1347         h1 = X[i + 1].i.hi;
1348         buf[0].i.hi = (h0 & LOGTAB_MASK2) | (1023 << 20);
1349         buf[1].i.hi = (h1 & LOGTAB_MASK2) | (1023 << 20);
1350 
1351         y0 = (((h0 >> 20) & 0x7ff) - 1023) * ln_2;
1352         y1 = (((h1 >> 20) & 0x7ff) - 1023) * ln_2;
1353 
1354         h2 = X[i + 2].i.lo;
1355         h3 = X[i + 3].i.lo;
1356         buf[2].i.lo = h2;
1357         buf[3].i.lo = h3;
1358 
1359         h0 = (h0 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1360         h1 = (h1 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1361 
1362         y0 += icvLogTab[h0];
1363         y1 += icvLogTab[h1];
1364 
1365         h2 = X[i + 2].i.hi;
1366         h3 = X[i + 3].i.hi;
1367 
1368         x0 = LOGTAB_TRANSLATE( buf[0].d, h0 );
1369         x1 = LOGTAB_TRANSLATE( buf[1].d, h1 );
1370 
1371         buf[2].i.hi = (h2 & LOGTAB_MASK2) | (1023 << 20);
1372         buf[3].i.hi = (h3 & LOGTAB_MASK2) | (1023 << 20);
1373 
1374         y2 = (((h2 >> 20) & 0x7ff) - 1023) * ln_2;
1375         y3 = (((h3 >> 20) & 0x7ff) - 1023) * ln_2;
1376 
1377         h2 = (h2 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1378         h3 = (h3 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1379 
1380         y2 += icvLogTab[h2];
1381         y3 += icvLogTab[h3];
1382 
1383         x2 = LOGTAB_TRANSLATE( buf[2].d, h2 );
1384         x3 = LOGTAB_TRANSLATE( buf[3].d, h3 );
1385 
1386         y0 += LOGPOLY( x0, h0 == 510 );
1387         y1 += LOGPOLY( x1, h1 == 510 );
1388 
1389         y[i] = y0;
1390         y[i + 1] = y1;
1391 
1392         y2 += LOGPOLY( x2, h2 == 510 );
1393         y3 += LOGPOLY( x3, h3 == 510 );
1394 
1395         y[i + 2] = y2;
1396         y[i + 3] = y3;
1397     }
1398 
1399     for( ; i < n; i++ )
1400     {
1401         int h0 = X[i].i.hi;
1402         double xq;
1403         double x0, y0 = (((h0 >> 20) & 0x7ff) - 1023) * ln_2;
1404 
1405         buf[0].i.hi = (h0 & LOGTAB_MASK2) | (1023 << 20);
1406         buf[0].i.lo = X[i].i.lo;
1407         h0 = (h0 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1408 
1409         y0 += icvLogTab[h0];
1410         x0 = LOGTAB_TRANSLATE( buf[0].d, h0 );
1411         y0 += LOGPOLY( x0, h0 == 510 );
1412         y[i] = y0;
1413     }
1414 }
1415 
1416 }}
1417